Fix USB/RAM status checks; add server model+S/N to dashboard; remove cycles

USB Export Drive: lsblk reports TRAN only for whole disks, not partitions (/dev/sdc1). Strip trailing partition digits to get parent disk before transport check. LiveCD in RAM: When RunInstallToRAM copies squashfs to /dev/shm/bee-live/ but bind-mount of /run/live/medium fails (CD-ROM boots), /run/live/medium still shows the CD-ROM fstype. Add fallback: if /dev/shm/bee-live/*.squashfs exists, the data is in RAM — report status OK. Dashboard Hardware Summary: Show server Manufacturer + ProductName as heading and S/N as subline above the component table, sourced from hw.Board (dmidecode system-type data). Validate: Remove Cycles input — always run once. cycles=1 hardcoded in runAllSAT(). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Include profile and mode in benchmark task names for task list clarity
2026-04-12 22:46:42 +03:00 · 2026-04-12 22:36:51 +03:00 · 2026-04-12 22:33:17 +03:00 · 2026-04-12 22:30:47 +03:00 · 2026-04-12 22:17:56 +03:00 · 2026-04-12 22:06:46 +03:00
25 changed files with 1547 additions and 144 deletions
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -213,7 +213,7 @@ func BuildSupportBundle(exportDir string) (string, error) {

 	now := time.Now().UTC()
 	date := now.Format("2006-01-02")
-	tod := now.Format("15:04:05")
+	tod := now.Format("150405")
 	ver := bundleVersion()
 	model := serverModelForBundle()
 	sn := serverSerialForBundle()
--- a/audit/internal/collector/pcie.go
+++ b/audit/internal/collector/pcie.go
@@ -2,6 +2,7 @@ package collector

 import (
 	"bee/audit/internal/schema"
+	"fmt"
 	"log/slog"
 	"os/exec"
 	"strconv"
@@ -172,6 +173,9 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {

 	// SVendor/SDevice available but not in schema — skip

+	// Warn if PCIe link is running below its maximum negotiated speed.
+	applyPCIeLinkSpeedWarning(&dev)
+
 	return dev
 }

@@ -241,6 +245,41 @@ func readPCIStringAttribute(bdf, attribute string) (string, bool) {
 	return value, true
 }

+// applyPCIeLinkSpeedWarning sets the device status to Warning, with an
+// explanatory ErrorDescription, when the current PCIe link speed ranks below
+// the maximum link speed recorded for the device.
+//
+// The device is left untouched when dev is nil, when either speed is missing,
+// or when either speed is not a recognised "GenN" label. pcieLinkSpeedRank
+// returns 0 for unrecognised labels, so without the explicit check an unknown
+// current speed would compare as lower than any valid maximum and raise a
+// false "degraded" warning.
+func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
+	if dev == nil || dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
+		return
+	}
+	current := pcieLinkSpeedRank(*dev.LinkSpeed)
+	maximum := pcieLinkSpeedRank(*dev.MaxLinkSpeed)
+	// Rank 0 means "unrecognised"; only compare two known generations.
+	if current == 0 || maximum == 0 || current >= maximum {
+		return
+	}
+	warn := statusWarning
+	dev.Status = &warn
+	desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
+	dev.ErrorDescription = &desc
+}
+
+// pcieLinkSpeedRank maps a normalized PCIe generation label to its generation
+// number ("Gen1" → 1 through "Gen6" → 6). Matching is exact and case-sensitive;
+// any other input yields 0.
+func pcieLinkSpeedRank(gen string) int {
+	// The position of a label in this table is its rank minus one.
+	for i, label := range [...]string{"Gen1", "Gen2", "Gen3", "Gen4", "Gen5", "Gen6"} {
+		if gen == label {
+			return i + 1
+		}
+	}
+	return 0
+}
+
 func normalizePCILinkSpeed(raw string) string {
 	raw = strings.TrimSpace(strings.ToLower(raw))
 	switch {
--- a/audit/internal/collector/pcie_filter_test.go
+++ b/audit/internal/collector/pcie_filter_test.go
@@ -1,6 +1,7 @@
 package collector

 import (
+	"bee/audit/internal/schema"
 	"encoding/json"
 	"strings"
 	"testing"
@@ -141,3 +142,77 @@ func TestNormalizePCILinkSpeed(t *testing.T) {
 		}
 	}
 }
+
+// TestApplyPCIeLinkSpeedWarning checks that a device whose current link speed
+// ranks below its maximum is flagged with a Warning status and a description
+// naming the current generation, and that devices at full speed or with a
+// missing speed keep their OK status and get no description.
+func TestApplyPCIeLinkSpeedWarning(t *testing.T) {
+	// str returns a pointer to its own copy of s; a nil pointer models a missing speed.
+	str := func(s string) *string { return &s }
+
+	type testCase struct {
+		name       string
+		current    *string
+		maximum    *string
+		expectWarn bool
+		descHasGen string // substring expected in ErrorDescription when a warning is raised
+	}
+	cases := []testCase{
+		{name: "degraded Gen1 vs Gen5", current: str("Gen1"), maximum: str("Gen5"), expectWarn: true, descHasGen: "Gen1"},
+		{name: "at max Gen5", current: str("Gen5"), maximum: str("Gen5"), expectWarn: false},
+		{name: "degraded Gen4 vs Gen5", current: str("Gen4"), maximum: str("Gen5"), expectWarn: true, descHasGen: "Gen4"},
+		{name: "missing current speed — no warning", current: nil, maximum: str("Gen5"), expectWarn: false},
+		{name: "missing max speed — no warning", current: str("Gen1"), maximum: nil, expectWarn: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Every device starts out OK so a status change is attributable to the call under test.
+			initial := statusOK
+			dev := schema.HardwarePCIeDevice{}
+			dev.Status = &initial
+			dev.LinkSpeed = tc.current
+			dev.MaxLinkSpeed = tc.maximum
+
+			applyPCIeLinkSpeedWarning(&dev)
+
+			warned := dev.Status != nil && *dev.Status == statusWarning
+			if warned != tc.expectWarn {
+				t.Fatalf("wantWarning=%v gotWarning=%v (status=%v)", tc.expectWarn, warned, dev.Status)
+			}
+			if !tc.expectWarn {
+				if dev.ErrorDescription != nil {
+					t.Fatalf("unexpected ErrorDescription: %s", *dev.ErrorDescription)
+				}
+				return
+			}
+			if dev.ErrorDescription == nil {
+				t.Fatal("expected ErrorDescription to be set")
+			}
+			if !strings.Contains(*dev.ErrorDescription, tc.descHasGen) {
+				t.Fatalf("ErrorDescription %q does not contain %q", *dev.ErrorDescription, tc.descHasGen)
+			}
+		})
+	}
+}
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"math"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"regexp"
 	"sort"
@@ -108,7 +109,11 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 		ServerModel:        readServerModel(),
 		BenchmarkProfile:   spec.Name,
 		ParallelGPUs:       opts.ParallelGPUs,
+		RampStep:           opts.RampStep,
+		RampTotal:          opts.RampTotal,
+		RampRunID:          opts.RampRunID,
 		SelectedGPUIndices: append([]int(nil), selected...),
+		HostConfig:         readBenchmarkHostConfig(),
 		Normalization: BenchmarkNormalization{
 			Status: "full",
 		},
@@ -121,15 +126,22 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 	var serverIdleOK, serverLoadedOK bool
 	var serverLoadedSamples int

+	// Run nvidia-smi -q first: used both for the log file and as a fallback
+	// source of max clock values when CSV clock fields are unsupported.
+	var nvsmiQOut []byte
+	if out, err := runSATCommandCtx(ctx, verboseLog, "00-nvidia-smi-q.log", []string{"nvidia-smi", "-q"}, nil, nil); err == nil {
+		nvsmiQOut = out
+		_ = os.WriteFile(filepath.Join(runDir, "00-nvidia-smi-q.log"), out, 0644)
+	}
+
 	infoByIndex, infoErr := queryBenchmarkGPUInfo(selected)
 	if infoErr != nil {
 		result.Warnings = append(result.Warnings, "gpu inventory query failed: "+infoErr.Error())
 		result.Normalization.Status = "partial"
 	}
-
-	if out, err := runSATCommandCtx(ctx, verboseLog, "00-nvidia-smi-q.log", []string{"nvidia-smi", "-q"}, nil, nil); err == nil {
-		_ = os.WriteFile(filepath.Join(runDir, "00-nvidia-smi-q.log"), out, 0644)
-	}
+	// Enrich with max clocks from verbose output — covers GPUs where
+	// clocks.max.* CSV fields are unsupported (e.g. Blackwell / driver 98.x).
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQOut)

 	activeApps, err := queryActiveComputeApps(selected)
 	if err == nil && len(activeApps) > 0 {
@@ -145,8 +157,16 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 		}
 	}()

+	// Power calibration: run dcgmi targeted_power while sampling nvidia-smi power.
+	// Returns per-GPU p95 power: the measured draw that PowerSustainScore compares against the rated power limit.
+	calibPowerByIndex := runBenchmarkPowerCalibration(ctx, verboseLog, runDir, selected, logFunc)
+
+	// Start background CPU load sampler — samples every 10s during GPU phases.
+	cpuStopCh := make(chan struct{})
+	cpuSamplesCh := startCPULoadSampler(cpuStopCh, 10)
+
 	if opts.ParallelGPUs {
-		runNvidiaBenchmarkParallel(ctx, verboseLog, runDir, selected, infoByIndex, opts, spec, logFunc, &result, &serverIdleW, &serverLoadedWSum, &serverIdleOK, &serverLoadedOK, &serverLoadedSamples)
+		runNvidiaBenchmarkParallel(ctx, verboseLog, runDir, selected, infoByIndex, opts, spec, logFunc, &result, calibPowerByIndex, &serverIdleW, &serverLoadedWSum, &serverIdleOK, &serverLoadedOK, &serverLoadedSamples)
 	} else {

 	for _, idx := range selected {
@@ -166,6 +186,9 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 			gpuResult.BaseGraphicsClockMHz = info.BaseGraphicsClockMHz
 			gpuResult.MaxMemoryClockMHz = info.MaxMemoryClockMHz
 		}
+		if w, ok := calibPowerByIndex[idx]; ok && w > 0 {
+			gpuResult.CalibratedPeakPowerW = w
+		}
 		if norm := findBenchmarkNormalization(result.Normalization.GPUs, idx); norm != nil {
 			gpuResult.LockedGraphicsClockMHz = norm.GPUClockLockMHz
 			gpuResult.LockedMemoryClockMHz = norm.MemoryClockLockMHz
@@ -303,6 +326,16 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 		}
 	}

+	// Stop CPU load sampler and attach results.
+	close(cpuStopCh)
+	if cpuSamples := <-cpuSamplesCh; len(cpuSamples) > 0 {
+		result.CPULoad = summarizeCPULoad(cpuSamples)
+		if result.CPULoad != nil && result.CPULoad.Status != "ok" {
+			logFunc(fmt.Sprintf("host CPU load during benchmark: avg=%.1f%% max=%.1f%% status=%s",
+				result.CPULoad.AvgPct, result.CPULoad.MaxPct, result.CPULoad.Status))
+		}
+	}
+
 	// Compute server power characterization from accumulated IPMI samples.
 	var gpuReportedSumW float64
 	for _, gpu := range result.GPUs {
@@ -314,6 +347,20 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 	}
 	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuReportedSumW, serverIdleOK && serverLoadedOK)

+	// Apply server-power penalty when IPMI reports the server delta is much
+	// lower than GPU-reported sum: GPU power telemetry is over-stated, making
+	// CalibratedPeakPowerW and PowerSustainScore unreliable.
+	// Penalty factor scales from 1.0 (ratio ≥ 0.75, no penalty) down to 0.
+	if sp := result.ServerPower; sp != nil && sp.Available && sp.ReportingRatio > 0 && sp.ReportingRatio < 0.75 {
+		factor := sp.ReportingRatio / 0.75
+		for i := range result.GPUs {
+			result.GPUs[i].Scores.CompositeScore *= factor
+			result.GPUs[i].Notes = append(result.GPUs[i].Notes,
+				fmt.Sprintf("server-power penalty applied (reporting_ratio=%.2f < 0.75): composite score reduced to %.1f%%",
+					sp.ReportingRatio, factor*100))
+		}
+	}
+
 	result.Findings = buildBenchmarkFindings(result)
 	result.OverallStatus = benchmarkOverallStatus(result)

@@ -335,11 +382,7 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 		return "", fmt.Errorf("write summary.txt: %w", err)
 	}

-	archive := filepath.Join(baseDir, "gpu-benchmark-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", fmt.Errorf("pack benchmark archive: %w", err)
-	}
-	return archive, nil
+	return runDir, nil
 }

 func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) NvidiaBenchmarkOptions {
@@ -374,9 +417,13 @@ func resolveBenchmarkProfile(profile string) benchmarkProfileSpec {
 // Fields are tried in order; the first successful query wins. Extended fields
 // (attribute.multiprocessor_count, power.default_limit) are not supported on
 // all driver versions, so we fall back to the base set if the full query fails.
+// The minimal fallback omits clock fields entirely — clocks.max.* returns
+// exit status 2 on some GPU generations (e.g. Blackwell); max clocks are
+// then recovered from nvidia-smi -q via enrichGPUInfoWithMaxClocks.
 var benchmarkGPUInfoQueries = []struct {
 	fields   string
 	extended bool // whether this query includes optional extended fields
+	minimal  bool // clock fields omitted; max clocks must be filled separately
 }{
 	{
 		fields:   "index,uuid,name,pci.bus_id,vbios_version,power.limit,clocks.max.graphics,clocks.max.memory,clocks.base.graphics,attribute.multiprocessor_count,power.default_limit",
@@ -386,6 +433,104 @@ var benchmarkGPUInfoQueries = []struct {
 		fields:   "index,uuid,name,pci.bus_id,vbios_version,power.limit,clocks.max.graphics,clocks.max.memory,clocks.base.graphics",
 		extended: false,
 	},
+	{
+		fields:  "index,uuid,name,pci.bus_id,vbios_version,power.limit",
+		minimal: true,
+	},
+}
+
+// Patterns used to pull per-GPU values out of `nvidia-smi -q` output. They are
+// compiled once at package initialisation instead of on every call.
+var (
+	nvsmiQSectionRe = regexp.MustCompile(`(?m)^GPU\s+([\dA-Fa-f:\.]+)`)
+	nvsmiQMaxGfxRe  = regexp.MustCompile(`(?i)Max Clocks[\s\S]*?Graphics\s*:\s*(\d+)\s*MHz`)
+	nvsmiQMaxMemRe  = regexp.MustCompile(`(?i)Max Clocks[\s\S]*?Memory\s*:\s*(\d+)\s*MHz`)
+	nvsmiQDefPwrRe  = regexp.MustCompile(`(?i)Default Power Limit\s*:\s*([0-9.]+)\s*W`)
+	nvsmiQCurPwrRe  = regexp.MustCompile(`(?i)Current Power Limit\s*:\s*([0-9.]+)\s*W`)
+	nvsmiQSMCountRe = regexp.MustCompile(`(?i)Multiprocessor Count\s*:\s*(\d+)`)
+)
+
+// enrichGPUInfoWithMaxClocks fills in values that are still zero in
+// infoByIndex by parsing nvidia-smi -q output (nvsmiQ). Despite its name it
+// covers more than clocks: MaxGraphicsClockMHz and MaxMemoryClockMHz come from
+// the "Max Clocks" section, and DefaultPowerLimitW, PowerLimitW and
+// MultiprocessorCount are filled from their respective lines when present.
+// Fields that already hold a non-zero value are never overwritten.
+//
+// This is the fallback for GPUs (e.g. Blackwell) where the clocks.max.* CSV
+// fields return exit status 2 but the verbose query works fine.
+//
+// infoByIndex is updated in place. The function is a no-op when either
+// argument is empty, and sections whose bus ID matches no known GPU are
+// skipped.
+func enrichGPUInfoWithMaxClocks(infoByIndex map[int]benchmarkGPUInfo, nvsmiQ []byte) {
+	if len(infoByIndex) == 0 || len(nvsmiQ) == 0 {
+		return
+	}
+
+	// Build a normalised (trimmed, lower-case) bus ID → GPU index map for
+	// matching verbose sections to GPU indices. IDs that are empty after
+	// trimming are skipped: an empty key would satisfy the suffix fallback
+	// below for every section.
+	busToBenchIdx := make(map[string]int, len(infoByIndex))
+	for idx, info := range infoByIndex {
+		if id := strings.ToLower(strings.TrimSpace(info.BusID)); id != "" {
+			busToBenchIdx[id] = idx
+		}
+	}
+
+	// firstNumber returns the first capture group of re within section parsed
+	// as a float, or 0 when there is no match or the capture does not parse.
+	// Assigning that 0 to a field that is already 0 leaves it unchanged.
+	firstNumber := func(re *regexp.Regexp, section []byte) float64 {
+		m := re.FindSubmatch(section)
+		if m == nil {
+			return 0
+		}
+		v, err := strconv.ParseFloat(string(m[1]), 64)
+		if err != nil {
+			return 0
+		}
+		return v
+	}
+
+	// Each unindented "GPU <bus id>" line opens a section that runs until the
+	// next such line or the end of the output.
+	sectionStarts := nvsmiQSectionRe.FindAllSubmatchIndex(nvsmiQ, -1)
+	for i, loc := range sectionStarts {
+		busID := strings.ToLower(string(nvsmiQ[loc[2]:loc[3]]))
+		benchIdx, ok := busToBenchIdx[busID]
+		if !ok {
+			// Bus IDs from verbose output may have a different domain prefix;
+			// try a suffix match in either direction.
+			for k, v := range busToBenchIdx {
+				if strings.HasSuffix(k, busID) || strings.HasSuffix(busID, k) {
+					benchIdx, ok = v, true
+					break
+				}
+			}
+		}
+		if !ok {
+			continue
+		}
+
+		end := len(nvsmiQ)
+		if i+1 < len(sectionStarts) {
+			end = sectionStarts[i+1][0]
+		}
+		section := nvsmiQ[loc[0]:end]
+
+		// Map values are not addressable: edit a copy and store it back.
+		info := infoByIndex[benchIdx]
+		if info.MaxGraphicsClockMHz == 0 {
+			info.MaxGraphicsClockMHz = firstNumber(nvsmiQMaxGfxRe, section)
+		}
+		if info.MaxMemoryClockMHz == 0 {
+			info.MaxMemoryClockMHz = firstNumber(nvsmiQMaxMemRe, section)
+		}
+		if info.DefaultPowerLimitW == 0 {
+			info.DefaultPowerLimitW = firstNumber(nvsmiQDefPwrRe, section)
+		}
+		if info.PowerLimitW == 0 {
+			info.PowerLimitW = firstNumber(nvsmiQCurPwrRe, section)
+		}
+		if info.MultiprocessorCount == 0 {
+			if m := nvsmiQSMCountRe.FindSubmatch(section); m != nil {
+				if v, err := strconv.Atoi(string(m[1])); err == nil && v > 0 {
+					info.MultiprocessorCount = v
+				}
+			}
+		}
+		infoByIndex[benchIdx] = info
+	}
+}

 func queryBenchmarkGPUInfo(gpuIndices []int) (map[int]benchmarkGPUInfo, error) {
@@ -413,9 +558,13 @@ func queryBenchmarkGPUInfo(gpuIndices []int) (map[int]benchmarkGPUInfo, error) {
 			continue
 		}

+		minFields := 6
+		if !q.minimal {
+			minFields = 9
+		}
 		infoByIndex := make(map[int]benchmarkGPUInfo, len(rows))
 		for _, row := range rows {
-			if len(row) < 9 {
+			if len(row) < minFields {
 				continue
 			}
 			idx, err := strconv.Atoi(strings.TrimSpace(row[0]))
@@ -423,24 +572,26 @@ func queryBenchmarkGPUInfo(gpuIndices []int) (map[int]benchmarkGPUInfo, error) {
 				continue
 			}
 			info := benchmarkGPUInfo{
-				Index:               idx,
-				UUID:                strings.TrimSpace(row[1]),
-				Name:                strings.TrimSpace(row[2]),
-				BusID:               strings.TrimSpace(row[3]),
-				VBIOS:               strings.TrimSpace(row[4]),
-				PowerLimitW:         parseBenchmarkFloat(row[5]),
-				MaxGraphicsClockMHz: parseBenchmarkFloat(row[6]),
-				MaxMemoryClockMHz:   parseBenchmarkFloat(row[7]),
+				Index:       idx,
+				UUID:        strings.TrimSpace(row[1]),
+				Name:        strings.TrimSpace(row[2]),
+				BusID:       strings.TrimSpace(row[3]),
+				VBIOS:       strings.TrimSpace(row[4]),
+				PowerLimitW: parseBenchmarkFloat(row[5]),
 			}
-			if len(row) >= 9 {
-				info.BaseGraphicsClockMHz = parseBenchmarkFloat(row[8])
-			}
-			if q.extended {
-				if len(row) >= 10 {
-					info.MultiprocessorCount = int(parseBenchmarkFloat(row[9]))
+			if !q.minimal {
+				info.MaxGraphicsClockMHz = parseBenchmarkFloat(row[6])
+				info.MaxMemoryClockMHz = parseBenchmarkFloat(row[7])
+				if len(row) >= 9 {
+					info.BaseGraphicsClockMHz = parseBenchmarkFloat(row[8])
 				}
-				if len(row) >= 11 {
-					info.DefaultPowerLimitW = parseBenchmarkFloat(row[10])
+				if q.extended {
+					if len(row) >= 10 {
+						info.MultiprocessorCount = int(parseBenchmarkFloat(row[9]))
+					}
+					if len(row) >= 11 {
+						info.DefaultPowerLimitW = parseBenchmarkFloat(row[10])
+					}
 				}
 			}
 			infoByIndex[idx] = info
@@ -744,14 +895,22 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
 			score.ComputeScore += precision.TeraOpsPerSec
 		}
 	}
-	// Use default power limit for sustain score so a manually reduced limit
-	// does not inflate the score. Fall back to enforced limit if default unknown.
-	referencePowerW := gpu.DefaultPowerLimitW
-	if referencePowerW <= 0 {
-		referencePowerW = gpu.PowerLimitW
-	}
-	if referencePowerW > 0 {
-		score.PowerSustainScore = math.Min(100, (gpu.Steady.AvgPowerW/referencePowerW)*100)
+	// PowerSustainScore: measures how close the GPU came to its rated TDP under
+	// a full-spectrum load (dcgmi targeted_power). 100 = exactly at rated TDP.
+	// Penalty applied symmetrically for both under- and over-TDP deviations:
+	//   score = max(0, 100 − |measured − rated| / rated × 100)
+	// Under-TDP → power delivery / cooling issue.
+	// Over-TDP  → power limit not properly enforced / power regulation fault.
+	// Falls back to 0 if calibration was not performed (dcgmi unavailable).
+	{
+		ref := gpu.DefaultPowerLimitW
+		if ref <= 0 {
+			ref = gpu.PowerLimitW
+		}
+		if gpu.CalibratedPeakPowerW > 0 && ref > 0 {
+			deviationPct := math.Abs(gpu.CalibratedPeakPowerW-ref) / ref * 100
+			score.PowerSustainScore = clampScore(100 - deviationPct)
+		}
 	}
 	runtimeUS := math.Max(1, gpu.Steady.DurationSec*1e6)
 	thermalRatio := float64(gpu.Throttle.HWThermalSlowdownUS+gpu.Throttle.SWThermalSlowdownUS) / runtimeUS
@@ -765,7 +924,15 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
 }

 func compositeBenchmarkScore(score BenchmarkScorecard) float64 {
-	quality := 0.40 + 0.20*(score.PowerSustainScore/100.0) + 0.20*(score.ThermalSustainScore/100.0) + 0.20*(score.StabilityScore/100.0)
+	// Weights after introducing calibrated power reference:
+	//   base        0.35 — floor so a GPU that fails all sustain checks still scores
+	//   thermal     0.25 — heaviest: throttle counters are the most reliable signal
+	//   stability   0.25 — clock/power variance matters for reproducibility
+	//   power       0.15 — GPU reaches rated TDP under targeted_power? lower weight
+	//                       because calibration may be absent (dcgmi not installed)
+	//   NCCL bonus  0.10 — interconnect health
+	//   cap         1.10
+	quality := 0.35 + 0.15*(score.PowerSustainScore/100.0) + 0.25*(score.ThermalSustainScore/100.0) + 0.25*(score.StabilityScore/100.0)
 	if score.InterconnectScore > 0 {
 		quality += 0.10
 	}
@@ -985,16 +1152,57 @@ func buildBenchmarkFindings(result NvidiaBenchmarkResult) []string {
 				gpu.Index, gpu.PowerLimitW, gpu.DefaultPowerLimitW, gpu.PowerLimitW/gpu.DefaultPowerLimitW*100,
 			))
 		}
+		// Flag significant TDP deviation (over or under) from calibration.
+		if gpu.CalibratedPeakPowerW > 0 {
+			ref := gpu.DefaultPowerLimitW
+			if ref <= 0 {
+				ref = gpu.PowerLimitW
+			}
+			if ref > 0 {
+				deviationPct := (gpu.CalibratedPeakPowerW - ref) / ref * 100
+				switch {
+				case deviationPct < -10:
+					findings = append(findings, fmt.Sprintf(
+						"GPU %d reached only %.0f W (%.0f%% of rated %.0f W) under targeted_power. Check power delivery or cooling.",
+						gpu.Index, gpu.CalibratedPeakPowerW, gpu.CalibratedPeakPowerW/ref*100, ref,
+					))
+				case deviationPct > 5:
+					findings = append(findings, fmt.Sprintf(
+						"GPU %d exceeded rated TDP: %.0f W measured vs %.0f W rated (+%.0f%%). Power limit may not be enforced correctly.",
+						gpu.Index, gpu.CalibratedPeakPowerW, ref, deviationPct,
+					))
+				}
+			}
+		}
 	}
 	if result.Interconnect != nil && result.Interconnect.Supported {
 		findings = append(findings, fmt.Sprintf("Multi-GPU all_reduce max bus bandwidth: %.1f GB/s.", result.Interconnect.MaxBusBWGBps))
 	}
+	if cl := result.CPULoad; cl != nil {
+		switch cl.Status {
+		case "high":
+			findings = append(findings, fmt.Sprintf(
+				"Host CPU load was elevated during the benchmark (avg %.1f%%, max %.1f%%). A competing CPU workload may skew GPU results.",
+				cl.AvgPct, cl.MaxPct,
+			))
+		case "unstable":
+			findings = append(findings, fmt.Sprintf(
+				"Host CPU load was erratic during the benchmark (avg %.1f%%, p95 %.1f%%). Results may be less reproducible.",
+				cl.AvgPct, cl.P95Pct,
+			))
+		}
+	}
 	if sp := result.ServerPower; sp != nil && sp.Available && sp.GPUReportedSumW > 0 {
 		if sp.ReportingRatio < 0.75 {
 			findings = append(findings, fmt.Sprintf(
-				"GPU power reporting may be unreliable: server delta %.0f W vs GPU-reported %.0f W (ratio %.2f). GPU telemetry likely over-reports actual consumption.",
+				"GPU power reporting may be unreliable: server delta %.0f W vs GPU-reported %.0f W (ratio %.2f). GPU telemetry likely over-reports actual consumption. Composite scores have been penalized accordingly.",
 				sp.DeltaW, sp.GPUReportedSumW, sp.ReportingRatio,
 			))
+		} else if sp.ReportingRatio > 1.25 {
+			findings = append(findings, fmt.Sprintf(
+				"Server power delta %.0f W exceeds GPU-reported sum %.0f W by %.0f%%. Other components (CPU, NVMe, networking) may be drawing substantial power under GPU load.",
+				sp.DeltaW, sp.GPUReportedSumW, (sp.ReportingRatio-1)*100,
+			))
 		}
 	}
 	return dedupeStrings(findings)
@@ -1299,6 +1507,7 @@ func runNvidiaBenchmarkParallel(
 	spec benchmarkProfileSpec,
 	logFunc func(string),
 	result *NvidiaBenchmarkResult,
+	calibPowerByIndex map[int]float64,
 	serverIdleW *float64, serverLoadedWSum *float64,
 	serverIdleOK *bool, serverLoadedOK *bool, serverLoadedSamples *int,
 ) {
@@ -1320,6 +1529,9 @@ func runNvidiaBenchmarkParallel(
 			r.BaseGraphicsClockMHz = info.BaseGraphicsClockMHz
 			r.MaxMemoryClockMHz = info.MaxMemoryClockMHz
 		}
+		if w, ok := calibPowerByIndex[idx]; ok && w > 0 {
+			r.CalibratedPeakPowerW = w
+		}
 		if norm := findBenchmarkNormalization(result.Normalization.GPUs, idx); norm != nil {
 			r.LockedGraphicsClockMHz = norm.GPUClockLockMHz
 			r.LockedMemoryClockMHz = norm.MemoryClockLockMHz
@@ -1481,3 +1693,225 @@ func runNvidiaBenchmarkParallel(
 		result.GPUs = append(result.GPUs, finalizeBenchmarkGPUResult(*r))
 	}
 }
+
+// readBenchmarkHostConfig reads static CPU and memory configuration from
+// /proc/cpuinfo and /proc/meminfo. It returns nil when neither source yields
+// any usable data.
+//
+// From /proc/cpuinfo it records the first non-empty "model name", the number
+// of distinct "physical id" values (sockets), the number of "processor"
+// entries (threads), and the sum over sockets of the first parseable
+// "cpu cores" value seen for each socket. When no "physical id" is present but
+// processors were found, one socket is assumed. From /proc/meminfo it records
+// MemTotal converted to GiB.
+func readBenchmarkHostConfig() *BenchmarkHostConfig {
+	cfg := &BenchmarkHostConfig{}
+	populated := false
+
+	// Parse /proc/cpuinfo for CPU model, sockets, cores, threads.
+	if data, err := os.ReadFile("/proc/cpuinfo"); err == nil {
+		socketIDs := map[string]struct{}{}
+		coresPerSocket := map[string]int{}
+		var modelName, curSocket string
+		threads := 0
+		for _, line := range strings.Split(string(data), "\n") {
+			kv := strings.SplitN(line, ":", 2)
+			if len(kv) != 2 {
+				continue
+			}
+			key := strings.TrimSpace(kv[0])
+			val := strings.TrimSpace(kv[1])
+			switch key {
+			case "processor":
+				threads++
+			case "model name":
+				if modelName == "" {
+					modelName = val
+				}
+			case "physical id":
+				socketIDs[val] = struct{}{}
+				// Remember the socket so the next "cpu cores" line can be
+				// attributed to it.
+				curSocket = val
+			case "cpu cores":
+				// Record the core count once per socket, attributed to the
+				// most recent "physical id". An unparseable value is skipped
+				// so a later entry for the same socket can still supply it.
+				if _, seen := coresPerSocket[curSocket]; curSocket != "" && !seen {
+					if cores, err := strconv.Atoi(val); err == nil {
+						coresPerSocket[curSocket] = cores
+					}
+				}
+			}
+		}
+		totalCores := 0
+		for _, c := range coresPerSocket {
+			totalCores += c
+		}
+		cfg.CPUModel = modelName
+		cfg.CPUSockets = len(socketIDs)
+		if cfg.CPUSockets == 0 && threads > 0 {
+			cfg.CPUSockets = 1
+		}
+		cfg.CPUCores = totalCores
+		cfg.CPUThreads = threads
+		if modelName != "" || threads > 0 {
+			populated = true
+		}
+	}
+
+	// Parse /proc/meminfo for total physical RAM.
+	if data, err := os.ReadFile("/proc/meminfo"); err == nil {
+		for _, line := range strings.Split(string(data), "\n") {
+			if !strings.HasPrefix(line, "MemTotal:") {
+				continue
+			}
+			if fields := strings.Fields(line); len(fields) >= 2 {
+				// The value is divided by 1024*1024 to obtain GiB. A
+				// malformed number is ignored rather than being recorded
+				// as 0 GiB and marking the config as populated.
+				if kb, err := strconv.ParseUint(fields[1], 10, 64); err == nil {
+					cfg.MemTotalGiB = float64(kb) / (1024 * 1024)
+					populated = true
+				}
+			}
+			break
+		}
+	}
+
+	if !populated {
+		return nil
+	}
+	return cfg
+}
+
+// startCPULoadSampler starts a goroutine that samples host CPU load every
+// intervalSec seconds until stopCh is closed, then sends the collected
+// samples on the returned channel and exits.
+//
+// A non-positive intervalSec is clamped to 1 second, because time.NewTicker
+// panics on a non-positive duration. Samples for which sampleCPULoadPct
+// returns a value <= 0 are discarded. The returned channel has capacity 1, so
+// the goroutine can deliver its result and terminate even if the caller never
+// receives from it.
+func startCPULoadSampler(stopCh <-chan struct{}, intervalSec int) <-chan []float64 {
+	if intervalSec <= 0 {
+		intervalSec = 1
+	}
+	ch := make(chan []float64, 1)
+	go func() {
+		var samples []float64
+		ticker := time.NewTicker(time.Duration(intervalSec) * time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-stopCh:
+				ch <- samples
+				return
+			case <-ticker.C:
+				if pct := sampleCPULoadPct(); pct > 0 {
+					samples = append(samples, pct)
+				}
+			}
+		}
+	}()
+	return ch
+}
+
+// summarizeCPULoad reduces sampled CPU load percentages to average, maximum
+// and p95 figures (each rounded to one decimal place) plus a health status.
+// It returns nil for an empty sample set and does not modify samples.
+//
+// Status is "high" when the average exceeds 20 or the maximum exceeds 40,
+// otherwise "unstable" when the population standard deviation exceeds 12,
+// otherwise "ok". Note is set only for the first two.
+func summarizeCPULoad(samples []float64) *BenchmarkCPULoad {
+	n := len(samples)
+	if n == 0 {
+		return nil
+	}
+
+	// Sort a private copy so the caller's slice keeps its order.
+	ordered := make([]float64, n)
+	copy(ordered, samples)
+	sort.Float64s(ordered)
+
+	total := 0.0
+	for i := range ordered {
+		total += ordered[i]
+	}
+	mean := total / float64(n)
+	peak := ordered[n-1]
+	// The index n*0.95 truncated toward zero is always < n, so it is in range.
+	p95 := ordered[int(float64(n)*0.95)]
+
+	// Population standard deviation, used to detect erratic load.
+	sqDev := 0.0
+	for i := range ordered {
+		diff := ordered[i] - mean
+		sqDev += diff * diff
+	}
+	stdDev := math.Sqrt(sqDev / float64(n))
+
+	round1 := func(v float64) float64 { return math.Round(v*10) / 10 }
+	load := &BenchmarkCPULoad{
+		AvgPct:  round1(mean),
+		MaxPct:  round1(peak),
+		P95Pct:  round1(p95),
+		Samples: n,
+	}
+
+	if mean > 20 || peak > 40 {
+		load.Status = "high"
+		load.Note = fmt.Sprintf("avg %.1f%% max %.1f%% — elevated host CPU load may interfere with GPU benchmark results", mean, peak)
+	} else if stdDev > 12 {
+		load.Status = "unstable"
+		load.Note = fmt.Sprintf("avg %.1f%% stddev %.1f%% — host CPU load was erratic during the benchmark", mean, stdDev)
+	} else {
+		load.Status = "ok"
+	}
+	return load
+}
+
+// runBenchmarkPowerCalibration runs a dcgmi targeted_power diagnostic for a
+// fixed 45 seconds on the given GPUs and returns, per GPU index, the 95th
+// percentile of the positive power readings found in the metric rows
+// collected during that run. RunNvidiaBenchmark stores each value as the
+// GPU's CalibratedPeakPowerW.
+//
+// The command output is written to power-calibration.log in runDir whether or
+// not the command succeeded. An empty map is returned when dcgmi is not on
+// PATH or the command fails; GPUs without any positive power sample, or whose
+// p95 is not positive, are simply absent from the map. The function reports
+// progress through logFunc and never returns an error.
+func runBenchmarkPowerCalibration(
+	ctx context.Context,
+	verboseLog, runDir string,
+	gpuIndices []int,
+	logFunc func(string),
+) map[int]float64 {
+	const (
+		calibDurationSec = 45
+		logName          = "power-calibration.log"
+	)
+
+	// Without dcgmi there is nothing to run.
+	if _, lookErr := exec.LookPath("dcgmi"); lookErr != nil {
+		logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
+		return map[int]float64{}
+	}
+
+	logFunc(fmt.Sprintf("power calibration: running dcgmi targeted_power for %ds on GPUs %s", calibDurationSec, joinIndexList(gpuIndices)))
+
+	diagCmd := nvidiaDCGMNamedDiagCommand("targeted_power", calibDurationSec, gpuIndices)
+	output, metricRows, runErr := runBenchmarkCommandWithMetrics(ctx, verboseLog, logName, diagCmd, nil, gpuIndices, runDir, "power-calibration", logFunc)
+	// Best-effort: keep whatever output was produced, even for a failed run.
+	_ = os.WriteFile(filepath.Join(runDir, logName), output, 0644)
+	if runErr != nil {
+		logFunc(fmt.Sprintf("power calibration: dcgmi targeted_power failed (%v), skipping", runErr))
+		return map[int]float64{}
+	}
+
+	// Compute p95 power per GPU from that GPU's positive readings.
+	calibrated := make(map[int]float64, len(gpuIndices))
+	for _, gpu := range gpuIndices {
+		var watts []float64
+		for _, row := range filterRowsByGPU(metricRows, gpu) {
+			if row.PowerW > 0 {
+				watts = append(watts, row.PowerW)
+			}
+		}
+		if len(watts) == 0 {
+			continue
+		}
+		p95 := benchmarkPercentile(watts, 95)
+		if p95 <= 0 {
+			continue
+		}
+		calibrated[gpu] = p95
+		logFunc(fmt.Sprintf("power calibration: GPU %d p95=%.0f W (%d samples)", gpu, p95, len(watts)))
+	}
+	return calibrated
+}
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -60,9 +60,17 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "**App version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
-	if result.ParallelGPUs {
+	if result.RampStep > 0 && result.RampTotal > 0 {
+		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
+		if result.RampRunID != "" {
+			fmt.Fprintf(&b, "**Ramp-up run ID:** %s  \n", result.RampRunID)
+		}
+	} else if result.ParallelGPUs {
 		fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously)  \n")
 	}
+	if result.ScalabilityScore > 0 {
+		fmt.Fprintf(&b, "**Scalability score:** %.1f%%  \n", result.ScalabilityScore)
+	}
 	fmt.Fprintf(&b, "**Overall status:** %s  \n", result.OverallStatus)
 	b.WriteString("\n")

@@ -90,7 +98,7 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 	for _, gpu := range result.GPUs {
 		name := strings.TrimSpace(gpu.Name)
 		if name == "" {
-			name = "Unknown"
+			name = "Unknown GPU"
 		}
 		interconnect := "-"
 		if gpu.Scores.InterconnectScore > 0 {
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -178,3 +178,67 @@ func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
 		t.Fatalf("report should not contain ANSI escapes\n%s", report)
 	}
 }
+
+func TestEnrichGPUInfoWithMaxClocks(t *testing.T) {
+	t.Parallel()
+	// Fixture: two GPU sections keyed by bus ID. The first also has a current "Clocks" section (2422 MHz graphics) that differs from its "Max Clocks" values.
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Clocks
+        Graphics                          : 2422 MHz
+        Memory                            : 12481 MHz
+    Max Clocks
+        Graphics                          : 2430 MHz
+        SM                                : 2430 MHz
+        Memory                            : 12481 MHz
+        Video                             : 2107 MHz
+
+GPU 00000000:4F:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Max Clocks
+        Graphics                          : 2430 MHz
+        Memory                            : 12481 MHz
+`)
+	// Both entries start without max-clock values and use the bus IDs that appear in the fixture.
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0"},
+		1: {Index: 1, BusID: "00000000:4F:00.0"},
+	}
+	// The assertions below read the results back from this same map after the call.
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ)
+	// GPU 0 must report 2430 MHz (its "Max Clocks" value), not the current 2422 MHz graphics clock.
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 0 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[0].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 0 MaxMemoryClockMHz = %v, want 12481", infoByIndex[0].MaxMemoryClockMHz)
+	}
+	if infoByIndex[1].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 1 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[1].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[1].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 1 MaxMemoryClockMHz = %v, want 12481", infoByIndex[1].MaxMemoryClockMHz)
+	}
+}
+
+func TestEnrichGPUInfoWithMaxClocksSkipsPopulated(t *testing.T) {
+	t.Parallel()
+	// Fixture deliberately uses 9999 MHz so an overwrite of the pre-set values would be detectable.
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Max Clocks
+        Graphics                          : 9999 MHz
+        Memory                            : 9999 MHz
+`)
+	// Already populated — must not be overwritten.
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0", MaxGraphicsClockMHz: 2430, MaxMemoryClockMHz: 12481},
+	}
+	// The bus ID matches the fixture, so only the "already populated" state can prevent an update.
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ)
+	// Only MaxGraphicsClockMHz is asserted; MaxMemoryClockMHz is not checked by this test.
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("expected existing value to be preserved, got %v", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+}
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -2,6 +2,29 @@ package platform

 import "time"

+// BenchmarkHostConfig holds static CPU and memory configuration captured at
+// benchmark start. Useful for correlating results across runs on different hardware. Every field is tagged omitempty, so zero or empty values are left out of the JSON.
+type BenchmarkHostConfig struct {
+	CPUModel    string  `json:"cpu_model,omitempty"`
+	CPUSockets  int     `json:"cpu_sockets,omitempty"`
+	CPUCores    int     `json:"cpu_cores,omitempty"` // NOTE(review): total vs. per-socket count is not shown here — confirm against the collector
+	CPUThreads  int     `json:"cpu_threads,omitempty"`
+	MemTotalGiB float64 `json:"mem_total_gib,omitempty"`
+}
+
+// BenchmarkCPULoad summarises host CPU utilisation sampled during the GPU
+// steady-state phase. High or unstable CPU load during a GPU benchmark may
+// indicate a competing workload or a CPU-bound driver bottleneck. Only Note is omitempty; all other fields are always present in the JSON.
+type BenchmarkCPULoad struct {
+	AvgPct  float64 `json:"avg_pct"`
+	MaxPct  float64 `json:"max_pct"`
+	P95Pct  float64 `json:"p95_pct"`
+	Samples int     `json:"samples"` // presumably the number of utilisation samples behind the three statistics — confirm against the sampler
+	// Status is "ok", "high", or "unstable".
+	Status string `json:"status"`
+	Note   string `json:"note,omitempty"`
+}
+
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
@@ -14,7 +37,10 @@ type NvidiaBenchmarkOptions struct {
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	RunNCCL           bool
-	ParallelGPUs      bool // run all selected GPUs simultaneously instead of sequentially
+	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
+	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
+	RampTotal         int    // total number of ramp-up steps in this run
+	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }


@@ -25,11 +51,17 @@ type NvidiaBenchmarkResult struct {
 	ServerModel        string                       `json:"server_model,omitempty"`
 	BenchmarkProfile   string                       `json:"benchmark_profile"`
 	ParallelGPUs       bool                         `json:"parallel_gpus,omitempty"`
+	RampStep           int                          `json:"ramp_step,omitempty"`
+	RampTotal          int                          `json:"ramp_total,omitempty"`
+	RampRunID          string                       `json:"ramp_run_id,omitempty"`
+	ScalabilityScore   float64                      `json:"scalability_score,omitempty"`
 	OverallStatus      string                       `json:"overall_status"`
 	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
 	Findings           []string                     `json:"findings,omitempty"`
 	Warnings           []string                     `json:"warnings,omitempty"`
 	Normalization      BenchmarkNormalization       `json:"normalization"`
+	HostConfig         *BenchmarkHostConfig         `json:"host_config,omitempty"`
+	CPULoad            *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
 	GPUs               []BenchmarkGPUResult         `json:"gpus"`
 	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
 	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
@@ -63,6 +95,11 @@ type BenchmarkGPUResult struct {
 	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
 	MultiprocessorCount    int                        `json:"multiprocessor_count,omitempty"`
 	DefaultPowerLimitW     float64                    `json:"default_power_limit_w,omitempty"`
+	// CalibratedPeakPowerW is the p95 power measured during a short
+	// dcgmi targeted_power calibration run before the main benchmark.
+	// Used as the reference denominator for PowerSustainScore instead of
+	// the hardware default limit, which bee-gpu-burn cannot reach.
+	CalibratedPeakPowerW   float64                    `json:"calibrated_peak_power_w,omitempty"`
 	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
 	BaseGraphicsClockMHz   float64                    `json:"base_graphics_clock_mhz,omitempty"`
 	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -14,9 +14,17 @@ import (
 func (s *System) IsLiveMediaInRAM() bool {
 	fsType := mountFSType("/run/live/medium")
 	if fsType == "" {
+		// No medium mount at all — fall back to toram kernel parameter.
 		return toramActive()
 	}
-	return strings.EqualFold(fsType, "tmpfs")
+	if strings.EqualFold(fsType, "tmpfs") { // case-insensitive match on the reported filesystem type
+		return true
+	}
+	// When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind
+	// mount of /run/live/medium fails (common for CD-ROM boots), the medium
+	// fstype still shows the CD-ROM type. Check whether the RAM copy exists.
+	files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs") // Glob's only error is ErrBadPattern; the pattern is a constant, so ignoring it is safe
+	return len(files) > 0
 }

 func (s *System) LiveBootSource() LiveBootSource {
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -161,13 +161,7 @@ func (s *System) RunPlatformStress(
 	}
 	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)

-	// Pack tar.gz
-	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
-	if err := packPlatformDir(runDir, archivePath); err != nil {
-		return "", fmt.Errorf("pack archive: %w", err)
-	}
-	_ = os.RemoveAll(runDir)
-	return archivePath, nil
+	return runDir, nil
 }

 // collectPhase samples live metrics every second until ctx is done.
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -1,6 +1,7 @@
 package platform

 import (
+	"bufio"
 	"os"
 	"os/exec"
 	"strings"
@@ -114,6 +115,8 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
 	}

 	s.collectGPURuntimeHealth(vendor, &health)
+	s.collectToRAMHealth(&health)
+	s.collectUSBExportHealth(&health)

 	if health.Status != "FAILED" && len(health.Issues) > 0 {
 		health.Status = "PARTIAL"
@@ -168,6 +171,96 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }

+// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM and records the result in health.ToRAMStatus.
+// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted),
+// "failed" = toram was requested but medium is not in RAM (copy failed or in progress). Only the "failed" case appends to health.Issues.
+func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
+	inRAM := s.IsLiveMediaInRAM()
+	active := toramActive()
+	switch {
+	case inRAM: // checked first, so an in-RAM medium is "ok" regardless of what toramActive reports
+		health.ToRAMStatus = "ok"
+	case active:
+		// toram was requested but medium is not yet/no longer in RAM
+		health.ToRAMStatus = "failed"
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_failed",
+			Severity:    "warning", // NOTE(review): issue severity is "warning" although ToRAMStatus is "failed" — confirm this is intended
+			Description: "toram boot parameter is set but the live medium is not mounted from RAM.",
+		})
+	default: // not in RAM and toramActive is false; no issue is recorded
+		health.ToRAMStatus = "warning"
+	}
+}
+
+// collectUSBExportHealth scans /proc/mounts for a writable USB-backed filesystem
+// suitable for log export. Sets USBExportPath to the first match found, or "" when findUSBExportMount finds none.
+func (s *System) collectUSBExportHealth(health *schema.RuntimeHealth) {
+	health.USBExportPath = findUSBExportMount() // the scan itself lives in findUSBExportMount; no issue is appended here when nothing is found
+}
+
+// findUSBExportMount returns the mount point of the first writable USB filesystem
+// found in /proc/mounts (vfat, exfat, ext2/3/4, ntfs, ntfs3, fuseblk) whose backing
+// block device has USB transport. Returns "" if none is found or if /proc/mounts
+// cannot be opened.
+//
+// The returned path is unescaped: /proc/mounts encodes space, tab, newline and
+// backslash as \040, \011, \012 and \134, so a drive mounted at
+// "/media/My Drive" is listed there as "/media/My\040Drive".
+func findUSBExportMount() string {
+	f, err := os.Open("/proc/mounts")
+	if err != nil {
+		return ""
+	}
+	defer f.Close()
+
+	// fs types that are expected on USB export drives
+	exportFSTypes := map[string]bool{
+		"vfat":    true,
+		"exfat":   true,
+		"ext2":    true,
+		"ext3":    true,
+		"ext4":    true,
+		"ntfs":    true,
+		"ntfs3":   true,
+		"fuseblk": true,
+	}
+	// Decodes the octal escapes the kernel uses for whitespace and backslash in
+	// /proc/mounts fields. Built once; the replacer scans left to right without
+	// re-examining its own output, so "\134040" correctly becomes "\040".
+	unescape := strings.NewReplacer(`\040`, " ", `\011`, "\t", `\012`, "\n", `\134`, `\`)
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		// fields: device mountpoint fstype options dump pass
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 4 {
+			continue
+		}
+		device, mountPoint, fsType, options := fields[0], fields[1], fields[2], fields[3]
+		if !exportFSTypes[strings.ToLower(fsType)] {
+			continue
+		}
+		// Skip read-only mounts
+		readOnly := false
+		for _, o := range strings.Split(options, ",") {
+			if strings.TrimSpace(o) == "ro" {
+				readOnly = true
+				break
+			}
+		}
+		if readOnly {
+			continue
+		}
+		// Check USB transport via lsblk on the device (or its parent disk for partitions).
+		if !strings.HasPrefix(device, "/dev/") {
+			continue
+		}
+		checkDev := device
+		// lsblk only reports TRAN for the whole disk, not for partitions (e.g. /dev/sdc1).
+		// Strip trailing partition digits to get the parent disk name.
+		if trimmed := strings.TrimRight(device, "0123456789"); trimmed != device && len(trimmed) > len("/dev/") {
+			checkDev = trimmed
+			// Disks whose own name ends in a digit separate the partition number
+			// with "p" (nvme0n1p1, mmcblk0p2). Drop that separator too, but only
+			// when a digit precedes it so that e.g. /dev/sdp1 still maps to /dev/sdp.
+			if n := len(trimmed); n >= 2 && trimmed[n-1] == 'p' && trimmed[n-2] >= '0' && trimmed[n-2] <= '9' {
+				checkDev = trimmed[:n-1]
+			}
+		}
+		if blockDeviceTransport(checkDev) == "usb" {
+			return unescape.Replace(mountPoint)
+		}
+	}
+	// A read error simply ends the scan; this check is best-effort.
+	return ""
+}
+
 func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
 	lsmodText := commandText("lsmod")

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -662,11 +662,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
 		return "", err
 	}
-	archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 type satJob struct {
@@ -852,11 +848,7 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		}
 	}

-	archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
@@ -919,7 +911,7 @@ func writeNvidiaGPUStatusFiles(runDir, overall string, perGPU map[int]*nvidiaGPU
 			entry.Health = "UNKNOWN"
 		}
 		if entry.Name == "" {
-			entry.Name = "unknown"
+			entry.Name = "Unknown GPU"
 		}
 		var body strings.Builder
 		fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -223,11 +223,7 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		return "", err
 	}

-	archive := filepath.Join(baseDir, "fan-stress-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func applyFanStressDefaults(opts *FanStressOptions) {
--- a/audit/internal/platform/techdump.go
+++ b/audit/internal/platform/techdump.go
@@ -20,6 +20,7 @@ var techDumpFixedCommands = []struct {
 	{Name: "dmidecode", Args: []string{"-t", "4"}, File: "dmidecode-type4.txt"},
 	{Name: "dmidecode", Args: []string{"-t", "17"}, File: "dmidecode-type17.txt"},
 	{Name: "lspci", Args: []string{"-vmm", "-D"}, File: "lspci-vmm.txt"},
+	{Name: "lspci", Args: []string{"-vvv"}, File: "lspci-vvv.txt"},
 	{Name: "lsblk", Args: []string{"-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL"}, File: "lsblk.json"},
 	{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
 	{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -22,6 +22,10 @@ type RuntimeHealth struct {
 	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
 	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
 	NetworkStatus string                 `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed)
+	ToRAMStatus   string `json:"toram_status,omitempty"`
+	// USBExportPath: mount point of the first writable USB drive found, empty if none.
+	USBExportPath string `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 	"sync/atomic"
 	"syscall"
@@ -209,6 +210,14 @@ func joinTaskIndices(indices []int) string {
 	return strings.Join(parts, ",")
 }

+func formatGPUIndexList(indices []int) string { // renders indices as comma-separated decimals with no spaces, e.g. "0,1,2"; "" for an empty slice
+	parts := make([]string, len(indices))
+	for i, idx := range indices {
+		parts[i] = strconv.Itoa(idx)
+	}
+	return strings.Join(parts, ",")
+}
+
 func formatSplitTaskName(baseName, selectionLabel string) string {
 	baseName = strings.TrimSpace(baseName)
 	selectionLabel = strings.TrimSpace(selectionLabel)
@@ -540,6 +549,7 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req
 		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
 		RunNCCL           *bool  `json:"run_nccl"`
 		ParallelGPUs      *bool  `json:"parallel_gpus"`
+		RampUp            *bool  `json:"ramp_up"`
 		DisplayName       string `json:"display_name"`
 	}
 	if r.Body != nil {
@@ -557,10 +567,82 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req
 	if body.ParallelGPUs != nil {
 		parallelGPUs = *body.ParallelGPUs
 	}
+	rampUp := false
+	if body.RampUp != nil {
+		rampUp = *body.RampUp
+	}
+	// Build a descriptive base name that includes profile and mode so the task
+	// list is self-explanatory without opening individual task detail pages.
+	profile := strings.TrimSpace(body.Profile)
+	if profile == "" {
+		profile = "standard"
+	}
 	name := taskDisplayName("nvidia-benchmark", "", "")
 	if strings.TrimSpace(body.DisplayName) != "" {
 		name = body.DisplayName
 	}
+	// Append profile tag.
+	name = fmt.Sprintf("%s · %s", name, profile)
+
+	if rampUp && len(body.GPUIndices) > 1 {
+		// Ramp-up mode: resolve GPU list, then create one task per prefix
+		// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1], each running in parallel.
+		gpus, err := apiListNvidiaGPUs(h.opts.App)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		if len(resolved) < 2 {
+			// Fall through to normal single-task path.
+			rampUp = false
+		} else {
+			now := time.Now()
+			rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
+			var allTasks []*Task
+			for step := 1; step <= len(resolved); step++ {
+				subset := resolved[:step]
+				stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
+				t := &Task{
+					ID:        newJobID("benchmark-nvidia"),
+					Name:      stepName,
+					Target:    "nvidia-benchmark",
+					Priority:  15,
+					Status:    TaskPending,
+					CreatedAt: now,
+					params: taskParams{
+						GPUIndices:       append([]int(nil), subset...),
+						SizeMB:           body.SizeMB,
+						BenchmarkProfile: body.Profile,
+						RunNCCL:          runNCCL && step == len(resolved),
+						ParallelGPUs:     true,
+						RampStep:         step,
+						RampTotal:        len(resolved),
+						RampRunID:        rampRunID,
+						DisplayName:      stepName,
+					},
+				}
+				allTasks = append(allTasks, t)
+			}
+			for _, t := range allTasks {
+				globalQueue.enqueue(t)
+			}
+			writeTaskRunResponse(w, allTasks)
+			return
+		}
+	}
+
+	// For non-ramp tasks append mode tag.
+	if parallelGPUs {
+		name = fmt.Sprintf("%s · parallel", name)
+	} else {
+		name = fmt.Sprintf("%s · sequential", name)
+	}
+
 	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
 		GPUIndices:        body.GPUIndices,
 		ExcludeGPUIndices: body.ExcludeGPUIndices,
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -83,6 +83,10 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
 		}
 	}

+	// Downsample to at most ~1400 points (one per pixel) before building SVG.
+	times, datasets = downsampleTimeSeries(times, datasets, 1400)
+	pointCount = len(times)
+
 	statsLabel := chartStatsLabel(datasets)

 	legendItems := []metricChartSeries{}
@@ -196,6 +200,19 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
 		}
 	}

+	// Downsample to at most ~1400 points before building SVG.
+	{
+		datasets := make([][]float64, len(series))
+		for i := range series {
+			datasets[i] = series[i].Values
+		}
+		times, datasets = downsampleTimeSeries(times, datasets, 1400)
+		pointCount = len(times)
+		for i := range series {
+			series[i].Values = datasets[i]
+		}
+	}
+
 	scales := make([]chartScale, len(series))
 	for i := range series {
 		min, max := chartSeriesBounds(series[i].Values)
@@ -626,6 +643,87 @@ func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end
 	b.WriteString(`</g>` + "\n")
 }

+// downsampleTimeSeries reduces the time series to at most maxPts points using
+// min-max bucketing. Each bucket contributes the index of its min and max value
+// (using the first full-length dataset as the reference series). All parallel
+// datasets are sampled at those same indices so all series stay aligned.
+// If len(times) <= maxPts, or maxPts <= 0, the inputs are returned unchanged (same slices, no copy).
+func downsampleTimeSeries(times []time.Time, datasets [][]float64, maxPts int) ([]time.Time, [][]float64) {
+	n := len(times)
+	if n <= maxPts || maxPts <= 0 {
+		return times, datasets
+	}
+	buckets := maxPts / 2 // each bucket yields up to two points (its min and its max)
+	if buckets < 1 {
+		buckets = 1 // only reachable for maxPts == 1; the single bucket can then still yield two points
+	}
+	// Use the first dataset that has the same length as times as the reference
+	// for deciding which two indices to keep per bucket.
+	var ref []float64
+	for _, ds := range datasets {
+		if len(ds) == n {
+			ref = ds
+			break
+		}
+	}
+	selected := make([]int, 0, maxPts) // source indices to keep, appended in ascending order
+	bucketSize := float64(n) / float64(buckets)
+	for b := 0; b < buckets; b++ {
+		lo := int(math.Round(float64(b) * bucketSize)) // bucket covers the half-open index range [lo, hi)
+		hi := int(math.Round(float64(b+1) * bucketSize))
+		if hi > n {
+			hi = n
+		}
+		if lo >= hi {
+			continue
+		}
+		if ref == nil { // no full-length dataset: keep the first and last index of the bucket instead
+			selected = append(selected, lo)
+			if hi-1 != lo {
+				selected = append(selected, hi-1)
+			}
+			continue
+		}
+		minIdx, maxIdx := lo, lo
+		for i := lo + 1; i < hi; i++ { // strict comparisons keep the earliest index on ties
+			if ref[i] < ref[minIdx] {
+				minIdx = i
+			}
+			if ref[i] > ref[maxIdx] {
+				maxIdx = i
+			}
+		}
+		if minIdx <= maxIdx { // emit the two indices in ascending order so time stays monotonic
+			selected = append(selected, minIdx)
+			if maxIdx != minIdx {
+				selected = append(selected, maxIdx)
+			}
+		} else {
+			selected = append(selected, maxIdx)
+			if minIdx != maxIdx { // always true in this branch, since minIdx > maxIdx here
+				selected = append(selected, minIdx)
+			}
+		}
+	}
+	outTimes := make([]time.Time, len(selected))
+	for i, idx := range selected {
+		outTimes[i] = times[idx]
+	}
+	outDatasets := make([][]float64, len(datasets))
+	for d, ds := range datasets {
+		if len(ds) != n { // datasets whose length differs from times are passed through unchanged
+			outDatasets[d] = ds
+			continue
+		}
+		out := make([]float64, len(selected))
+		for i, idx := range selected {
+			out[i] = ds[idx]
+		}
+		outDatasets[d] = out
+	}
+	return outTimes, outDatasets
+}
+
 func chartXForTime(ts, start, end time.Time, left, right int) float64 {
 	if !end.After(start) {
 		return float64(left+right) / 2
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -330,6 +330,33 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {

 	var b strings.Builder
 	b.WriteString(`<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body">`)
+
+	// Server identity block above the component table.
+	{
+		var model, serial string
+		parts := []string{}
+		if hw.Board.Manufacturer != nil && strings.TrimSpace(*hw.Board.Manufacturer) != "" {
+			parts = append(parts, strings.TrimSpace(*hw.Board.Manufacturer))
+		}
+		if hw.Board.ProductName != nil && strings.TrimSpace(*hw.Board.ProductName) != "" {
+			parts = append(parts, strings.TrimSpace(*hw.Board.ProductName))
+		}
+		if len(parts) > 0 {
+			model = strings.Join(parts, " ")
+		}
+		serial = strings.TrimSpace(hw.Board.SerialNumber)
+		if model != "" || serial != "" {
+			b.WriteString(`<div style="margin-bottom:14px">`)
+			if model != "" {
+				fmt.Fprintf(&b, `<div style="font-size:16px;font-weight:700;margin-bottom:2px">%s</div>`, html.EscapeString(model))
+			}
+			if serial != "" {
+				fmt.Fprintf(&b, `<div style="font-size:12px;color:var(--muted)">S/N: %s</div>`, html.EscapeString(serial))
+			}
+			b.WriteString(`</div>`)
+		}
+	}
+
 	b.WriteString(`<table style="width:auto">`)
 	writeRow := func(label, value, badgeHTML string) {
 		b.WriteString(fmt.Sprintf(`<tr><td style="padding:6px 14px 6px 0;font-weight:700;white-space:nowrap">%s</td><td style="padding:6px 0;color:var(--muted);font-size:13px">%s</td><td style="padding:6px 0 6px 12px">%s</td></tr>`,
@@ -349,6 +376,9 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {
 	writeRow("GPU", hwDescribeGPU(hw), runtimeStatusBadge(gpuRow.Status))

 	psuRow := aggregateComponentStatus("PSU", records, nil, []string{"psu:"})
+	if psuRow.Status == "UNKNOWN" && len(hw.PowerSupplies) > 0 {
+		psuRow.Status = hwPSUStatus(hw.PowerSupplies)
+	}
 	writeRow("PSU", hwDescribePSU(hw), runtimeStatusBadge(psuRow.Status))

 	if nicDesc := hwDescribeNIC(hw); nicDesc != "" {
@@ -506,6 +536,31 @@ func hwDescribeGPU(hw schema.HardwareSnapshot) string {
 	return strings.Join(parts, ", ")
 }

+// hwPSUStatus returns "OK", "CRITICAL", "WARNING", or "UNKNOWN" based on
+// PSU statuses from the audit snapshot. Used as fallback when component-status.json
+// has no psu: records yet (e.g. first boot before audit writes them). The worst status wins: CRITICAL > WARNING > OK > UNKNOWN.
+func hwPSUStatus(psus []schema.HardwarePowerSupply) string {
+	worst := "UNKNOWN"
+	for _, psu := range psus {
+		if psu.Status == nil { // PSUs without a status do not affect the result
+			continue
+		}
+		switch strings.ToUpper(strings.TrimSpace(*psu.Status)) { // case-insensitive; any value other than the three cases below is ignored
+		case "CRITICAL":
+			return "CRITICAL"
+		case "WARNING":
+			if worst != "CRITICAL" { // always true here: CRITICAL returns immediately above
+				worst = "WARNING"
+			}
+		case "OK":
+			if worst == "UNKNOWN" { // OK never downgrades an earlier WARNING
+				worst = "OK"
+			}
+		}
+	}
+	return worst
+}
+
 // hwDescribePSU returns a summary like "2× 1600 W" or "2× PSU".
 func hwDescribePSU(hw schema.HardwareSnapshot) string {
 	n := len(hw.PowerSupplies)
@@ -646,6 +701,8 @@ func renderHealthCard(opts HandlerOptions) string {
 		buildRuntimeAccelerationRow(health),
 		buildRuntimeToolsRow(health),
 		buildRuntimeServicesRow(health),
+		buildRuntimeUSBExportRow(health),
+		buildRuntimeToRAMRow(health),
 	}
 	b.WriteString(`<table><thead><tr><th>Check</th><th>Status</th><th>Source</th><th>Issue</th></tr></thead><tbody>`)
 	for _, row := range rows {
@@ -742,7 +799,13 @@ func buildRuntimeServicesRow(health schema.RuntimeHealth) runtimeHealthRow {
 	nonActive := make([]string, 0)
 	for _, svc := range health.Services {
 		state := strings.TrimSpace(strings.ToLower(svc.Status))
-		if state != "active" {
+		// "activating" and "deactivating" are transient states for oneshot services
+		// (RemainAfterExit=yes) — the service is running normally, not failed.
+		// Only "failed" and "inactive" (after services should be running) are problems.
+		switch state {
+		case "active", "activating", "deactivating", "reloading":
+			// OK — service is running or transitioning normally
+		default:
 			nonActive = append(nonActive, svc.Name+"="+svc.Status)
 		}
 	}
@@ -755,6 +818,51 @@ func buildRuntimeServicesRow(health schema.RuntimeHealth) runtimeHealthRow {
 	return runtimeHealthRow{Title: "Bee Services", Status: status, Source: "ServiceState", Issue: issue}
 }

+func buildRuntimeUSBExportRow(health schema.RuntimeHealth) runtimeHealthRow { // builds the "USB Export Drive" row: OK when USBExportPath is non-blank, WARNING otherwise
+	path := strings.TrimSpace(health.USBExportPath)
+	if path != "" {
+		return runtimeHealthRow{
+			Title:  "USB Export Drive",
+			Status: "OK",
+			Source: "/proc/mounts + lsblk",
+			Issue:  path, // in the OK case the Issue field carries the mount point rather than a problem description
+		}
+	}
+	return runtimeHealthRow{
+		Title:  "USB Export Drive",
+		Status: "WARNING",
+		Source: "/proc/mounts + lsblk",
+		Issue:  "No writable USB drive mounted. Plug in a USB drive to enable log export.",
+	}
+}
+
+func buildRuntimeToRAMRow(health schema.RuntimeHealth) runtimeHealthRow { // builds the "LiveCD in RAM" row from health.ToRAMStatus
+	switch strings.ToLower(strings.TrimSpace(health.ToRAMStatus)) { // matching ignores case and surrounding whitespace
+	case "ok":
+		return runtimeHealthRow{
+			Title:  "LiveCD in RAM",
+			Status: "OK",
+			Source: "live-boot / /proc/mounts",
+			Issue:  "",
+		}
+	case "failed":
+		return runtimeHealthRow{
+			Title:  "LiveCD in RAM",
+			Status: "FAILED",
+			Source: "live-boot / /proc/mounts",
+			Issue:  "toram boot parameter set but ISO is not mounted from RAM. Copy may have failed.",
+		}
+	default:
+		// toram not active — ISO still on original boot media (USB/CD). This branch also handles an empty or unrecognised ToRAMStatus.
+		return runtimeHealthRow{
+			Title:  "LiveCD in RAM",
+			Status: "WARNING",
+			Source: "live-boot / /proc/mounts",
+			Issue:  "ISO not copied to RAM. Use \u201cCopy to RAM\u201d to free the boot drive and improve performance.",
+		}
+	}
+}
+
 func buildHardwareComponentRows(exportDir string) []runtimeHealthRow {
 	path := filepath.Join(exportDir, "component-status.json")
 	db, err := app.OpenComponentStatusDB(path)
@@ -1198,9 +1306,6 @@ func renderValidate(opts HandlerOptions) string {
 	<div class="card" style="margin-bottom:16px">
 	  <div class="card-head">Validate Profile</div>
 	  <div class="card-body validate-profile-body">
-	    <div class="validate-profile-col">
-	      <div class="form-row" style="margin:0"><label>Cycles</label><input type="number" id="sat-cycles" value="1" min="1" max="100" style="width:100%"></div>
-	    </div>
 	    <div class="validate-profile-col">
 	      <div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
 	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
@@ -1250,22 +1355,16 @@ func renderValidate(opts HandlerOptions) string {
      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
    </div>
    <p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA validate tasks.</p>
-    <div style="margin-top:10px;padding-top:10px;border-top:1px solid var(--border)">
-      <label class="sat-gpu-row" title="When checked, multi-GPU tests (PSU Pulse, NCCL, NVBandwidth) run on ALL GPUs in the system regardless of the selection above.">
-        <input type="checkbox" id="sat-multi-gpu-all" checked onchange="satUpdateGPUSelectionNote()">
-        <span><strong>Multi-GPU tests</strong> — use all GPUs <span style="font-size:11px;color:var(--muted)">(PSU Pulse, NCCL, NVBandwidth)</span></span>
-      </label>
-    </div>
  </div>
 </div>

 <div class="grid3">
 ` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
-			inv.NVIDIA,
-			`Runs NVIDIA diagnostics and board inventory checks.`,
-			`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
-			`Level 2 in Validate, Level 3 in Stress. Runs one GPU at a time on the selected NVIDIA GPUs.`,
-		)) +
+		inv.NVIDIA,
+		`Runs NVIDIA diagnostics and board inventory checks.`,
+		`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
+		`Level 2 in Validate, Level 3 in Stress. Runs one GPU at a time on the selected NVIDIA GPUs.`,
+	)) +
 		`<div id="sat-card-nvidia-targeted-stress">` +
 		renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
 			inv.NVIDIA,
@@ -1374,10 +1473,6 @@ function satSelectedGPUIndices() {
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
-function satMultiGPUAll() {
-  const cb = document.getElementById('sat-multi-gpu-all');
-  return cb ? cb.checked : true;
-}
 function satUpdateGPUSelectionNote() {
  const note = document.getElementById('sat-gpu-selection-note');
  if (!note) return;
@@ -1386,8 +1481,7 @@ function satUpdateGPUSelectionNote() {
    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA validate tasks.';
    return;
  }
-  const multiAll = satMultiGPUAll();
-  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '. Multi-GPU tests: ' + (multiAll ? 'all GPUs in system' : 'selected GPUs only') + '.';
+  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '. Multi-GPU tests will use all selected GPUs.';
 }
 function satRenderGPUList(gpus) {
  const root = document.getElementById('sat-gpu-list');
@@ -1501,15 +1595,8 @@ const nvidiaPerGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targete
 // pulse_test and fabric tests run on all selected GPUs simultaneously
 const nvidiaAllGPUTargets = ['nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'];
 function satAllGPUIndicesForMulti() {
-  // If "Multi-GPU tests — all GPUs" is checked, return all detected GPUs.
-  // Otherwise fall back to the per-GPU selection.
-  if (satMultiGPUAll()) {
-    return loadSatNvidiaGPUs().then(function(gpus) {
-      return gpus.map(function(g) { return Number(g.index); });
-    });
-  }
-  const sel = satSelectedGPUIndices();
-  return Promise.resolve(sel);
+  // Multi-GPU tests always use the current GPU selection.
+  return Promise.resolve(satSelectedGPUIndices());
 }
 function expandSATTarget(target) {
  if (nvidiaAllGPUTargets.indexOf(target) >= 0) {
@@ -1599,7 +1686,7 @@ function runAMDValidateSet() {
  return runNext(0);
 }
 function runAllSAT() {
-  const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1);
+  const cycles = 1;
  const status = document.getElementById('sat-all-status');
  status.textContent = 'Enqueuing...';
  const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'];
@@ -1777,6 +1864,11 @@ func formatValidateDeviceSummary(total int, models map[string]int, unit string)
 	if total != 1 {
 		label += "s"
 	}
+	// If there is only one model, the leading total duplicates the per-model
+	// count already in parts (e.g. "4 GPUs: 4 x RTX …" → "4 x RTX … GPUs").
+	if len(parts) == 1 {
+		return parts[0] + " " + label
+	}
 	return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", "))
 }

@@ -1881,12 +1973,16 @@ func renderBenchmark(opts HandlerOptions) string {
        </div>
      </div>
      <label class="benchmark-cb-row">
-        <input type="checkbox" id="benchmark-parallel-gpus">
-        <span>Run all selected GPUs simultaneously (parallel mode)</span>
+        <input type="radio" name="benchmark-mode" value="sequential" onchange="benchmarkUpdateSelectionNote()">
+        <span>Sequential — one GPU at a time</span>
      </label>
-      <label class="benchmark-cb-row">
-        <input type="checkbox" id="benchmark-run-nccl" checked>
-        <span>Run multi-GPU interconnect step (NCCL) only on the selected GPUs</span>
+      <label class="benchmark-cb-row" id="benchmark-parallel-label">
+        <input type="radio" name="benchmark-mode" value="parallel" onchange="benchmarkUpdateSelectionNote()">
+        <span>Parallel — all selected GPUs simultaneously</span>
+      </label>
+      <label class="benchmark-cb-row" id="benchmark-ramp-label">
+        <input type="radio" name="benchmark-mode" value="ramp-up" checked onchange="benchmarkUpdateSelectionNote()">
+        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
      </label>
      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
      <button id="benchmark-run-btn" class="btn btn-primary" onclick="runNvidiaBenchmark()" disabled>&#9654; Run Benchmark</button>
@@ -1939,22 +2035,28 @@ function benchmarkSelectedGPUIndices() {
    .sort(function(a, b) { return a - b; });
 }

+function benchmarkMode() {
+  const el = document.querySelector('input[name="benchmark-mode"]:checked');
+  return el ? el.value : 'sequential';
+}
+
 function benchmarkUpdateSelectionNote() {
  const selected = benchmarkSelectedGPUIndices();
  const btn = document.getElementById('benchmark-run-btn');
  const note = document.getElementById('benchmark-selection-note');
-  const nccl = document.getElementById('benchmark-run-nccl');
  if (!selected.length) {
    btn.disabled = true;
    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
    return;
  }
  btn.disabled = false;
-  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '.';
-  if (nccl && nccl.checked && selected.length < 2) {
-    note.textContent += ' NCCL will be skipped because fewer than 2 GPUs are selected.';
-  } else if (nccl && nccl.checked) {
-    note.textContent += ' NCCL interconnect will use only these GPUs.';
+  const mode = benchmarkMode();
+  if (mode === 'ramp-up') {
+    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). NCCL on final step.';
+  } else if (mode === 'parallel') {
+    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously.' + (selected.length > 1 ? ' NCCL included.' : '');
+  } else {
+    note.textContent = 'Sequential: each GPU benchmarked separately.' + (selected.length > 1 ? ' NCCL included on each.' : '');
  }
 }

@@ -1972,6 +2074,33 @@ function benchmarkRenderGPUList(gpus) {
      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
      + '</label>';
  }).join('');
+  benchmarkApplyMultiGPUState(gpus.length);
+  benchmarkUpdateSelectionNote();
+}
+
+// Disable radio options that require multiple GPUs when only one is present.
+function benchmarkApplyMultiGPUState(gpuCount) {
+  var multiValues = ['parallel', 'ramp-up'];
+  var radios = document.querySelectorAll('input[name="benchmark-mode"]');
+  radios.forEach(function(el) {
+    var isMulti = multiValues.indexOf(el.value) >= 0;
+    if (gpuCount < 2 && isMulti) {
+      el.disabled = true;
+      if (el.checked) {
+        // fall back to sequential
+        var seq = document.querySelector('input[name="benchmark-mode"][value="sequential"]');
+        if (seq) seq.checked = true;
+      }
+      var label = el.closest('label');
+      if (label) label.style.opacity = '0.4';
+    } else {
+      el.disabled = false;
+      // restore default: ramp-up checked when ≥2 GPUs
+      if (gpuCount >= 2 && el.value === 'ramp-up') el.checked = true;
+      var label = el.closest('label');
+      if (label) label.style.opacity = '';
+    }
+  });
  benchmarkUpdateSelectionNote();
 }

@@ -2009,12 +2138,15 @@ function runNvidiaBenchmark() {
    return;
  }
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
-  const parallelGPUs = !!document.getElementById('benchmark-parallel-gpus').checked;
+  const mode = benchmarkMode();
+  const rampUp = mode === 'ramp-up' && selected.length > 1;
+  const parallelGPUs = mode === 'parallel';
  const body = {
    profile: document.getElementById('benchmark-profile').value || 'standard',
    gpu_indices: selected,
-    run_nccl: !!document.getElementById('benchmark-run-nccl').checked,
+    run_nccl: selected.length > 1,
    parallel_gpus: parallelGPUs,
+    ramp_up: rampUp,
    display_name: 'NVIDIA Benchmark'
  };
  document.getElementById('benchmark-output').style.display = 'block';
@@ -2069,7 +2201,6 @@ function runNvidiaBenchmark() {
  });
 }

-document.getElementById('benchmark-run-nccl').addEventListener('change', benchmarkUpdateSelectionNote);
 benchmarkLoadGPUs();
 </script>`
 }
@@ -2285,10 +2416,20 @@ func renderBurn() string {
 	      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
 	    </div>
 	    <p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
-	    <label class="cb-row" style="margin-top:10px">
-	      <input type="checkbox" id="burn-stagger-nvidia">
-	      <span>Ramp selected NVIDIA GPUs one by one before full-load hold. Uses a 3-minute stabilization window per GPU, then keeps all selected GPUs under load for the chosen Burn Profile duration.</span>
-	    </label>
+	    <div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
+	      <label class="cb-row">
+	        <input type="radio" name="burn-nvidia-mode" value="sequential" checked>
+	        <span>Sequential — selected GPUs one at a time</span>
+	      </label>
+	      <label class="cb-row" id="burn-parallel-label">
+	        <input type="radio" name="burn-nvidia-mode" value="parallel">
+	        <span>Parallel — all selected GPUs simultaneously</span>
+	      </label>
+	      <label class="cb-row" id="burn-ramp-label">
+	        <input type="radio" name="burn-nvidia-mode" value="ramp-up">
+	        <span>Ramp-up — add one GPU at a time</span>
+	      </label>
+	    </div>
 	  </div>
 	</div>

@@ -2364,9 +2505,30 @@ function burnSelectedGPUIndices() {
    .sort(function(a, b) { return a - b; });
 }

-function burnUseNvidiaRampUp() {
-  const el = document.getElementById('burn-stagger-nvidia');
-  return !!(el && el.checked);
+function burnNvidiaMode() {
+  const el = document.querySelector('input[name="burn-nvidia-mode"]:checked');
+  return el ? el.value : 'sequential';
+}
+
+function burnApplyMultiGPUState(gpuCount) {
+  var multiValues = ['parallel', 'ramp-up'];
+  var radios = document.querySelectorAll('input[name="burn-nvidia-mode"]');
+  radios.forEach(function(el) {
+    var isMulti = multiValues.indexOf(el.value) >= 0;
+    if (gpuCount < 2 && isMulti) {
+      el.disabled = true;
+      if (el.checked) {
+        var seq = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
+        if (seq) seq.checked = true;
+      }
+      var label = el.closest('label');
+      if (label) label.style.opacity = '0.4';
+    } else {
+      el.disabled = false;
+      var label = el.closest('label');
+      if (label) label.style.opacity = '';
+    }
+  });
 }

 function burnUpdateSelectionNote() {
@@ -2393,6 +2555,7 @@ function burnRenderGPUList(gpus) {
      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
      + '</label>';
  }).join('');
+  burnApplyMultiGPUState(gpus.length);
  burnUpdateSelectionNote();
 }

@@ -2428,8 +2591,11 @@ function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
      return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
    }
    body.gpu_indices = selected;
-    if (burnUseNvidiaRampUp() && selected.length > 1) {
+    const bMode = burnNvidiaMode();
+    if (bMode === 'ramp-up' && selected.length > 1) {
      body.stagger_gpu_start = true;
+    } else if (bMode === 'parallel' && selected.length > 1) {
+      body.parallel_gpus = true;
    }
  }
  return fetch('/api/sat/' + target + '/run', {
@@ -3022,7 +3188,6 @@ usbRefresh();
 </script>`
 }

-
 func renderNvidiaSelfHealInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Inspect NVIDIA GPU health, restart the bee-nvidia driver service, and issue a per-GPU reset when the driver reports reset required.</p>
 <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -126,6 +126,9 @@ type taskParams struct {
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
+	RampStep           int      `json:"ramp_step,omitempty"`
+	RampTotal          int      `json:"ramp_total,omitempty"`
+	RampRunID          string   `json:"ramp_run_id,omitempty"`
 	DisplayName        string   `json:"display_name,omitempty"`
 	Device             string   `json:"device,omitempty"` // for install
 	PlatformComponents []string `json:"platform_components,omitempty"`
@@ -152,6 +155,12 @@ type burnPreset struct {
 	DurationSec int
 }

+type nvidiaRampSpec struct {
+	DurationSec      int
+	StaggerSeconds   int
+	TotalDurationSec int
+}
+
 func resolveBurnPreset(profile string) burnPreset {
 	switch profile {
 	case "overnight":
@@ -163,11 +172,43 @@ func resolveBurnPreset(profile string) burnPreset {
 	}
 }

-func boolToNvidiaStaggerSeconds(enabled bool, selected []int) int {
-	if enabled && len(selected) > 1 {
-		return 180
+func resolveNvidiaRampPlan(profile string, enabled bool, selected []int) (nvidiaRampSpec, error) {
+	base := resolveBurnPreset(profile).DurationSec
+	plan := nvidiaRampSpec{
+		DurationSec:      base,
+		TotalDurationSec: base,
 	}
-	return 0
+	if !enabled {
+		return plan, nil
+	}
+	count := len(selected)
+	if count == 0 {
+		return nvidiaRampSpec{}, fmt.Errorf("staggered NVIDIA burn requires explicit GPU selection")
+	}
+	if count == 1 {
+		return plan, nil
+	}
+
+	switch profile {
+	case "acceptance":
+		plan.StaggerSeconds = 10 * 60
+		plan.TotalDurationSec = plan.DurationSec + plan.StaggerSeconds*(count-1)
+	case "overnight":
+		plan.StaggerSeconds = 60 * 60
+		plan.TotalDurationSec = 8 * 60 * 60
+		minTotal := count * 60 * 60
+		if plan.TotalDurationSec < minTotal {
+			plan.TotalDurationSec = minTotal
+		}
+		if plan.TotalDurationSec > 10*60*60 {
+			return nvidiaRampSpec{}, fmt.Errorf("overnight staggered NVIDIA burn supports at most 10 GPUs")
+		}
+		plan.DurationSec = plan.TotalDurationSec - plan.StaggerSeconds*(count-1)
+	default:
+		plan.StaggerSeconds = 2 * 60
+		plan.TotalDurationSec = plan.DurationSec + plan.StaggerSeconds*(count-1)
+	}
+	return plan, nil
 }

 func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
@@ -599,8 +640,11 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RunNCCL:           t.params.RunNCCL,
 			ParallelGPUs:      t.params.ParallelGPUs,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
 		}, j.append)
-		case "nvidia-compute":
+	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
@@ -609,11 +653,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
-			staggerSec := boolToNvidiaStaggerSeconds(t.params.StaggerGPUStart, t.params.GPUIndices)
-			if staggerSec > 0 {
-				j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU", staggerSec))
-			}
-			archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, staggerSec, j.append)
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
+		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, rampPlan.StaggerSeconds, j.append)
 	case "nvidia-targeted-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -663,13 +714,24 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
-			archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
-				DurationSec:       dur,
-				Loader:            t.params.Loader,
-				GPUIndices:        t.params.GPUIndices,
-				ExcludeGPUIndices: t.params.ExcludeGPUIndices,
-				StaggerSeconds:    boolToNvidiaStaggerSeconds(t.params.StaggerGPUStart, t.params.GPUIndices),
-			}, j.append)
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
+		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
+			DurationSec:       dur,
+			Loader:            t.params.Loader,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			StaggerSeconds:    rampPlan.StaggerSeconds,
+		}, j.append)
 	case "memory":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -491,6 +491,83 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }

+func TestResolveNvidiaRampPlan(t *testing.T) {
+	tests := []struct {
+		name     string
+		profile  string
+		enabled  bool
+		selected []int
+		want     nvidiaRampSpec
+		wantErr  string
+	}{
+		{
+			name:     "disabled uses base preset",
+			profile:  "acceptance",
+			selected: []int{0, 1},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, TotalDurationSec: 60 * 60},
+		},
+		{
+			name:     "smoke ramp uses two minute steps",
+			profile:  "smoke",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 5 * 60, StaggerSeconds: 2 * 60, TotalDurationSec: 9 * 60},
+		},
+		{
+			name:     "acceptance ramp uses ten minute steps",
+			profile:  "acceptance",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, StaggerSeconds: 10 * 60, TotalDurationSec: 80 * 60},
+		},
+		{
+			name:     "overnight stays at eight hours when possible",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 6 * 60 * 60, StaggerSeconds: 60 * 60, TotalDurationSec: 8 * 60 * 60},
+		},
+		{
+			name:     "overnight extends to keep one hour after final gpu",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, StaggerSeconds: 60 * 60, TotalDurationSec: 9 * 60 * 60},
+		},
+		{
+			name:     "overnight rejects impossible gpu count",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+			wantErr:  "at most 10 GPUs",
+		},
+		{
+			name:    "enabled requires explicit selection",
+			profile: "smoke",
+			enabled: true,
+			wantErr: "requires explicit GPU selection",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := resolveNvidiaRampPlan(tc.profile, tc.enabled, tc.selected)
+			if tc.wantErr != "" {
+				if err == nil || !strings.Contains(err.Error(), tc.wantErr) {
+					t.Fatalf("err=%v want substring %q", err, tc.wantErr)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("resolveNvidiaRampPlan error: %v", err)
+			}
+			if got != tc.want {
+				t.Fatalf("resolveNvidiaRampPlan(%q, %t, %v)=%+v want %+v", tc.profile, tc.enabled, tc.selected, got, tc.want)
+			}
+		})
+	}
+}
+
 func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
 	tests := []struct {
 		loader string
--- a/bible-local/docs/gpu-model-propagation.md
+++ b/bible-local/docs/gpu-model-propagation.md
@@ -0,0 +1,117 @@
+# GPU Model Name Propagation
+
+How GPU model names are detected, stored, and displayed throughout the project.
+
+---
+
+## Detection Sources
+
+There are **three separate pipelines** for GPU model names — they use different structs and don't share state.
+
+### Pipeline A — Live / SAT (nvidia-smi query at runtime)
+
+**File:** `audit/internal/platform/sat.go`
+
+- `ListNvidiaGPUs()` → `NvidiaGPU.Name` (field: `name`, from `nvidia-smi --query-gpu=index,name,...`)
+- `ListNvidiaGPUStatuses()` → `NvidiaGPUStatus.Name`
+- Used by: GPU selection UI, live metrics labels, burn/stress test logic
+
+### Pipeline B — Benchmark results
+
+**File:** `audit/internal/platform/benchmark.go`, line 124
+
+- `queryBenchmarkGPUInfo(selected)` → `benchmarkGPUInfo.Name`
+- Stored in `BenchmarkGPUResult.Name` (`json:"name,omitempty"`)
+- Used by: benchmark history table, benchmark report
+
+### Pipeline C — Hardware audit JSON (PCIe schema)
+
+**File:** `audit/internal/schema/hardware.go`
+
+- `HardwarePCIeDevice.Model *string` (field name is **Model**, not Name)
+- For AMD GPUs: populated by `audit/internal/collector/amdgpu.go` from `info.Product`
+- For NVIDIA GPUs: **NOT populated** by `audit/internal/collector/nvidia.go` — the NVIDIA enricher sets telemetry/status but skips the Model field
+- Used by: hardware summary page (`hwDescribeGPU` in `pages.go:487`)
+
+---
+
+## Key Inconsistency: NVIDIA PCIe Model is Never Set
+
+`audit/internal/collector/nvidia.go` — `enrichPCIeWithNVIDIAData()` enriches NVIDIA PCIe devices with telemetry and status but does **not** populate `HardwarePCIeDevice.Model`.
+
+This means:
+- Hardware summary page shows "Unknown GPU" for all NVIDIA devices (falls back at `pages.go:486`)
+- AMD GPUs do have their model populated
+
+The fix would be: copy `gpu.Name` from the SAT pipeline into `dev.Model` inside `enrichPCIeWithNVIDIAData`.
+
+---
+
+## Benchmark History "Unknown GPU" Issue
+
+**Symptom:** Benchmark history table shows "GPU #N — Unknown GPU" columns instead of real GPU model names.
+
+**Root cause:** `BenchmarkGPUResult.Name` has tag `json:"name,omitempty"`. If `queryBenchmarkGPUInfo()` fails (warns at `benchmark.go:126`) or returns empty names, the Name field is never set and is omitted from JSON. Loaded results have empty Name → falls back to "Unknown GPU" at `pages.go:2226, 2237`.
+
+This happens for:
+- Older result files saved before the `Name` field was added
+- Runs where nvidia-smi query failed before the benchmark started
+
+---
+
+## Fallback Strings — Current State
+
+| Location | File | Fallback string |
+|---|---|---|
+| Hardware summary (PCIe) | `pages.go:486` | `"Unknown GPU"` |
+| Benchmark report summary | `benchmark_report.go:43` | `"Unknown GPU"` |
+| Benchmark report scorecard | `benchmark_report.go:93` | `"Unknown"` ← inconsistent |
+| Benchmark report detail | `benchmark_report.go:122` | `"Unknown GPU"` |
+| Benchmark history per-GPU col | `pages.go:2226` | `"Unknown GPU"` |
+| Benchmark history parallel col | `pages.go:2237` | `"Unknown GPU"` |
+| SAT status file write | `sat.go:922` | `"unknown"` ← lowercase, inconsistent |
+| GPU selection API | `api.go:163` | `"GPU N"` (no "Unknown") |
+
+**Rule:** all UI fallbacks should use `"Unknown GPU"`. The two outliers are `benchmark_report.go:93` (`"Unknown"`) and `sat.go:922` (`"unknown"`).
+
+---
+
+## GPU Selection UI
+
+**File:** `audit/internal/webui/pages.go`
+
+- Source: `GET /api/gpus` → `api.go` → `ListNvidiaGPUs()` → live nvidia-smi
+- Render: `'GPU ' + gpu.index + ' — ' + gpu.name + ' · ' + mem`
+- Fallback: `gpu.name || 'GPU ' + idx` (JS, line ~1432)
+
+This always shows the correct model because it queries nvidia-smi live. It is **not** connected to benchmark result data.
+
+---
+
+## Data Flow Summary
+
+```
+nvidia-smi (live)
+  └─ ListNvidiaGPUs() → NvidiaGPU.Name
+       ├─ GPU selection UI (always correct)
+       ├─ Live metrics labels (charts_svg.go)
+       └─ SAT/burn status file (sat.go)
+
+nvidia-smi (at benchmark start)
+  └─ queryBenchmarkGPUInfo() → benchmarkGPUInfo.Name
+       └─ BenchmarkGPUResult.Name (json:"name,omitempty")
+            ├─ Benchmark report
+            └─ Benchmark history table columns
+
+nvidia-smi / lspci (audit collection)
+  └─ HardwarePCIeDevice.Model (NVIDIA: NOT populated; AMD: populated)
+       └─ Hardware summary page hwDescribeGPU()
+```
+
+---
+
+## What Needs Fixing
+
+1. **NVIDIA PCIe Model** — `enrichPCIeWithNVIDIAData()` should set `dev.Model = &gpu.Name`
+2. **Fallback consistency** — `benchmark_report.go:93` should say `"Unknown GPU"` not `"Unknown"`; `sat.go:922` should say `"Unknown GPU"` not `"unknown"`
+3. **Old benchmark JSONs** — no fix possible for already-saved results with missing names (display-only issue)
--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -11,18 +11,18 @@ echo "  Hardware Audit LiveCD"
 echo ""

 menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

 submenu "EASY-BEE (advanced options) -->" {
    menuentry "EASY-BEE — GSP=off" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }

    menuentry "EASY-BEE — KMS (no nomodeset)" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }

--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -3,31 +3,31 @@ label live-@FLAVOUR@-normal
    menu default
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=normal
+    append @APPEND_LIVE@ bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-kms
    menu label EASY-BEE (^graphics/KMS)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal
+    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ toram bee.nvidia.mode=normal
+    append @APPEND_LIVE@ toram bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-kms-gsp-off
    menu label EASY-BEE (g^raphics/KMS, GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -25,6 +25,7 @@ ensure_bee_console_user() {
 ensure_bee_console_user

 # Enable common bee services
+systemctl enable bee-hpc-tuning.service
 systemctl enable bee-network.service
 systemctl enable bee-preflight.service
 systemctl enable bee-audit.service
@@ -55,6 +56,7 @@ fi
 # nogpu: no GPU services needed

 # Ensure scripts are executable
+chmod +x /usr/local/bin/bee-hpc-tuning  2>/dev/null || true
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
--- a/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
+++ b/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Bee: HPC tuning (CPU governor, C-states)
+After=local-fs.target
+Before=bee-nvidia.service bee-audit.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-hpc-tuning.log /usr/local/bin/bee-hpc-tuning
+StandardOutput=journal
+StandardError=journal
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
--- a/iso/overlay/usr/local/bin/bee-hpc-tuning
+++ b/iso/overlay/usr/local/bin/bee-hpc-tuning
@@ -0,0 +1,41 @@
+#!/bin/sh
+# bee-hpc-tuning — best-effort HPC tuning for deterministic benchmarking.
+# Called by bee-hpc-tuning.service at boot; failures are logged as WARN, not fatal.
+
+log() { echo "[bee-hpc-tuning] $*"; }  # prefix every message with the script tag
+
+# ── CPU governor ────────────────────────────────────────────────────────────
+# Write "performance" to every core's scaling_governor via sysfs and count the
+# successes/failures. cpupower is not available, hence the direct sysfs write.
+governor_ok=0
+governor_fail=0
+for gov_path in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
+    [ -f "$gov_path" ] || continue  # an unmatched glob stays literal; skip it
+    if echo performance 2>/dev/null > "$gov_path"; then  # stderr first, so open errors are silenced too
+        governor_ok=$((governor_ok + 1))
+    else
+        governor_fail=$((governor_fail + 1))
+    fi
+done
+
+if [ "$governor_ok" -gt 0 ] && [ "$governor_fail" -eq 0 ]; then  # every write succeeded
+    log "CPU governor set to performance on ${governor_ok} core(s)"
+elif [ "$governor_ok" -gt 0 ]; then  # mixed result
+    log "WARN: CPU governor: ${governor_ok} OK, ${governor_fail} failed"
+elif [ "$governor_fail" -gt 0 ]; then  # every write failed
+    log "WARN: failed to set CPU governor on ${governor_fail} core(s)"
+else  # the loop never saw a regular scaling_governor file
+    log "WARN: no cpufreq scaling_governor paths found (C-state governor or HW-controlled)"
+fi
+
+# ── Transparent Huge Pages ───────────────────────────────────────────────────
+# Read-only: log the current THP mode; it is presumably set on the kernel cmdline (verify).
+thp_path=/sys/kernel/mm/transparent_hugepage/enabled
+if [ -f "$thp_path" ]; then
+    current=$(cat "$thp_path" 2>/dev/null)
+    log "transparent_hugepage: ${current}"
+else
+    log "WARN: transparent_hugepage sysfs path not found"
+fi
+
+log "done"
Author	SHA1	Message	Date
Michael Chus	02e44b1172	Fix USB/RAM status checks; add server model+S/N to dashboard; remove cycles USB Export Drive: lsblk reports TRAN only for whole disks, not partitions (/dev/sdc1). Strip trailing partition digits to get parent disk before transport check. LiveCD in RAM: When RunInstallToRAM copies squashfs to /dev/shm/bee-live/ but bind-mount of /run/live/medium fails (CD-ROM boots), /run/live/medium still shows the CD-ROM fstype. Add fallback: if /dev/shm/bee-live/*.squashfs exists, the data is in RAM — report status OK. Dashboard Hardware Summary: Show server Manufacturer + ProductName as heading and S/N as subline above the component table, sourced from hw.Board (dmidecode system-type data). Validate: Remove Cycles input — always run once. cycles=1 hardcoded in runAllSAT(). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:46:42 +03:00
Michael Chus	2ceaa0d0ca	Include profile and mode in benchmark task names for task list clarity Task names now follow the pattern: NVIDIA Benchmark · <profile> · <mode> [· GPU <indices>] Examples: NVIDIA Benchmark · standard · sequential (GPU 0, RTX 6000 Pro) NVIDIA Benchmark · stability · parallel NVIDIA Benchmark · standard · ramp 1/4 · GPU 0 NVIDIA Benchmark · standard · ramp 2/4 · GPU 0,1 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:36:51 +03:00
Michael Chus	9482ba20a2	Remove NCCL checkbox — auto-enable interconnect step when >1 GPU selected NCCL all_reduce is always attempted when 2+ GPUs are selected; a failure leaves InterconnectScore=0 (no bonus, no penalty) and OverallStatus unaffected. Exposing the checkbox implied NCCL is optional and made a failed run look like a deliberate skip. - Remove benchmark-run-nccl checkbox and its change listener from pages.go - Client sends run_nccl: selected.length > 1 (automatic) - api.go default runNCCL=true is unchanged - Selection note now mentions NCCL automatically for multi-GPU runs Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:33:17 +03:00
Michael Chus	813e2f86a9	Add scalability/ramp-up labeling, ServerPower penalty in scoring, and report improvements - Add RampStep/RampTotal/RampRunID to NvidiaBenchmarkOptions, taskParams, and NvidiaBenchmarkResult so ramp-up steps can be correlated across result.json files - Add ScalabilityScore field to NvidiaBenchmarkResult (placeholder; computed externally by comparing ramp-up step results sharing the same ramp_run_id) - Propagate ramp fields through api.go (generates shared ramp_run_id at spawn time), tasks.go handler, and benchmark.go result population - Apply ServerPower penalty to CompositeScore when IPMI reporting_ratio < 0.75: factor = ratio/0.75, applied per-GPU with a note explaining the reduction - Add finding when server power delta exceeds GPU-reported sum by >25% (non-GPU draw) - Report header now shows ramp step N/M and run ID instead of "parallel" when in ramp mode; shows scalability_score when non-zero Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:30:47 +03:00
Michael Chus	58a6da9b44	Recover power limits and SM count from nvidia-smi -q in enrichGPUInfo When --query-gpu CSV fields fail (exit status 2 on some Blackwell + driver combos), enrichGPUInfoWithMaxClocks now also parses from the verbose nvidia-smi -q output already collected at benchmark start: - Default Power Limit → DefaultPowerLimitW - Current Power Limit → PowerLimitW (fallback) - Multiprocessor Count → MultiprocessorCount Fixes PowerSustainScore=0 on systems where all three CSV query variants fail but nvidia-smi -q succeeds (confirmed on RTX PRO 6000 Blackwell + driver 590.48.01). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:17:56 +03:00
Michael Chus	f4a19c0a00	Add power calibration step to benchmark; fix PowerSustainScore reference Before the per-GPU compute phases, run `dcgmi diag -r targeted_power` for 45 s while collecting nvidia-smi power metrics in parallel. The p95 power per GPU is stored as calibrated_peak_power_w and used as the denominator for PowerSustainScore instead of the hardware default limit, which bee-gpu-burn cannot reach because it is compute-only. Fallback chain: calibrated peak → default limit → enforced limit. If dcgmi is absent or the run fails, calibration is skipped silently. Adjust composite score weights to match the new honest power reference: base 0.35, thermal 0.25, stability 0.25, power 0.15, NCCL bonus 0.10. Power weight reduced (0.20→0.15) because even with a calibrated reference bee-gpu-burn reaches ~60-75% of TDP by design (no concurrent mem stress). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:06:46 +03:00
Michael Chus	9e3dcf9b4d	Record host CPU/RAM config in benchmark results; check CPU load - BenchmarkHostConfig captures CPU model, sockets, cores, threads, and total RAM from /proc/cpuinfo and /proc/meminfo at benchmark start. - BenchmarkCPULoad samples host CPU utilisation every 10 s throughout the GPU steady-state phase (sequential and parallel paths). - Summarises avg/max/p95 and classifies status as ok / high / unstable. - Adds a finding when CPU load is elevated (avg >20% or max >40%) or erratic (stddev >12%), with a plain-English description in the report. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 20:02:04 +03:00
Michael Chus	098e19f760	Add ramp-up mode to NVIDIA GPU benchmark Adds a new checkbox (enabled by default) in the benchmark section. In ramp-up mode N tasks are spawned simultaneously: 1 GPU, then 2, then 3, up to all selected GPUs — each step runs its GPUs in parallel. NCCL runs only on the final step. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 18:34:19 +03:00
Michael Chus	e16d0f34b5	Adjust burn GPU ramp timing by profile	2026-04-12 15:58:30 +03:00
Mikhail Chusavitin	525ed8b8fc	Fix GPU clock lock normalization for Blackwell (clocks.max.* unsupported) clocks.max.graphics / clocks.max.memory CSV fields return exit status 2 on RTX PRO 6000 Blackwell (driver 98.x), causing the entire gpu inventory query to fail and clock lock to be skipped → normalization: partial. Fix: - Add minimal fallback query (index,uuid,name,pci.bus_id,vbios_version, power.limit) that succeeds even without clock fields - Add enrichGPUInfoWithMaxClocks: parses "Max Clocks" section of nvidia-smi -q verbose output to fill MaxGraphicsClockMHz / MaxMemoryClockMHz when CSV fields fail - Move nvidia-smi -q execution before queryBenchmarkGPUInfo so its output is available for clock enrichment immediately after - Tests: cover enrichment and skip-if-populated cases Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 13:33:54 +03:00
Mikhail Chusavitin	4f94ebcb2c	Add HPC tuning: PCIe ASPM off, C-states, performance CPU governor - grub.cfg + isolinux/live.cfg.in: add pcie_aspm=off, intel_idle.max_cstate=1 and processor.max_cstate=1 to all non-failsafe boot entries - bee-hpc-tuning: new script that sets all CPU cores to performance governor via sysfs and logs THP state at boot - bee-hpc-tuning.service: runs before bee-nvidia and bee-audit - 9000-bee-setup.hook.chroot: enable service and mark script executable Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 13:07:32 +03:00
Mikhail Chusavitin	05c1fde233	Warn on PCIe link speed degradation and collect lspci -vvv in techdump - collector/pcie: add applyPCIeLinkSpeedWarning that sets status=Warning and ErrorDescription when current link speed is below maximum negotiated speed (e.g. Gen1 running on a Gen5 slot) - collector/pcie: add pcieLinkSpeedRank helper for Gen string comparison - collector/pcie_filter_test: cover degraded and healthy link speed cases - platform/techdump: collect lspci -vvv → lspci-vvv.txt for LnkCap/LnkSta Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 12:42:17 +03:00
Michael Chus	825ef6b98a	Add USB export drive and LiveCD-in-RAM checks to Runtime Health - schema: add ToRAMStatus and USBExportPath fields to RuntimeHealth - platform/runtime.go: collectToRAMHealth (ok/warning/failed based on IsLiveMediaInRAM + toramActive) and collectUSBExportHealth (scans /proc/mounts + lsblk for writable USB-backed filesystems) - pages.go: add USB Export Drive and LiveCD in RAM rows to the health table Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-11 10:05:27 +03:00
Michael Chus	ba16021cdb	Fix GPU model propagation, export filenames, PSU/service status, and chart perf - nvidia.go: add Name field to nvidiaGPUInfo, include model name in nvidia-smi query, set dev.Model in enrichPCIeWithNVIDIAData - pages.go: fix duplicate GPU count in validate card summary (4 GPU: 4 x … → 4 x … GPU); fix PSU UNKNOWN fallback from hw.PowerSupplies; treat activating/deactivating/reloading service states as OK in Runtime Health - support_bundle.go: use "150405" time format (no colons) for exFAT compat - sat.go / benchmark.go / platform_stress.go / sat_fan_stress.go: remove .tar.gz archive creation from export dirs — export packs everything itself - charts_svg.go: add min-max downsampling (1400 pt cap) for SVG chart perf - benchmark_report.go / sat.go: normalize GPU fallback to "Unknown GPU" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-11 10:05:27 +03:00