Unify benchmark exports and drop ASCII charts

Add per-precision benchmark phases, weighted TOPS scoring, and ECC tracking
- Split steady window into 6 equal slots: fp8/fp16/fp32/fp64/fp4 + combined
- Each precision phase runs bee-gpu-burn with --precision filter so PowerCVPct reflects single-kernel stability (not a round-robin artifact)
- Add fp4 support in bee-gpu-stress.c for Blackwell (cc>=100) via existing CUDA_R_4F_E2M1 guard
- Weighted TOPS: fp64×2.0, fp32×1.0, fp16×0.5, fp8×0.25, fp4×0.125
- SyntheticScore = sum of weighted TOPS from per-precision phases
- MixedScore = sum from combined phase; MixedEfficiency = Mixed/Synthetic
- ComputeScore = SyntheticScore × (1 + MixedEfficiency × 0.3)
- ECC volatile counters sampled before/after each phase and overall
- DegradationReasons: ecc_uncorrected_errors, ecc_corrected_errors
- Report: per-precision stability table with ECC columns, methodology section
- Ramp-up history table redesign: GPU indices as columns, runs as rows

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 21:38:28 +03:00 · 2026-04-13 10:49:49 +03:00 · 2026-04-12 22:46:42 +03:00 · 2026-04-12 22:36:51 +03:00 · 2026-04-12 22:33:17 +03:00 · 2026-04-12 22:30:47 +03:00
52 changed files with 3083 additions and 1746 deletions
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -117,7 +117,7 @@ type satRunner interface {
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
-	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
@@ -139,7 +139,6 @@ type satRunner interface {
 	RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
 	RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
 	RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
-	RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error)
 }

 type runtimeChecker interface {
@@ -191,6 +190,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
 	}
 	result := collector.Run(runtimeMode)
 	applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB)
+	writePSUStatusesToDB(a.StatusDB, result.Hardware.PowerSupplies)
 	if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
 		result.Runtime = &health
 	}
@@ -567,11 +567,11 @@ func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts pl
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }

-func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
+	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, staggerSec, logFunc)
 }

 func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
@@ -738,13 +738,6 @@ func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
 	return ActionResult{Title: "NCCL bandwidth test", Body: body}, err
 }

-func (a *App) RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error) {
-	if a == nil {
-		return "", nil, fmt.Errorf("app not configured")
-	}
-	return a.sat.RunHPL(ctx, baseDir, opts, logFunc)
-}
-
 func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) {
 	path, err := a.RunFanStressTest(ctx, "", opts)
 	body := formatFanStressResult(path)
@@ -934,6 +927,41 @@ func bodyOr(body, fallback string) string {
 	return body
 }

+// writePSUStatusesToDB copies the PSU statuses gathered during audit into the
+// component-status DB so they are visible in the Hardware Summary card.
+// PSU status is sourced from IPMI (ipmitool fru + sdr) during audit.
+//
+// Each PSU that carries a status is recorded under "psu:<slot>" ("psu:?" when
+// the slot is missing). An aggregate "psu:all" entry is then recorded with the
+// worst status seen: Critical beats Warning, and anything else counts as OK.
+// A nil db or an empty PSU list is a no-op.
+func writePSUStatusesToDB(db *ComponentStatusDB, psus []schema.HardwarePowerSupply) {
+	if db == nil || len(psus) == 0 {
+		return
+	}
+	const source = "audit:ipmi"
+	sawCritical, sawWarning := false, false
+	for i := range psus {
+		p := &psus[i]
+		if p.Status == nil {
+			// No status reported for this PSU; nothing to record.
+			continue
+		}
+		key := "psu:?"
+		if p.Slot != nil {
+			key = "psu:" + *p.Slot
+		}
+		var detail string
+		if p.ErrorDescription != nil {
+			detail = *p.ErrorDescription
+		}
+		db.Record(key, source, *p.Status, detail)
+		if *p.Status == "Critical" {
+			sawCritical = true
+		} else if *p.Status == "Warning" {
+			sawWarning = true
+		}
+	}
+	aggregate := "OK"
+	switch {
+	case sawCritical:
+		aggregate = "Critical"
+	case sawWarning:
+		aggregate = "Warning"
+	}
+	db.Record("psu:all", source, aggregate, "")
+}
+
 func ReadRuntimeHealth(path string) (schema.RuntimeHealth, error) {
 	raw, err := os.ReadFile(path)
 	if err != nil {
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -161,7 +161,7 @@ func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir
 	return f.runNvidiaFn(baseDir)
 }

-func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ int, _ func(string)) (string, error) {
 	if f.runNvidiaComputeFn != nil {
 		return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
 	}
@@ -282,9 +282,6 @@ func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.Platf
 func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
 	return "", nil
 }
-func (f fakeSAT) RunHPL(_ context.Context, _ string, _ platform.HPLOptions, _ func(string)) (string, *platform.HPLResult, error) {
-	return "", nil, nil
-}

 func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 	t.Parallel()
@@ -545,8 +542,6 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
 }

 func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
-	t.Parallel()
-
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -583,8 +578,6 @@ func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
 }

 func TestExportSupportBundleResultDoesNotPretendSuccessOnError(t *testing.T) {
-	t.Parallel()
-
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -646,8 +639,6 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
 }

 func TestRunSATDefaultsToExportDir(t *testing.T) {
-	t.Parallel()
-
 	oldSATBaseDir := DefaultSATBaseDir
 	DefaultSATBaseDir = "/tmp/export/bee-sat"
 	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -54,7 +54,7 @@ if ! command -v lspci >/dev/null 2>&1; then
  exit 0
 fi
 found=0
-for gpu in $(lspci -Dn | awk '$3 ~ /^10de:/ {print $1}'); do
+	for gpu in $(lspci -Dn | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ {print $1}'); do
  found=1
  echo "=== GPU $gpu ==="
  lspci -s "$gpu" -vv 2>&1 || true
@@ -73,8 +73,13 @@ fi
 	{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
 for d in /sys/bus/pci/devices/*/; do
  vendor=$(cat "$d/vendor" 2>/dev/null)
-  [ "$vendor" = "0x10de" ] || continue
-  dev=$(basename "$d")
+	  [ "$vendor" = "0x10de" ] || continue
+	  class=$(cat "$d/class" 2>/dev/null)
+	  case "$class" in
+	    0x030000|0x030200) ;;
+	    *) continue ;;
+	  esac
+	  dev=$(basename "$d")
  echo "=== $dev ==="
  for f in current_link_speed current_link_width max_link_speed max_link_width; do
    printf "  %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
@@ -192,7 +197,7 @@ var supportBundleOptionalFiles = []struct {
 	{name: "system/syslog.txt", src: "/var/log/syslog"},
 }

-const supportBundleGlob = "bee-support-*.tar.gz"
+const supportBundleGlob = "????-??-?? (BEE-SP*)*.tar.gz"

 func BuildSupportBundle(exportDir string) (string, error) {
 	exportDir = strings.TrimSpace(exportDir)
@@ -206,9 +211,14 @@ func BuildSupportBundle(exportDir string) (string, error) {
 		return "", err
 	}

-	host := sanitizeFilename(hostnameOr("unknown"))
-	ts := time.Now().UTC().Format("20060102-150405")
-	stageRoot := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-%s-%s", host, ts))
+	now := time.Now().UTC()
+	date := now.Format("2006-01-02")
+	tod := now.Format("150405")
+	ver := bundleVersion()
+	model := serverModelForBundle()
+	sn := serverSerialForBundle()
+
+	stageRoot := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-stage-%s-%s", sanitizeFilename(hostnameOr("unknown")), now.Format("20060102-150405")))
 	if err := os.MkdirAll(stageRoot, 0755); err != nil {
 		return "", err
 	}
@@ -240,7 +250,8 @@ func BuildSupportBundle(exportDir string) (string, error) {
 		return "", err
 	}

-	archivePath := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-%s-%s.tar.gz", host, ts))
+	archiveName := fmt.Sprintf("%s (BEE-SP v%s) %s %s %s.tar.gz", date, ver, model, sn, tod)
+	archivePath := filepath.Join(os.TempDir(), archiveName)
 	if err := createSupportTarGz(archivePath, stageRoot); err != nil {
 		return "", err
 	}
@@ -397,6 +408,60 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	return os.WriteFile(dst, []byte(body.String()), 0644)
 }

+// bundleVersion returns the build version for use in the support-bundle file
+// name: the value of buildVersion with one leading "v" and then one leading
+// "V" stripped. An empty or "unknown" result is reported as "0.0".
+func bundleVersion() string {
+	ver := strings.TrimPrefix(strings.TrimPrefix(buildVersion(), "v"), "V")
+	switch ver {
+	case "", "unknown":
+		return "0.0"
+	}
+	return ver
+}
+
+// serverModelForBundle returns the "Product Name" field printed by
+// `dmidecode -t 1`, prepared for use as one component of the support-bundle
+// archive file name. Spaces and path separators ("/" and "\") are replaced
+// with "_" so the value cannot introduce a directory level once the file name
+// is joined onto a directory. It returns "unknown" when dmidecode fails, the
+// field is absent, or its value is empty.
+func serverModelForBundle() string {
+	raw, err := exec.Command("dmidecode", "-t", "1").Output()
+	if err != nil {
+		return "unknown"
+	}
+	for _, line := range strings.Split(string(raw), "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), ": ")
+		if !ok || strings.TrimSpace(key) != "Product Name" {
+			continue
+		}
+		val = strings.TrimSpace(val)
+		if val == "" {
+			return "unknown"
+		}
+		// A separator inside the model string would turn the archive name
+		// into a nested path whose parent directory does not exist.
+		return strings.NewReplacer(" ", "_", "/", "_", "\\", "_").Replace(val)
+	}
+	return "unknown"
+}
+
+// serverSerialForBundle returns the "Serial Number" field printed by
+// `dmidecode -t 1`, prepared for use as one component of the support-bundle
+// archive file name. Path separators ("/" and "\") are replaced with "_" so
+// the value cannot introduce a directory level once the file name is joined
+// onto a directory; other characters, including spaces, are kept as reported.
+// It returns "unknown" when dmidecode fails, the field is absent, or its value
+// is empty.
+func serverSerialForBundle() string {
+	raw, err := exec.Command("dmidecode", "-t", "1").Output()
+	if err != nil {
+		return "unknown"
+	}
+	for _, line := range strings.Split(string(raw), "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), ": ")
+		if !ok || strings.TrimSpace(key) != "Serial Number" {
+			continue
+		}
+		val = strings.TrimSpace(val)
+		if val == "" {
+			return "unknown"
+		}
+		// A separator inside the serial would turn the archive name into a
+		// nested path whose parent directory does not exist.
+		return strings.NewReplacer("/", "_", "\\", "_").Replace(val)
+	}
+	return "unknown"
+}
+
 func buildVersion() string {
 	raw, err := exec.Command("bee", "version").CombinedOutput()
 	if err != nil {
--- a/audit/internal/collector/nic_mellanox.go
+++ b/audit/internal/collector/nic_mellanox.go
@@ -179,11 +179,3 @@ func commandOutputWithTimeout(timeout time.Duration, name string, args ...string
 	defer cancel()
 	return exec.CommandContext(ctx, name, args...).Output()
 }
-
-func interfaceHasCarrier(iface string) bool {
-	raw, err := readNetCarrierFile(iface)
-	if err != nil {
-		return false
-	}
-	return strings.TrimSpace(raw) == "1"
-}
--- a/audit/internal/collector/nic_telemetry.go
+++ b/audit/internal/collector/nic_telemetry.go
@@ -58,12 +58,10 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
 			}
 		}

-		if interfaceHasCarrier(iface) {
-			if out, err := ethtoolModuleQuery(iface); err == nil {
-				if injectSFPDOMTelemetry(&devs[i], out) {
-					enriched++
-					continue
-				}
+		if out, err := ethtoolModuleQuery(iface); err == nil {
+			if injectSFPDOMTelemetry(&devs[i], out) {
+				enriched++
+				continue
 			}
 		}
 		if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
@@ -115,8 +113,38 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 		}
 		key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
 		val := strings.TrimSpace(trimmed[idx+1:])
+		if val == "" || strings.EqualFold(val, "not supported") || strings.EqualFold(val, "unknown") {
+			continue
+		}

 		switch {
+		case key == "identifier":
+			s := parseSFPIdentifier(val)
+			dev.SFPIdentifier = &s
+			t := true
+			dev.SFPPresent = &t
+			changed = true
+		case key == "connector":
+			s := parseSFPConnector(val)
+			dev.SFPConnector = &s
+			changed = true
+		case key == "vendor name":
+			s := strings.TrimSpace(val)
+			dev.SFPVendor = &s
+			changed = true
+		case key == "vendor pn":
+			s := strings.TrimSpace(val)
+			dev.SFPPartNumber = &s
+			changed = true
+		case key == "vendor sn":
+			s := strings.TrimSpace(val)
+			dev.SFPSerialNumber = &s
+			changed = true
+		case strings.Contains(key, "laser wavelength"):
+			if f, ok := firstFloat(val); ok {
+				dev.SFPWavelengthNM = &f
+				changed = true
+			}
 		case strings.Contains(key, "module temperature"):
 			if f, ok := firstFloat(val); ok {
 				dev.SFPTemperatureC = &f
@@ -147,12 +175,61 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 	return changed
 }

+// parseSFPIdentifier reduces a raw ethtool identifier value to its
+// human-readable transceiver type, e.g. "0x03 (SFP)" → "SFP". When the value
+// has no non-empty parenthesised part it is returned unchanged.
+func parseSFPIdentifier(val string) string {
+	inner := extractParens(val)
+	if inner == "" {
+		return val
+	}
+	return inner
+}
+
+// parseSFPConnector reduces a raw ethtool connector value to the connector
+// type, e.g. "0x07 (LC)" → "LC". When the value has no non-empty
+// parenthesised part it is returned unchanged.
+func parseSFPConnector(val string) string {
+	inner := extractParens(val)
+	if inner == "" {
+		return val
+	}
+	return inner
+}
+
+// parenRe matches the first parenthesised group that holds at least one
+// character other than ")" and captures its contents.
+var parenRe = regexp.MustCompile(`\(([^)]+)\)`)
+
+// extractParens returns the whitespace-trimmed text inside the first
+// parenthesised group of s, or "" when s contains no such group.
+func extractParens(s string) string {
+	if groups := parenRe.FindStringSubmatch(s); len(groups) >= 2 {
+		return strings.TrimSpace(groups[1])
+	}
+	return ""
+}
+
 func parseSFPDOM(raw string) map[string]any {
 	dev := schema.HardwarePCIeDevice{}
 	if !injectSFPDOMTelemetry(&dev, raw) {
 		return map[string]any{}
 	}
 	out := map[string]any{}
+	if dev.SFPPresent != nil {
+		out["sfp_present"] = *dev.SFPPresent
+	}
+	if dev.SFPIdentifier != nil {
+		out["sfp_identifier"] = *dev.SFPIdentifier
+	}
+	if dev.SFPConnector != nil {
+		out["sfp_connector"] = *dev.SFPConnector
+	}
+	if dev.SFPVendor != nil {
+		out["sfp_vendor"] = *dev.SFPVendor
+	}
+	if dev.SFPPartNumber != nil {
+		out["sfp_part_number"] = *dev.SFPPartNumber
+	}
+	if dev.SFPSerialNumber != nil {
+		out["sfp_serial_number"] = *dev.SFPSerialNumber
+	}
+	if dev.SFPWavelengthNM != nil {
+		out["sfp_wavelength_nm"] = *dev.SFPWavelengthNM
+	}
 	if dev.SFPTemperatureC != nil {
 		out["sfp_temperature_c"] = *dev.SFPTemperatureC
 	}
--- a/audit/internal/collector/nic_telemetry_test.go
+++ b/audit/internal/collector/nic_telemetry_test.go
@@ -122,10 +122,7 @@ func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T)
 	readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
 	readNetCarrierFile = func(string) (string, error) { return "0", nil }
 	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
-	ethtoolModuleQuery = func(string) (string, error) {
-		t.Fatal("ethtool -m should not be called without carrier")
-		return "", nil
-	}
+	ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("no module") }

 	class := "EthernetController"
 	bdf := "0000:18:00.0"
--- a/audit/internal/collector/nvidia.go
+++ b/audit/internal/collector/nvidia.go
@@ -15,6 +15,7 @@ const nvidiaVendorID = 0x10de
 type nvidiaGPUInfo struct {
 	Index              int
 	BDF                string
+	Name               string
 	Serial             string
 	VBIOS              string
 	TemperatureC       *float64
@@ -73,6 +74,9 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 			continue
 		}

+		if v := strings.TrimSpace(info.Name); v != "" {
+			devs[i].Model = &v
+		}
 		if v := strings.TrimSpace(info.Serial); v != "" {
 			devs[i].SerialNumber = &v
 		}
@@ -99,7 +103,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
 	out, err := exec.Command(
 		"nvidia-smi",
-		"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
+		"--query-gpu=index,pci.bus_id,name,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
@@ -123,8 +127,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		if len(rec) == 0 {
 			continue
 		}
-		if len(rec) < 13 {
-			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 13", len(rec))
+		if len(rec) < 14 {
+			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 14", len(rec))
 		}

 		bdf := normalizePCIeBDF(rec[1])
@@ -135,17 +139,18 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		info := nvidiaGPUInfo{
 			Index:              parseRequiredInt(rec[0]),
 			BDF:                bdf,
-			Serial:             strings.TrimSpace(rec[2]),
-			VBIOS:              strings.TrimSpace(rec[3]),
-			TemperatureC:       parseMaybeFloat(rec[4]),
-			PowerW:             parseMaybeFloat(rec[5]),
-			ECCUncorrected:     parseMaybeInt64(rec[6]),
-			ECCCorrected:       parseMaybeInt64(rec[7]),
-			HWSlowdown:         parseMaybeBool(rec[8]),
-			PCIeLinkGenCurrent: parseMaybeInt(rec[9]),
-			PCIeLinkGenMax:     parseMaybeInt(rec[10]),
-			PCIeLinkWidthCur:   parseMaybeInt(rec[11]),
-			PCIeLinkWidthMax:   parseMaybeInt(rec[12]),
+			Name:               strings.TrimSpace(rec[2]),
+			Serial:             strings.TrimSpace(rec[3]),
+			VBIOS:              strings.TrimSpace(rec[4]),
+			TemperatureC:       parseMaybeFloat(rec[5]),
+			PowerW:             parseMaybeFloat(rec[6]),
+			ECCUncorrected:     parseMaybeInt64(rec[7]),
+			ECCCorrected:       parseMaybeInt64(rec[8]),
+			HWSlowdown:         parseMaybeBool(rec[9]),
+			PCIeLinkGenCurrent: parseMaybeInt(rec[10]),
+			PCIeLinkGenMax:     parseMaybeInt(rec[11]),
+			PCIeLinkWidthCur:   parseMaybeInt(rec[12]),
+			PCIeLinkWidthMax:   parseMaybeInt(rec[13]),
 		}
 		result[bdf] = info
 	}
--- a/audit/internal/collector/nvidia_test.go
+++ b/audit/internal/collector/nvidia_test.go
@@ -6,7 +6,7 @@ import (
 )

 func TestParseNVIDIASMIQuery(t *testing.T) {
-	raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
+	raw := "0, 00000000:65:00.0, NVIDIA H100 80GB HBM3, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
 	byBDF, err := parseNVIDIASMIQuery(raw)
 	if err != nil {
 		t.Fatalf("parse failed: %v", err)
@@ -16,6 +16,9 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
 	if !ok {
 		t.Fatalf("gpu by normalized bdf not found")
 	}
+	if gpu.Name != "NVIDIA H100 80GB HBM3" {
+		t.Fatalf("name: got %q", gpu.Name)
+	}
 	if gpu.Serial != "GPU-SERIAL-1" {
 		t.Fatalf("serial: got %q", gpu.Serial)
 	}
--- a/audit/internal/collector/pcie.go
+++ b/audit/internal/collector/pcie.go
@@ -2,6 +2,7 @@ package collector

 import (
 	"bee/audit/internal/schema"
+	"fmt"
 	"log/slog"
 	"os/exec"
 	"strconv"
@@ -79,6 +80,25 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
 		}
 	}

+	// Exclude BMC/management virtual VGA adapters — these are firmware video chips,
+	// not real GPUs, and pollute the GPU inventory (e.g. iBMC, iDRAC, iLO VGA).
+	if strings.Contains(c, "vga") || strings.Contains(c, "display") || strings.Contains(c, "3d") {
+		bmcPatterns := []string{
+			"management system chip",
+			"management controller",
+			"ibmc",
+			"idrac",
+			"ilo vga",
+			"aspeed",
+			"matrox",
+		}
+		for _, bad := range bmcPatterns {
+			if strings.Contains(d, bad) {
+				return false
+			}
+		}
+	}
+
 	if strings.Contains(v, "advanced micro devices") || strings.Contains(v, "[amd]") {
 		internalAMDPatterns := []string{
 			"dummy function",
@@ -153,6 +173,9 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {

 	// SVendor/SDevice available but not in schema — skip

+	// Warn if PCIe link is running below its maximum negotiated speed.
+	applyPCIeLinkSpeedWarning(&dev)
+
 	return dev
 }

@@ -222,6 +245,41 @@ func readPCIStringAttribute(bdf, attribute string) (string, bool) {
 	return value, true
 }

+// applyPCIeLinkSpeedWarning sets the device status to Warning, with an
+// ErrorDescription naming both speeds, when the current PCIe link speed ranks
+// below the maximum link speed.
+//
+// The device is left untouched when dev is nil, when either speed is missing,
+// or when either speed is not a recognised "GenN" string: an unrecognised
+// value ranks 0, so comparing it directly would report a healthy link as
+// degraded.
+func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
+	if dev == nil || dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
+		return
+	}
+	curRank := pcieLinkSpeedRank(*dev.LinkSpeed)
+	maxRank := pcieLinkSpeedRank(*dev.MaxLinkSpeed)
+	// Rank 0 means "unknown"; only compare two recognised generations.
+	if curRank == 0 || maxRank == 0 || curRank >= maxRank {
+		return
+	}
+	warn := statusWarning
+	dev.Status = &warn
+	desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
+	dev.ErrorDescription = &desc
+}
+
+// pcieLinkSpeedRank maps a normalized generation string to its number
+// ("Gen1" → 1 … "Gen6" → 6). Any other input, including different casing,
+// extra characters or generations outside 1–6, yields 0, which callers should
+// read as "unknown".
+func pcieLinkSpeedRank(gen string) int {
+	const prefix = "Gen"
+	// Exactly one character must follow the prefix.
+	if len(gen) != len(prefix)+1 || !strings.HasPrefix(gen, prefix) {
+		return 0
+	}
+	if digit := gen[len(prefix)]; digit >= '1' && digit <= '6' {
+		return int(digit - '0')
+	}
+	return 0
+}
+
 func normalizePCILinkSpeed(raw string) string {
 	raw = strings.TrimSpace(strings.ToLower(raw))
 	switch {
--- a/audit/internal/collector/pcie_filter_test.go
+++ b/audit/internal/collector/pcie_filter_test.go
@@ -1,6 +1,7 @@
 package collector

 import (
+	"bee/audit/internal/schema"
 	"encoding/json"
 	"strings"
 	"testing"
@@ -29,6 +30,8 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
 		{name: "raid", class: "RAID bus controller", want: true},
 		{name: "nvme", class: "Non-Volatile memory controller", want: true},
 		{name: "vga", class: "VGA compatible controller", want: true},
+		{name: "ibmc vga", class: "VGA compatible controller", vendor: "Huawei Technologies Co., Ltd.", device: "Hi171x Series [iBMC Intelligent Management system chip w/VGA support]", want: false},
+		{name: "aspeed vga", class: "VGA compatible controller", vendor: "ASPEED Technology, Inc.", device: "ASPEED Graphics Family", want: false},
 		{name: "other encryption controller", class: "Encryption controller", vendor: "Intel Corporation", device: "QuickAssist", want: true},
 	}

@@ -139,3 +142,77 @@ func TestNormalizePCILinkSpeed(t *testing.T) {
 		}
 	}
 }
+
+// TestApplyPCIeLinkSpeedWarning checks that applyPCIeLinkSpeedWarning flips an
+// OK device to Warning, with an ErrorDescription mentioning the current speed,
+// only when both speeds are present and the current one is below the maximum.
+func TestApplyPCIeLinkSpeedWarning(t *testing.T) {
+	strPtr := func(v string) *string { return &v }
+
+	cases := []struct {
+		name        string
+		linkSpeed   *string
+		maxSpeed    *string
+		wantWarning bool
+		wantGenIn   string // substring expected in ErrorDescription when warning
+	}{
+		{name: "degraded Gen1 vs Gen5", linkSpeed: strPtr("Gen1"), maxSpeed: strPtr("Gen5"), wantWarning: true, wantGenIn: "Gen1"},
+		{name: "at max Gen5", linkSpeed: strPtr("Gen5"), maxSpeed: strPtr("Gen5")},
+		{name: "degraded Gen4 vs Gen5", linkSpeed: strPtr("Gen4"), maxSpeed: strPtr("Gen5"), wantWarning: true, wantGenIn: "Gen4"},
+		{name: "missing current speed — no warning", maxSpeed: strPtr("Gen5")},
+		{name: "missing max speed — no warning", linkSpeed: strPtr("Gen1")},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Every case starts from a healthy device.
+			initial := statusOK
+			dev := schema.HardwarePCIeDevice{}
+			dev.Status = &initial
+			dev.LinkSpeed = tc.linkSpeed
+			dev.MaxLinkSpeed = tc.maxSpeed
+
+			applyPCIeLinkSpeedWarning(&dev)
+
+			warned := dev.Status != nil && *dev.Status == statusWarning
+			if warned != tc.wantWarning {
+				t.Fatalf("wantWarning=%v gotWarning=%v (status=%v)", tc.wantWarning, warned, dev.Status)
+			}
+			if !tc.wantWarning {
+				if dev.ErrorDescription != nil {
+					t.Fatalf("unexpected ErrorDescription: %s", *dev.ErrorDescription)
+				}
+				return
+			}
+			if dev.ErrorDescription == nil {
+				t.Fatal("expected ErrorDescription to be set")
+			}
+			if !strings.Contains(*dev.ErrorDescription, tc.wantGenIn) {
+				t.Fatalf("ErrorDescription %q does not contain %q", *dev.ErrorDescription, tc.wantGenIn)
+			}
+		})
+	}
+}
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -2,25 +2,15 @@ package platform

 import (
 	"fmt"
-	"os"
-	"path/filepath"
-	"regexp"
 	"strings"
 	"time"
 )

 func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
-	return renderBenchmarkReportWithCharts(result, nil)
+	return renderBenchmarkReportWithCharts(result)
 }

-type benchmarkReportChart struct {
-	Title   string
-	Content string
-}
-
-var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
-
-func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
+func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 	var b strings.Builder

 	// ── Header ────────────────────────────────────────────────────────────────
@@ -60,9 +50,17 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "**App version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
-	if result.ParallelGPUs {
+	if result.RampStep > 0 && result.RampTotal > 0 {
+		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
+		if result.RampRunID != "" {
+			fmt.Fprintf(&b, "**Ramp-up run ID:** %s  \n", result.RampRunID)
+		}
+	} else if result.ParallelGPUs {
 		fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously)  \n")
 	}
+	if result.ScalabilityScore > 0 {
+		fmt.Fprintf(&b, "**Scalability score:** %.1f%%  \n", result.ScalabilityScore)
+	}
 	fmt.Fprintf(&b, "**Overall status:** %s  \n", result.OverallStatus)
 	b.WriteString("\n")

@@ -83,14 +81,28 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		b.WriteString("\n")
 	}

+	// ── Scoring methodology ───────────────────────────────────────────────────
+	b.WriteString("## Scoring Methodology\n\n")
+	b.WriteString("**Compute score** is derived from two phases:\n\n")
+	b.WriteString("- **Synthetic** — each precision type (fp8, fp16, fp32, fp64, fp4) runs alone for a dedicated window. ")
+	b.WriteString("Measures peak throughput with the full GPU dedicated to one kernel type. ")
+	b.WriteString("Each result is normalised to fp32-equivalent TOPS using precision weights: ")
+	b.WriteString("fp64 ×2.0 · fp32 ×1.0 · fp16 ×0.5 · fp8 ×0.25 · fp4 ×0.125.\n")
+	b.WriteString("- **Mixed** — all precision types run simultaneously (combined phase). ")
+	b.WriteString("Reflects real inference workloads where fp8 matrix ops, fp16 attention and fp32 accumulation compete for bandwidth and SM scheduler slots.\n\n")
+	b.WriteString("**Formula:** `Compute = Synthetic × (1 + MixedEfficiency × 0.3)`\n\n")
+	b.WriteString("where `MixedEfficiency = Mixed / Synthetic`. A GPU that sustains 90 % throughput under mixed load ")
+	b.WriteString("receives a +27 % bonus over its synthetic score; one that drops to 60 % receives +18 %.\n\n")
+	b.WriteString("**Composite score** = `Compute × quality_factor` where quality factors in power sustain, thermal sustain, stability, and interconnect.\n\n")
+
 	// ── Scorecard table ───────────────────────────────────────────────────────
 	b.WriteString("## Scorecard\n\n")
-	b.WriteString("| GPU | Status | Composite | Compute | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
-	b.WriteString("|-----|--------|-----------|---------|-------------|---------------|-----------------|-----------|-------------|\n")
+	b.WriteString("| GPU | Status | Composite | Compute | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
+	b.WriteString("|-----|--------|-----------|---------|-----------|-------|------------|-------------|---------------|-----------------|-----------|-------------|\n")
 	for _, gpu := range result.GPUs {
 		name := strings.TrimSpace(gpu.Name)
 		if name == "" {
-			name = "Unknown"
+			name = "Unknown GPU"
 		}
 		interconnect := "-"
 		if gpu.Scores.InterconnectScore > 0 {
@@ -100,11 +112,26 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		if gpu.Scores.TOPSPerSMPerGHz > 0 {
 			topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
 		}
-		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %.1f | %.1f | %.1f | %s |\n",
+		synthetic := "-"
+		if gpu.Scores.SyntheticScore > 0 {
+			synthetic = fmt.Sprintf("%.2f", gpu.Scores.SyntheticScore)
+		}
+		mixed := "-"
+		if gpu.Scores.MixedScore > 0 {
+			mixed = fmt.Sprintf("%.2f", gpu.Scores.MixedScore)
+		}
+		mixedEff := "-"
+		if gpu.Scores.MixedEfficiency > 0 {
+			mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
+		}
+		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %s | %s | %s | %.1f | %.1f | %.1f | %s |\n",
 			gpu.Index, name,
 			gpu.Status,
 			gpu.Scores.CompositeScore,
 			gpu.Scores.ComputeScore,
+			synthetic,
+			mixed,
+			mixedEff,
 			topsPerSM,
 			gpu.Scores.PowerSustainScore,
 			gpu.Scores.ThermalSustainScore,
@@ -154,6 +181,34 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
 		b.WriteString("\n")

+		// Per-precision stability phases.
+		if len(gpu.PrecisionSteady) > 0 {
+			b.WriteString("**Per-precision stability:**\n\n")
+			b.WriteString("| Precision | Clock CV | Power CV | Clock Drift | ECC corr | ECC uncorr |\n|-----------|----------|----------|-------------|----------|------------|\n")
+			for _, p := range gpu.PrecisionSteady {
+				eccCorr := "—"
+				eccUncorr := "—"
+				if !p.ECC.IsZero() {
+					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
+					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
+				}
+				fmt.Fprintf(&b, "| %s | %.1f%% | %.1f%% | %.1f%% | %s | %s |\n",
+					p.Precision, p.Steady.ClockCVPct, p.Steady.PowerCVPct, p.Steady.ClockDriftPct,
+					eccCorr, eccUncorr)
+			}
+			b.WriteString("\n")
+		} else {
+			// Legacy: show combined-window variance.
+			fmt.Fprintf(&b, "**Clock/power variance (combined window):** clock CV %.1f%% · power CV %.1f%% · clock drift %.1f%%\n\n",
+				gpu.Steady.ClockCVPct, gpu.Steady.PowerCVPct, gpu.Steady.ClockDriftPct)
+		}
+
+		// ECC summary
+		if !gpu.ECC.IsZero() {
+			fmt.Fprintf(&b, "**ECC errors (total):** corrected=%d uncorrected=%d\n\n",
+				gpu.ECC.Corrected, gpu.ECC.Uncorrected)
+		}
+
 		// Throttle
 		throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
 		if throttle != "none" {
@@ -163,12 +218,14 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		// Precision results
 		if len(gpu.PrecisionResults) > 0 {
 			b.WriteString("**Precision results:**\n\n")
-			b.WriteString("| Precision | TOPS | Lanes | Iterations |\n|-----------|------|-------|------------|\n")
+			b.WriteString("| Precision | TOPS (raw) | Weight | TOPS (fp32-eq) | Lanes | Iterations |\n|-----------|------------|--------|----------------|-------|------------|\n")
 			for _, p := range gpu.PrecisionResults {
 				if p.Supported {
-					fmt.Fprintf(&b, "| %s | %.2f | %d | %d |\n", p.Name, p.TeraOpsPerSec, p.Lanes, p.Iterations)
+					weightStr := fmt.Sprintf("×%.3g", p.Weight)
+					fmt.Fprintf(&b, "| %s | %.2f | %s | %.2f | %d | %d |\n",
+						p.Name, p.TeraOpsPerSec, weightStr, p.WeightedTeraOpsPerSec, p.Lanes, p.Iterations)
 				} else {
-					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — |\n", p.Name)
+					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — | — | — |\n", p.Name)
 				}
 			}
 			b.WriteString("\n")
@@ -229,18 +286,6 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		}
 	}

-	// ── Terminal charts (steady-state only) ───────────────────────────────────
-	if len(charts) > 0 {
-		b.WriteString("## Steady-State Charts\n\n")
-		for _, chart := range charts {
-			content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
-			if content == "" {
-				continue
-			}
-			fmt.Fprintf(&b, "### %s\n\n```\n%s\n```\n\n", chart.Title, content)
-		}
-	}
-
 	// ── Methodology ───────────────────────────────────────────────────────────
 	b.WriteString("## Methodology\n\n")
 	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile)
@@ -251,39 +296,13 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 	// ── Raw files ─────────────────────────────────────────────────────────────
 	b.WriteString("## Raw Files\n\n")
 	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
-	b.WriteString("- `gpu-*-baseline-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-warmup.log`\n")
-	b.WriteString("- `gpu-*-steady.log`\n")
-	b.WriteString("- `gpu-*-steady-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-cooldown-metrics.csv/html/term.txt`\n")
+	b.WriteString("- `gpu-metrics.csv`\n- `gpu-metrics.html`\n- `gpu-burn.log`\n")
 	if result.Interconnect != nil {
 		b.WriteString("- `nccl-all-reduce.log`\n")
 	}
 	return b.String()
 }

-// loadBenchmarkReportCharts loads only steady-state terminal charts (baseline and
-// cooldown charts are not useful for human review).
-func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
-	var charts []benchmarkReportChart
-	for _, idx := range gpuIndices {
-		path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-steady-metrics-term.txt", idx))
-		raw, err := os.ReadFile(path)
-		if err != nil || len(raw) == 0 {
-			continue
-		}
-		charts = append(charts, benchmarkReportChart{
-			Title:   fmt.Sprintf("GPU %d — Steady State", idx),
-			Content: string(raw),
-		})
-	}
-	return charts
-}
-
-func stripANSIEscapeSequences(raw string) string {
-	return ansiEscapePattern.ReplaceAllString(raw, "")
-}
-
 // formatThrottleLine renders throttle counters as human-readable percentages of
 // the steady-state window.  Only non-zero counters are shown.  When the steady
 // duration is unknown (0), raw seconds are shown instead.
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -147,34 +147,89 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 	}
 }

-func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
+func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) {
 	t.Parallel()

-	report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
+	report := renderBenchmarkReport(NvidiaBenchmarkResult{
 		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
 		OverallStatus:      "OK",
 		SelectedGPUIndices: []int{0},
 		Normalization: BenchmarkNormalization{
 			Status: "full",
 		},
-	}, []benchmarkReportChart{
-		{
-			Title:   "GPU 0 Steady State",
-			Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
-		},
 	})

 	for _, needle := range []string{
-		"Steady-State Charts",
-		"GPU 0 Steady State",
-		"GPU 0 chart",
-		"42┤───",
+		"gpu-metrics.csv",
+		"gpu-metrics.html",
+		"gpu-burn.log",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
 		}
 	}
-	if strings.Contains(report, "\x1b[31m") {
-		t.Fatalf("report should not contain ANSI escapes\n%s", report)
+}
+
+func TestEnrichGPUInfoWithMaxClocks(t *testing.T) {
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Clocks
+        Graphics                          : 2422 MHz
+        Memory                            : 12481 MHz
+    Max Clocks
+        Graphics                          : 2430 MHz
+        SM                                : 2430 MHz
+        Memory                            : 12481 MHz
+        Video                             : 2107 MHz
+
+GPU 00000000:4F:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Max Clocks
+        Graphics                          : 2430 MHz
+        Memory                            : 12481 MHz
+`)
+
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0"},
+		1: {Index: 1, BusID: "00000000:4F:00.0"},
+	}
+
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ)
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 0 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[0].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 0 MaxMemoryClockMHz = %v, want 12481", infoByIndex[0].MaxMemoryClockMHz)
+	}
+	if infoByIndex[1].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 1 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[1].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[1].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 1 MaxMemoryClockMHz = %v, want 12481", infoByIndex[1].MaxMemoryClockMHz)
+	}
+}
+
+func TestEnrichGPUInfoWithMaxClocksSkipsPopulated(t *testing.T) {
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Max Clocks
+        Graphics                          : 9999 MHz
+        Memory                            : 9999 MHz
+`)
+	// Already populated — must not be overwritten.
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0", MaxGraphicsClockMHz: 2430, MaxMemoryClockMHz: 12481},
+	}
+
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ)
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("expected existing value to be preserved, got %v", infoByIndex[0].MaxGraphicsClockMHz)
 	}
 }
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -2,6 +2,29 @@ package platform

 import "time"

+// BenchmarkHostConfig holds static CPU and memory configuration captured at
+// benchmark start. Useful for correlating results across runs on different hardware.
+type BenchmarkHostConfig struct {
+	CPUModel    string  `json:"cpu_model,omitempty"`
+	CPUSockets  int     `json:"cpu_sockets,omitempty"`
+	CPUCores    int     `json:"cpu_cores,omitempty"`
+	CPUThreads  int     `json:"cpu_threads,omitempty"`
+	MemTotalGiB float64 `json:"mem_total_gib,omitempty"`
+}
+
+// BenchmarkCPULoad summarises host CPU utilisation sampled during the GPU
+// steady-state phase. High or unstable CPU load during a GPU benchmark may
+// indicate a competing workload or a CPU-bound driver bottleneck.
+type BenchmarkCPULoad struct {
+	AvgPct  float64 `json:"avg_pct"`
+	MaxPct  float64 `json:"max_pct"`
+	P95Pct  float64 `json:"p95_pct"`
+	Samples int     `json:"samples"`
+	// Status is "ok", "high", or "unstable".
+	Status string `json:"status"`
+	Note   string `json:"note,omitempty"`
+}
+
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
@@ -14,10 +37,12 @@ type NvidiaBenchmarkOptions struct {
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	RunNCCL           bool
-	ParallelGPUs      bool // run all selected GPUs simultaneously instead of sequentially
+	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
+	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
+	RampTotal         int    // total number of ramp-up steps in this run
+	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }

-
 type NvidiaBenchmarkResult struct {
 	BenchmarkVersion   string                       `json:"benchmark_version"`
 	GeneratedAt        time.Time                    `json:"generated_at"`
@@ -25,11 +50,17 @@ type NvidiaBenchmarkResult struct {
 	ServerModel        string                       `json:"server_model,omitempty"`
 	BenchmarkProfile   string                       `json:"benchmark_profile"`
 	ParallelGPUs       bool                         `json:"parallel_gpus,omitempty"`
+	RampStep           int                          `json:"ramp_step,omitempty"`
+	RampTotal          int                          `json:"ramp_total,omitempty"`
+	RampRunID          string                       `json:"ramp_run_id,omitempty"`
+	ScalabilityScore   float64                      `json:"scalability_score,omitempty"`
 	OverallStatus      string                       `json:"overall_status"`
 	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
 	Findings           []string                     `json:"findings,omitempty"`
 	Warnings           []string                     `json:"warnings,omitempty"`
 	Normalization      BenchmarkNormalization       `json:"normalization"`
+	HostConfig         *BenchmarkHostConfig         `json:"host_config,omitempty"`
+	CPULoad            *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
 	GPUs               []BenchmarkGPUResult         `json:"gpus"`
 	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
 	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
@@ -52,30 +83,38 @@ type BenchmarkNormalizationGPU struct {
 }

 type BenchmarkGPUResult struct {
-	Index                  int                        `json:"index"`
-	UUID                   string                     `json:"uuid,omitempty"`
-	Name                   string                     `json:"name,omitempty"`
-	BusID                  string                     `json:"bus_id,omitempty"`
-	VBIOS                  string                     `json:"vbios,omitempty"`
-	ComputeCapability      string                     `json:"compute_capability,omitempty"`
-	Backend                string                     `json:"backend,omitempty"`
-	Status                 string                     `json:"status"`
-	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
-	MultiprocessorCount    int                        `json:"multiprocessor_count,omitempty"`
-	DefaultPowerLimitW     float64                    `json:"default_power_limit_w,omitempty"`
-	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
-	BaseGraphicsClockMHz   float64                    `json:"base_graphics_clock_mhz,omitempty"`
-	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
-	LockedGraphicsClockMHz float64                    `json:"locked_graphics_clock_mhz,omitempty"`
-	LockedMemoryClockMHz   float64                    `json:"locked_memory_clock_mhz,omitempty"`
-	Baseline               BenchmarkTelemetrySummary  `json:"baseline"`
-	Steady                 BenchmarkTelemetrySummary  `json:"steady"`
-	Cooldown               BenchmarkTelemetrySummary  `json:"cooldown"`
-	Throttle               BenchmarkThrottleCounters  `json:"throttle_counters"`
-	PrecisionResults       []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
-	Scores                 BenchmarkScorecard         `json:"scores"`
-	DegradationReasons     []string                   `json:"degradation_reasons,omitempty"`
-	Notes                  []string                   `json:"notes,omitempty"`
+	Index               int     `json:"index"`
+	UUID                string  `json:"uuid,omitempty"`
+	Name                string  `json:"name,omitempty"`
+	BusID               string  `json:"bus_id,omitempty"`
+	VBIOS               string  `json:"vbios,omitempty"`
+	ComputeCapability   string  `json:"compute_capability,omitempty"`
+	Backend             string  `json:"backend,omitempty"`
+	Status              string  `json:"status"`
+	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
+	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
+	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
+	// CalibratedPeakPowerW is the p95 power measured during a short
+	// dcgmi targeted_power calibration run before the main benchmark.
+	// Used as the reference denominator for PowerSustainScore instead of
+	// the hardware default limit, which bee-gpu-burn cannot reach.
+	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
+	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
+	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
+	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
+	LockedGraphicsClockMHz float64                         `json:"locked_graphics_clock_mhz,omitempty"`
+	LockedMemoryClockMHz   float64                         `json:"locked_memory_clock_mhz,omitempty"`
+	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
+	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
+	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
+	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
+	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
+	// ECC error delta over the full benchmark (all phases combined). NOTE(review): omitempty has no effect on struct values in encoding/json.
+	ECC                BenchmarkECCCounters       `json:"ecc,omitempty"`
+	PrecisionResults   []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
+	Scores             BenchmarkScorecard         `json:"scores"`
+	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
+	Notes              []string                   `json:"notes,omitempty"`
 }

 type BenchmarkTelemetrySummary struct {
@@ -105,6 +144,18 @@ type BenchmarkThrottleCounters struct {
 	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
 }

+// BenchmarkECCCounters holds ECC error counts: a point-in-time sample or the
+// delta between two samples. Corrected = single-bit errors fixed by ECC (DRAM
+// degradation); Uncorrected = double-bit errors that could not be corrected
+// (serious fault). Both are volatile (since last driver reset), not persistent.
+type BenchmarkECCCounters struct {
+	Corrected   uint64 `json:"corrected"`
+	Uncorrected uint64 `json:"uncorrected"`
+}
+
+func (e BenchmarkECCCounters) Total() uint64 { return e.Corrected + e.Uncorrected }
+func (e BenchmarkECCCounters) IsZero() bool  { return e.Corrected == 0 && e.Uncorrected == 0 }
+
 type BenchmarkPrecisionResult struct {
 	Name          string  `json:"name"`
 	Category      string  `json:"category"`
@@ -115,19 +166,31 @@ type BenchmarkPrecisionResult struct {
 	K             uint64  `json:"k,omitempty"`
 	Iterations    uint64  `json:"iterations,omitempty"`
 	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
-	Notes         string  `json:"notes,omitempty"`
+	// Weight is the fp32-equivalence factor for this precision category.
+	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, fp8 = 0.25, fp4 = 0.125.
+	// WeightedTeraOpsPerSec = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
+	Weight                float64 `json:"weight,omitempty"`
+	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
+	Notes                 string  `json:"notes,omitempty"`
 }

 type BenchmarkScorecard struct {
-	ComputeScore        float64 `json:"compute_score"`
+	ComputeScore float64 `json:"compute_score"`
+	// SyntheticScore is the sum of fp32-equivalent TOPS from per-precision
+	// steady phases (each precision ran alone, full GPU dedicated).
+	SyntheticScore float64 `json:"synthetic_score,omitempty"`
+	// MixedScore is the sum of fp32-equivalent TOPS from the combined phase
+	// (all precisions competing simultaneously — closer to real workloads).
+	MixedScore float64 `json:"mixed_score,omitempty"`
+	// MixedEfficiency = MixedScore / SyntheticScore. Measures how well the GPU
+	// sustains throughput under concurrent mixed-precision load.
+	MixedEfficiency     float64 `json:"mixed_efficiency,omitempty"`
 	PowerSustainScore   float64 `json:"power_sustain_score"`
 	ThermalSustainScore float64 `json:"thermal_sustain_score"`
 	StabilityScore      float64 `json:"stability_score"`
 	InterconnectScore   float64 `json:"interconnect_score"`
 	CompositeScore      float64 `json:"composite_score"`
 	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
-	// Comparable across throttle levels and GPU generations. Low value at normal
-	// clocks indicates silicon degradation.
 	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
 }

@@ -145,6 +208,20 @@ type BenchmarkServerPower struct {
 	Notes           []string `json:"notes,omitempty"`
 }

+// BenchmarkPrecisionSteadyPhase holds per-precision-category telemetry collected
+// during a dedicated single-precision steady window. Because only one kernel
+// type runs at a time, Steady.PowerCVPct here is a genuine stability signal.
+type BenchmarkPrecisionSteadyPhase struct {
+	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
+	Steady                BenchmarkTelemetrySummary `json:"steady"`
+	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
+	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
+	// ECC errors accumulated during this precision phase only: non-zero corrected =
+	// stress-induced DRAM errors, any uncorrected = serious fault for this kernel type.
+	// NOTE(review): encoding/json never omits struct values, so omitempty is a no-op here.
+	ECC BenchmarkECCCounters `json:"ecc,omitempty"`
+}
+
 type BenchmarkInterconnectResult struct {
 	Status             string   `json:"status"`
 	Attempted          bool     `json:"attempted"`
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,6 +13,7 @@ import (

 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
+	Stage       string  `json:"stage,omitempty"`
 	ElapsedSec  float64 `json:"elapsed_sec"`
 	GPUIndex    int     `json:"index"`
 	TempC       float64 `json:"temp_c"`
@@ -141,14 +142,20 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
-	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
 	for _, r := range rows {
-		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
-			r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
+		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
+			strconv.Quote(strings.TrimSpace(r.Stage)), r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
 	}
 	return os.WriteFile(path, b.Bytes(), 0644)
 }

+type gpuMetricStageSpan struct {
+	Name  string
+	Start float64
+	End   float64
+}
+
 // WriteGPUMetricsHTML writes a standalone HTML file with one SVG chart per GPU.
 func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 	// Group by GPU index preserving order.
@@ -163,9 +170,25 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
 	}

+	stageSpans := buildGPUMetricStageSpans(rows)
+	stageColorByName := make(map[string]string, len(stageSpans))
+	for i, span := range stageSpans {
+		stageColorByName[span.Name] = gpuMetricStagePalette[i%len(gpuMetricStagePalette)]
+	}
+
+	var legend strings.Builder
+	if len(stageSpans) > 0 {
+		legend.WriteString(`<div class="stage-legend">`)
+		for _, span := range stageSpans {
+			fmt.Fprintf(&legend, `<span class="stage-chip"><span class="stage-swatch" style="background:%s"></span>%s</span>`,
+				stageColorByName[span.Name], gpuHTMLEscape(span.Name))
+		}
+		legend.WriteString(`</div>`)
+	}
+
 	var svgs strings.Builder
 	for _, gpuIdx := range order {
-		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx))
+		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx, stageSpans, stageColorByName))
 		svgs.WriteString("\n")
 	}

@@ -175,21 +198,39 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 <meta charset="utf-8">
 <title>GPU Stress Test Metrics</title>
 <style>
-body { font-family: sans-serif; background: #f0f0f0; margin: 0; padding: 20px; }
-h1 { text-align: center; color: #333; margin: 0 0 8px; }
-p  { text-align: center; color: #888; font-size: 13px; margin: 0 0 24px; }
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6)}
+*{box-sizing:border-box}
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);margin:0}
+.page{padding:24px}
+.card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);overflow:hidden}
+.card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px}
+.card-body{padding:16px}
+h1{font-size:22px;margin:0 0 6px}
+p{color:var(--muted);font-size:13px;margin:0 0 16px}
+.stage-legend{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 16px}
+.stage-chip{display:inline-flex;align-items:center;gap:8px;padding:4px 10px;border-radius:999px;background:var(--surface-2);border:1px solid var(--border-lite);font-size:12px}
+.stage-swatch{display:inline-block;width:12px;height:12px;border-radius:999px}
+.chart-block{margin-top:16px}
 </style>
 </head><body>
+<div class="page">
+<div class="card">
+<div class="card-head">GPU Stress Test Metrics</div>
+<div class="card-body">
 <h1>GPU Stress Test Metrics</h1>
 <p>Generated %s</p>
 %s
-</body></html>`, ts, svgs.String())
+<div class="chart-block">%s</div>
+</div>
+</div>
+</div>
+</body></html>`, ts, legend.String(), svgs.String())

 	return os.WriteFile(path, []byte(html), 0644)
 }

 // drawGPUChartSVG generates a self-contained SVG chart for one GPU.
-func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
+func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int, stageSpans []gpuMetricStageSpan, stageColorByName map[string]string) string {
 	// Layout
 	const W, H = 960, 520
 	const plotX1 = 120 // usage axis / chart left border
@@ -284,6 +325,23 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	}
 	b.WriteString("</g>\n")

+	// Stage backgrounds
+	for _, span := range stageSpans {
+		x1 := xv(span.Start)
+		x2 := xv(span.End)
+		if x2 < x1 {
+			x1, x2 = x2, x1
+		}
+		if x2-x1 < 1 {
+			x2 = x1 + 1
+		}
+		color := stageColorByName[span.Name]
+		fmt.Fprintf(&b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="%s" fill-opacity="0.18"/>`+"\n",
+			x1, plotY1, x2-x1, PH, color)
+		fmt.Fprintf(&b, `<text x="%.1f" y="%d" font-family="sans-serif" font-size="10" fill="#444" text-anchor="middle">%s</text>`+"\n",
+			x1+(x2-x1)/2, plotY1+12, gpuHTMLEscape(span.Name))
+	}
+
 	// Chart border
 	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
 		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
@@ -382,224 +440,6 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	return b.String()
 }

-const (
-	ansiRed    = "\033[31m"
-	ansiBlue   = "\033[34m"
-	ansiGreen  = "\033[32m"
-	ansiYellow = "\033[33m"
-	ansiReset  = "\033[0m"
-)
-
-const (
-	termChartWidth  = 70
-	termChartHeight = 12
-)
-
-// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
-// Used in SAT stress-test logs.
-func RenderGPUTerminalChart(rows []GPUMetricRow) string {
-	seen := make(map[int]bool)
-	var order []int
-	gpuMap := make(map[int][]GPUMetricRow)
-	for _, r := range rows {
-		if !seen[r.GPUIndex] {
-			seen[r.GPUIndex] = true
-			order = append(order, r.GPUIndex)
-		}
-		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
-	}
-
-	type seriesDef struct {
-		caption string
-		color   string
-		fn      func(GPUMetricRow) float64
-	}
-	defs := []seriesDef{
-		{"Temperature (°C)", ansiRed, func(r GPUMetricRow) float64 { return r.TempC }},
-		{"GPU Usage (%)", ansiBlue, func(r GPUMetricRow) float64 { return r.UsagePct }},
-		{"Power (W)", ansiGreen, func(r GPUMetricRow) float64 { return r.PowerW }},
-		{"Clock (MHz)", ansiYellow, func(r GPUMetricRow) float64 { return r.ClockMHz }},
-	}
-
-	var b strings.Builder
-	for _, gpuIdx := range order {
-		gr := gpuMap[gpuIdx]
-		if len(gr) == 0 {
-			continue
-		}
-		tMax := gr[len(gr)-1].ElapsedSec - gr[0].ElapsedSec
-		fmt.Fprintf(&b, "GPU %d — Stress Test Metrics  (%.0f seconds)\n\n", gpuIdx, tMax)
-		for _, d := range defs {
-			b.WriteString(renderLineChart(extractGPUField(gr, d.fn), d.color, d.caption,
-				termChartHeight, termChartWidth))
-			b.WriteRune('\n')
-		}
-	}
-
-	return strings.TrimRight(b.String(), "\n")
-}
-
-// renderLineChart draws a single time-series line chart using box-drawing characters.
-// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
-func renderLineChart(vals []float64, color, caption string, height, width int) string {
-	if len(vals) == 0 {
-		return caption + "\n"
-	}
-
-	mn, mx := gpuMinMax(vals)
-	if mn == mx {
-		mx = mn + 1
-	}
-
-	// Use the smaller of width or len(vals) to avoid stretching sparse data.
-	w := width
-	if len(vals) < w {
-		w = len(vals)
-	}
-	data := gpuDownsample(vals, w)
-
-	// row[i] = display row index: 0 = top = max value, height = bottom = min value.
-	row := make([]int, w)
-	for i, v := range data {
-		r := int(math.Round((mx - v) / (mx - mn) * float64(height)))
-		if r < 0 {
-			r = 0
-		}
-		if r > height {
-			r = height
-		}
-		row[i] = r
-	}
-
-	// Fill the character grid.
-	grid := make([][]rune, height+1)
-	for i := range grid {
-		grid[i] = make([]rune, w)
-		for j := range grid[i] {
-			grid[i][j] = ' '
-		}
-	}
-	for x := 0; x < w; x++ {
-		r := row[x]
-		if x == 0 {
-			grid[r][0] = '─'
-			continue
-		}
-		p := row[x-1]
-		switch {
-		case r == p:
-			grid[r][x] = '─'
-		case r < p: // value went up (row index decreased toward top)
-			grid[r][x] = '╭'
-			grid[p][x] = '╯'
-			for y := r + 1; y < p; y++ {
-				grid[y][x] = '│'
-			}
-		default: // r > p, value went down
-			grid[p][x] = '╮'
-			grid[r][x] = '╰'
-			for y := p + 1; y < r; y++ {
-				grid[y][x] = '│'
-			}
-		}
-	}
-
-	// Y axis tick labels.
-	ticks := gpuNiceTicks(mn, mx, height/2)
-	tickAtRow := make(map[int]string)
-	labelWidth := 4
-	for _, t := range ticks {
-		r := int(math.Round((mx - t) / (mx - mn) * float64(height)))
-		if r < 0 || r > height {
-			continue
-		}
-		s := gpuFormatTick(t)
-		tickAtRow[r] = s
-		if len(s) > labelWidth {
-			labelWidth = len(s)
-		}
-	}
-
-	var b strings.Builder
-	for r := 0; r <= height; r++ {
-		label := tickAtRow[r]
-		fmt.Fprintf(&b, "%*s", labelWidth, label)
-		switch {
-		case label != "":
-			b.WriteRune('┤')
-		case r == height:
-			b.WriteRune('┼')
-		default:
-			b.WriteRune('│')
-		}
-		b.WriteString(color)
-		b.WriteString(string(grid[r]))
-		b.WriteString(ansiReset)
-		b.WriteRune('\n')
-	}
-
-	// Bottom axis.
-	b.WriteString(strings.Repeat(" ", labelWidth))
-	b.WriteRune('└')
-	b.WriteString(strings.Repeat("─", w))
-	b.WriteRune('\n')
-
-	// Caption centered under the chart.
-	if caption != "" {
-		total := labelWidth + 1 + w
-		if pad := (total - len(caption)) / 2; pad > 0 {
-			b.WriteString(strings.Repeat(" ", pad))
-		}
-		b.WriteString(caption)
-		b.WriteRune('\n')
-	}
-
-	return b.String()
-}
-
-func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 {
-	v := make([]float64, len(rows))
-	for i, r := range rows {
-		v[i] = fn(r)
-	}
-	return v
-}
-
-// gpuDownsample averages vals into w buckets (or nearest-neighbor upsamples if len(vals) < w).
-func gpuDownsample(vals []float64, w int) []float64 {
-	n := len(vals)
-	if n == 0 {
-		return make([]float64, w)
-	}
-	result := make([]float64, w)
-	if n >= w {
-		counts := make([]int, w)
-		for i, v := range vals {
-			bucket := i * w / n
-			if bucket >= w {
-				bucket = w - 1
-			}
-			result[bucket] += v
-			counts[bucket]++
-		}
-		for i := range result {
-			if counts[i] > 0 {
-				result[i] /= float64(counts[i])
-			}
-		}
-	} else {
-		// Nearest-neighbour upsample.
-		for i := range result {
-			src := i * (n - 1) / (w - 1)
-			if src >= n {
-				src = n - 1
-			}
-			result[i] = vals[src]
-		}
-	}
-	return result
-}
-
 func gpuMinMax(vals []float64) (float64, float64) {
 	if len(vals) == 0 {
 		return 0, 1
@@ -644,3 +484,46 @@ func gpuFormatTick(v float64) string {
 	}
 	return strconv.FormatFloat(v, 'f', 1, 64)
 }
+
+var gpuMetricStagePalette = []string{
+	"#d95c5c",
+	"#2185d0",
+	"#21ba45",
+	"#f2c037",
+	"#6435c9",
+	"#00b5ad",
+	"#a5673f",
+}
+
+func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
+	var spans []gpuMetricStageSpan
+	for _, row := range rows {
+		name := strings.TrimSpace(row.Stage)
+		if name == "" {
+			name = "run"
+		}
+		if len(spans) == 0 || spans[len(spans)-1].Name != name {
+			spans = append(spans, gpuMetricStageSpan{Name: name, Start: row.ElapsedSec, End: row.ElapsedSec})
+			continue
+		}
+		spans[len(spans)-1].End = row.ElapsedSec
+	}
+	for i := range spans {
+		if spans[i].End <= spans[i].Start {
+			spans[i].End = spans[i].Start + 1
+		}
+	}
+	return spans
+}
+
+var gpuHTMLReplacer = strings.NewReplacer(
+	"&", "&amp;",
+	"<", "&lt;",
+	">", "&gt;",
+	`"`, "&quot;",
+	"'", "&#39;",
+)
+
+func gpuHTMLEscape(s string) string {
+	return gpuHTMLReplacer.Replace(s)
+}
--- a/audit/internal/platform/gpu_metrics_test.go
+++ b/audit/internal/platform/gpu_metrics_test.go
@@ -0,0 +1,65 @@
+package platform
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestWriteGPUMetricsCSVIncludesStageColumn(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.csv")
+	rows := []GPUMetricRow{
+		{Stage: "warmup", ElapsedSec: 1, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 80, PowerW: 420, ClockMHz: 1800, MemClockMHz: 1200},
+	}
+	if err := WriteGPUMetricsCSV(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsCSV: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{
+		"stage,elapsed_sec,gpu_index",
+		`"warmup",1.0,0,71.0,99.0,80.0,420.0,1800,1200`,
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("csv missing %q\n%s", needle, text)
+		}
+	}
+}
+
+func TestWriteGPUMetricsHTMLShowsStageLegendAndLabels(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.html")
+	rows := []GPUMetricRow{
+		{Stage: "baseline", ElapsedSec: 1, GPUIndex: 0, TempC: 50, UsagePct: 10, MemUsagePct: 5, PowerW: 100, ClockMHz: 500, MemClockMHz: 400},
+		{Stage: "baseline", ElapsedSec: 2, GPUIndex: 0, TempC: 51, UsagePct: 11, MemUsagePct: 5, PowerW: 101, ClockMHz: 510, MemClockMHz: 400},
+		{Stage: "steady-fp16", ElapsedSec: 3, GPUIndex: 0, TempC: 70, UsagePct: 98, MemUsagePct: 75, PowerW: 390, ClockMHz: 1700, MemClockMHz: 1100},
+		{Stage: "steady-fp16", ElapsedSec: 4, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 76, PowerW: 395, ClockMHz: 1710, MemClockMHz: 1110},
+	}
+	if err := WriteGPUMetricsHTML(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsHTML: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{
+		"stage-legend",
+		"baseline",
+		"steady-fp16",
+		"GPU Stress Test Metrics",
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("html missing %q\n%s", needle, text)
+		}
+	}
+}
--- a/audit/internal/platform/hpl.go
+++ b/audit/internal/platform/hpl.go
@@ -1,142 +0,0 @@
-package platform
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"time"
-)
-
-// HPLOptions configures the HPL (LINPACK) benchmark run.
-type HPLOptions struct {
-	MemFraction float64 // fraction of RAM to use (default 0.80)
-	NB          int     // block size (default 256)
-}
-
-// HPLResult holds the parsed result of an HPL run.
-type HPLResult struct {
-	N          int     // matrix dimension
-	NB         int     // block size
-	P          int     // process grid rows
-	Q          int     // process grid cols
-	TimeSec    float64 // wall time in seconds
-	GFlops     float64 // achieved performance
-	Residual   float64 // backward error residual (from HPL verification line)
-	Status     string  // "PASSED" or "FAILED"
-	RawOutput  string  // full xhpl output
-}
-
-func applyHPLDefaults(opts *HPLOptions) {
-	if opts.MemFraction <= 0 || opts.MemFraction > 1 {
-		opts.MemFraction = 0.80
-	}
-	if opts.NB <= 0 {
-		opts.NB = 256
-	}
-}
-
-// RunHPL runs bee-hpl and returns parsed results plus a tar.gz artifact path.
-func (s *System) RunHPL(ctx context.Context, baseDir string, opts HPLOptions, logFunc func(string)) (string, *HPLResult, error) {
-	applyHPLDefaults(&opts)
-
-	if baseDir == "" {
-		baseDir = "/var/log/bee-sat"
-	}
-	ts := time.Now().UTC().Format("20060102-150405")
-	runDir := filepath.Join(baseDir, "hpl-"+ts)
-	if err := os.MkdirAll(runDir, 0755); err != nil {
-		return "", nil, fmt.Errorf("mkdir %s: %w", runDir, err)
-	}
-
-	logPath := filepath.Join(runDir, "hpl.log")
-
-	cmd := []string{
-		"bee-hpl",
-		"--mem-fraction", strconv.FormatFloat(opts.MemFraction, 'f', 2, 64),
-		"--nb", strconv.Itoa(opts.NB),
-	}
-
-	if logFunc != nil {
-		logFunc(fmt.Sprintf("HPL: N will be auto-sized to %.0f%% of RAM, NB=%d", opts.MemFraction*100, opts.NB))
-	}
-
-	out, err := runSATCommandCtx(ctx, "", "hpl", cmd, nil, logFunc)
-	_ = os.WriteFile(logPath, out, 0644)
-
-	result := parseHPLOutput(string(out))
-	result.RawOutput = string(out)
-
-	if err != nil && err != context.Canceled {
-		return "", result, fmt.Errorf("bee-hpl failed: %w", err)
-	}
-	if err == nil && result.GFlops <= 0 {
-		return "", result, fmt.Errorf("HPL completed but no Gflops result found in output")
-	}
-
-	// Write summary
-	summary := fmt.Sprintf("N=%d NB=%d time=%.2fs gflops=%.3f status=%s\n",
-		result.N, result.NB, result.TimeSec, result.GFlops, result.Status)
-	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
-
-	if logFunc != nil {
-		logFunc(fmt.Sprintf("HPL result: N=%d NB=%d %.2fs %.3f Gflops %s",
-			result.N, result.NB, result.TimeSec, result.GFlops, result.Status))
-	}
-
-	ts2 := time.Now().UTC().Format("20060102-150405")
-	archive := filepath.Join(baseDir, "hpl-"+ts2+".tar.gz")
-	if archErr := createTarGz(archive, runDir); archErr != nil {
-		return runDir, result, err
-	}
-	return archive, result, err
-}
-
-// parseHPLOutput extracts N, NB, time, and Gflops from standard HPL output.
-//
-// HPL prints a result line of the form:
-//
-//	WR00L2L2       45312   256     1     1        1234.56             5.678e+01
-//	T/V               N    NB     P     Q           Time                 Gflops
-func parseHPLOutput(output string) *HPLResult {
-	result := &HPLResult{Status: "FAILED"}
-	for _, line := range strings.Split(output, "\n") {
-		line = strings.TrimSpace(line)
-		// Result line starts with WR
-		if strings.HasPrefix(line, "WR") {
-			fields := strings.Fields(line)
-			// WR00L2L2  N  NB  P  Q  Time  Gflops
-			if len(fields) >= 7 {
-				result.N, _ = strconv.Atoi(fields[1])
-				result.NB, _ = strconv.Atoi(fields[2])
-				result.P, _ = strconv.Atoi(fields[3])
-				result.Q, _ = strconv.Atoi(fields[4])
-				result.TimeSec, _ = strconv.ParseFloat(fields[5], 64)
-				result.GFlops, _ = strconv.ParseFloat(fields[6], 64)
-			}
-		}
-		// Verification line: "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ... PASSED"
-		if strings.Contains(line, "PASSED") {
-			result.Status = "PASSED"
-			fields := strings.Fields(line)
-			for i, f := range fields {
-				if f == "PASSED" && i > 0 {
-					result.Residual, _ = strconv.ParseFloat(fields[i-1], 64)
-				}
-			}
-		}
-	}
-	return result
-}
-
-// hplAvailable returns true if bee-hpl and xhpl are present and executable.
-func hplAvailable() bool {
-	if _, err := exec.LookPath("bee-hpl"); err != nil {
-		return false
-	}
-	_, err := os.Stat("/usr/local/lib/bee/xhpl")
-	return err == nil
-}
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -14,9 +14,17 @@ import (
 func (s *System) IsLiveMediaInRAM() bool {
 	fsType := mountFSType("/run/live/medium")
 	if fsType == "" {
+		// No medium mount at all — fall back to toram kernel parameter.
 		return toramActive()
 	}
-	return strings.EqualFold(fsType, "tmpfs")
+	if strings.EqualFold(fsType, "tmpfs") {
+		return true
+	}
+	// When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind
+	// mount of /run/live/medium fails (common for CD-ROM boots), the medium
+	// fstype still shows the CD-ROM type. Check whether the RAM copy exists.
+	files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs")
+	return len(files) > 0
 }

 func (s *System) LiveBootSource() LiveBootSource {
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -49,6 +49,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"--seconds", strconv.Itoa(opts.DurationSec),
 			"--size-mb", strconv.Itoa(opts.SizeMB),
 		}
+		if opts.StaggerSeconds > 0 && len(selected) > 1 {
+			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
@@ -63,6 +66,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"bee-john-gpu-stress",
 			"--seconds", strconv.Itoa(opts.DurationSec),
 		}
+		if opts.StaggerSeconds > 0 && len(selected) > 1 {
+			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -161,13 +161,7 @@ func (s *System) RunPlatformStress(
 	}
 	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)

-	// Pack tar.gz
-	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
-	if err := packPlatformDir(runDir, archivePath); err != nil {
-		return "", fmt.Errorf("pack archive: %w", err)
-	}
-	_ = os.RemoveAll(runDir)
-	return archivePath, nil
+	return runDir, nil
 }

 // collectPhase samples live metrics every second until ctx is done.
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -1,6 +1,7 @@
 package platform

 import (
+	"bufio"
 	"os"
 	"os/exec"
 	"strings"
@@ -114,6 +115,8 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
 	}

 	s.collectGPURuntimeHealth(vendor, &health)
+	s.collectToRAMHealth(&health)
+	s.collectUSBExportHealth(&health)

 	if health.Status != "FAILED" && len(health.Issues) > 0 {
 		health.Status = "PARTIAL"
@@ -168,6 +171,96 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }

+// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM.
+// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted),
+// "failed" = toram was requested but medium is not in RAM (copy failed or in progress).
+func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
+	inRAM := s.IsLiveMediaInRAM()
+	active := toramActive()
+	switch {
+	case inRAM:
+		health.ToRAMStatus = "ok"
+	case active:
+		// toram was requested but medium is not yet/no longer in RAM
+		health.ToRAMStatus = "failed"
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_failed",
+			Severity:    "warning",
+			Description: "toram boot parameter is set but the live medium is not mounted from RAM.",
+		})
+	default:
+		health.ToRAMStatus = "warning"
+	}
+}
+
+// collectUSBExportHealth scans /proc/mounts for a writable USB-backed filesystem
+// suitable for log export. Sets USBExportPath to the first match found.
+func (s *System) collectUSBExportHealth(health *schema.RuntimeHealth) {
+	health.USBExportPath = findUSBExportMount()
+}
+
+// findUSBExportMount returns the mount point of the first writable filesystem in
+// /proc/mounts (vfat, exfat, ext2/3/4, ntfs, ntfs3, fuseblk) whose backing block
+// device has USB transport. Returns "" if none is found.
+func findUSBExportMount() string {
+	f, err := os.Open("/proc/mounts")
+	if err != nil {
+		return ""
+	}
+	defer f.Close()
+
+	// fs types that are expected on USB export drives
+	exportFSTypes := map[string]bool{
+		"vfat":  true,
+		"exfat": true,
+		"ext2":  true,
+		"ext3":  true,
+		"ext4":  true,
+		"ntfs":  true,
+		"ntfs3": true,
+		"fuseblk": true,
+	}
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		// fields: device mountpoint fstype options dump pass
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 4 {
+			continue
+		}
+		device, mountPoint, fsType, options := fields[0], fields[1], fields[2], fields[3]
+		if !exportFSTypes[strings.ToLower(fsType)] {
+			continue
+		}
+		// Skip read-only mounts
+		opts := strings.Split(options, ",")
+		readOnly := false
+		for _, o := range opts {
+			if strings.TrimSpace(o) == "ro" {
+				readOnly = true
+				break
+			}
+		}
+		if readOnly {
+			continue
+		}
+		// Check USB transport via lsblk on the device (or its parent disk for partitions).
+		if !strings.HasPrefix(device, "/dev/") {
+			continue
+		}
+		checkDev := device
+		// lsblk only reports TRAN for the whole disk, not for partitions (e.g. /dev/sdc1).
+		// Strip trailing digits to get the parent disk name. NOTE: "pN"-style partitions (nvme0n1p1, mmcblk0p1) keep a trailing "p" and are not resolved.
+		if trimmed := strings.TrimRight(device, "0123456789"); trimmed != device && len(trimmed) > len("/dev/") {
+			checkDev = trimmed
+		}
+		if blockDeviceTransport(checkDev) == "usb" {
+			return mountPoint
+		}
+	}
+	return ""
+}
+
 func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
 	lsmodText := commandText("lsmod")

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -108,15 +108,15 @@ type nvidiaGPUHealth struct {
 }

 type nvidiaGPUStatusFile struct {
-	Index       int
-	Name        string
-	RunStatus   string
-	Reason      string
-	Health      string
-	HealthRaw   string
-	Observed    bool
-	Selected    bool
-	FailingJob  string
+	Index      int
+	Name       string
+	RunStatus  string
+	Reason     string
+	Health     string
+	HealthRaw  string
+	Observed   bool
+	Selected   bool
+	FailingJob string
 }

 // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
@@ -384,14 +384,28 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
 	), logFunc)
 }

-func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	selected, err := resolveDCGMGPUIndices(gpuIndices)
 	if err != nil {
 		return "", err
 	}
-	profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
-	if err != nil {
-		return "", err
+	var (
+		profCmd []string
+		profEnv []string
+	)
+	if staggerSec > 0 && len(selected) > 1 {
+		profCmd = []string{
+			"bee-dcgmproftester-staggered",
+			"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
+			"--stagger-seconds", strconv.Itoa(staggerSec),
+			"--devices", joinIndexList(selected),
+		}
+	} else {
+		profCmd, err = resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
+		if err != nil {
+			return "", err
+		}
+		profEnv = nvidiaVisibleDevicesEnv(selected)
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
@@ -399,7 +413,7 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 		satJob{
 			name:       "03-dcgmproftester.log",
 			cmd:        profCmd,
-			env:        nvidiaVisibleDevicesEnv(selected),
+			env:        profEnv,
 			collectGPU: true,
 			gpuIndices: selected,
 		},
@@ -648,11 +662,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
 		return "", err
 	}
-	archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 type satJob struct {
@@ -838,11 +848,7 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		}
 	}

-	archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
@@ -905,7 +911,7 @@ func writeNvidiaGPUStatusFiles(runDir, overall string, perGPU map[int]*nvidiaGPU
 			entry.Health = "UNKNOWN"
 		}
 		if entry.Name == "" {
-			entry.Name = "unknown"
+			entry.Name = "Unknown GPU"
 		}
 		var body strings.Builder
 		fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
@@ -1376,8 +1382,6 @@ func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd
 	if len(metricRows) > 0 {
 		_ = WriteGPUMetricsCSV(filepath.Join(runDir, "gpu-metrics.csv"), metricRows)
 		_ = WriteGPUMetricsHTML(filepath.Join(runDir, "gpu-metrics.html"), metricRows)
-		chart := RenderGPUTerminalChart(metricRows)
-		_ = os.WriteFile(filepath.Join(runDir, "gpu-metrics-term.txt"), []byte(chart), 0644)
 	}

 	return out, err
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -223,11 +223,7 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		return "", err
 	}

-	archive := filepath.Join(baseDir, "fan-stress-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func applyFanStressDefaults(opts *FanStressOptions) {
--- a/audit/internal/platform/techdump.go
+++ b/audit/internal/platform/techdump.go
@@ -20,6 +20,7 @@ var techDumpFixedCommands = []struct {
 	{Name: "dmidecode", Args: []string{"-t", "4"}, File: "dmidecode-type4.txt"},
 	{Name: "dmidecode", Args: []string{"-t", "17"}, File: "dmidecode-type17.txt"},
 	{Name: "lspci", Args: []string{"-vmm", "-D"}, File: "lspci-vmm.txt"},
+	{Name: "lspci", Args: []string{"-vvv"}, File: "lspci-vvv.txt"},
 	{Name: "lsblk", Args: []string{"-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL"}, File: "lsblk.json"},
 	{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
 	{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -70,6 +70,7 @@ type NvidiaStressOptions struct {
 	Loader            string
 	GPUIndices        []int
 	ExcludeGPUIndices []int
+	StaggerSeconds    int
 }

 func New() *System {
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -22,6 +22,10 @@ type RuntimeHealth struct {
 	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
 	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
 	NetworkStatus string                 `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed)
+	ToRAMStatus   string `json:"toram_status,omitempty"`
+	// USBExportPath: mount point of the first writable USB drive found, empty if none.
+	USBExportPath string `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
@@ -183,6 +187,13 @@ type HardwarePCIeDevice struct {
 	BatteryTemperatureC    *float64       `json:"battery_temperature_c,omitempty"`
 	BatteryVoltageV        *float64       `json:"battery_voltage_v,omitempty"`
 	BatteryReplaceRequired *bool          `json:"battery_replace_required,omitempty"`
+	SFPPresent             *bool          `json:"sfp_present,omitempty"`
+	SFPIdentifier          *string        `json:"sfp_identifier,omitempty"`
+	SFPConnector           *string        `json:"sfp_connector,omitempty"`
+	SFPVendor              *string        `json:"sfp_vendor,omitempty"`
+	SFPPartNumber          *string        `json:"sfp_part_number,omitempty"`
+	SFPSerialNumber        *string        `json:"sfp_serial_number,omitempty"`
+	SFPWavelengthNM        *float64       `json:"sfp_wavelength_nm,omitempty"`
 	SFPTemperatureC        *float64       `json:"sfp_temperature_c,omitempty"`
 	SFPTXPowerDBM          *float64       `json:"sfp_tx_power_dbm,omitempty"`
 	SFPRXPowerDBM          *float64       `json:"sfp_rx_power_dbm,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 	"sync/atomic"
 	"syscall"
@@ -209,6 +210,14 @@ func joinTaskIndices(indices []int) string {
 	return strings.Join(parts, ",")
 }

+func formatGPUIndexList(indices []int) string {
+	parts := make([]string, len(indices))
+	for i, idx := range indices {
+		parts[i] = strconv.Itoa(idx)
+	}
+	return strings.Join(parts, ",")
+}
+
 func formatSplitTaskName(baseName, selectionLabel string) string {
 	baseName = strings.TrimSpace(baseName)
 	selectionLabel = strings.TrimSpace(selectionLabel)
@@ -482,12 +491,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			return
 		}

-		var body struct {
-			Duration           int      `json:"duration"`
-			StressMode         bool     `json:"stress_mode"`
-			GPUIndices         []int    `json:"gpu_indices"`
-			ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
-			Loader             string   `json:"loader"`
+			var body struct {
+				Duration           int      `json:"duration"`
+				StressMode         bool     `json:"stress_mode"`
+				GPUIndices         []int    `json:"gpu_indices"`
+				ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
+				StaggerGPUStart    bool     `json:"stagger_gpu_start"`
+				ParallelGPUs       bool     `json:"parallel_gpus"`
+				Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
 			PlatformComponents []string `json:"platform_components"`
@@ -503,12 +514,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		if strings.TrimSpace(body.DisplayName) != "" {
 			name = body.DisplayName
 		}
-		params := taskParams{
-			Duration:           body.Duration,
-			StressMode:         body.StressMode,
-			GPUIndices:         body.GPUIndices,
-			ExcludeGPUIndices:  body.ExcludeGPUIndices,
-			Loader:             body.Loader,
+			params := taskParams{
+				Duration:           body.Duration,
+				StressMode:         body.StressMode,
+				GPUIndices:         body.GPUIndices,
+				ExcludeGPUIndices:  body.ExcludeGPUIndices,
+				StaggerGPUStart:    body.StaggerGPUStart,
+				ParallelGPUs:       body.ParallelGPUs,
+				Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
 			PlatformComponents: body.PlatformComponents,
@@ -538,6 +551,7 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req
 		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
 		RunNCCL           *bool  `json:"run_nccl"`
 		ParallelGPUs      *bool  `json:"parallel_gpus"`
+		RampUp            *bool  `json:"ramp_up"`
 		DisplayName       string `json:"display_name"`
 	}
 	if r.Body != nil {
@@ -555,10 +569,82 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req
 	if body.ParallelGPUs != nil {
 		parallelGPUs = *body.ParallelGPUs
 	}
+	rampUp := false
+	if body.RampUp != nil {
+		rampUp = *body.RampUp
+	}
+	// Build a descriptive base name that includes profile and mode so the task
+	// list is self-explanatory without opening individual task detail pages.
+	profile := strings.TrimSpace(body.Profile)
+	if profile == "" {
+		profile = "standard"
+	}
 	name := taskDisplayName("nvidia-benchmark", "", "")
 	if strings.TrimSpace(body.DisplayName) != "" {
 		name = body.DisplayName
 	}
+	// Append profile tag.
+	name = fmt.Sprintf("%s · %s", name, profile)
+
+	if rampUp && len(body.GPUIndices) > 1 {
+		// Ramp-up mode: resolve GPU list, then create one task per prefix
+		// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1]; each task runs its GPUs in parallel.
+		gpus, err := apiListNvidiaGPUs(h.opts.App)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		if len(resolved) < 2 {
+			// Fall through to normal single-task path.
+			rampUp = false
+		} else {
+			now := time.Now()
+			rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
+			var allTasks []*Task
+			for step := 1; step <= len(resolved); step++ {
+				subset := resolved[:step]
+				stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
+				t := &Task{
+					ID:        newJobID("benchmark-nvidia"),
+					Name:      stepName,
+					Target:    "nvidia-benchmark",
+					Priority:  15,
+					Status:    TaskPending,
+					CreatedAt: now,
+					params: taskParams{
+						GPUIndices:       append([]int(nil), subset...),
+						SizeMB:           body.SizeMB,
+						BenchmarkProfile: body.Profile,
+						RunNCCL:          runNCCL && step == len(resolved),
+						ParallelGPUs:     true,
+						RampStep:         step,
+						RampTotal:        len(resolved),
+						RampRunID:        rampRunID,
+						DisplayName:      stepName,
+					},
+				}
+				allTasks = append(allTasks, t)
+			}
+			for _, t := range allTasks {
+				globalQueue.enqueue(t)
+			}
+			writeTaskRunResponse(w, allTasks)
+			return
+		}
+	}
+
+	// For non-ramp tasks append mode tag.
+	if parallelGPUs {
+		name = fmt.Sprintf("%s · parallel", name)
+	} else {
+		name = fmt.Sprintf("%s · sequential", name)
+	}
+
 	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
 		GPUIndices:        body.GPUIndices,
 		ExcludeGPUIndices: body.ExcludeGPUIndices,
@@ -1376,107 +1462,3 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	return nil
 }

-// ── Display / Screen Resolution ───────────────────────────────────────────────
-
-type displayMode struct {
-	Output  string `json:"output"`
-	Mode    string `json:"mode"`
-	Current bool   `json:"current"`
-}
-
-type displayInfo struct {
-	Output  string        `json:"output"`
-	Modes   []displayMode `json:"modes"`
-	Current string        `json:"current"`
-}
-
-var xrandrOutputRE = regexp.MustCompile(`^(\S+)\s+connected`)
-var xrandrModeRE = regexp.MustCompile(`^\s{3}(\d+x\d+)\s`)
-var xrandrCurrentRE = regexp.MustCompile(`\*`)
-
-func parseXrandrOutput(out string) []displayInfo {
-	var infos []displayInfo
-	var cur *displayInfo
-	for _, line := range strings.Split(out, "\n") {
-		if m := xrandrOutputRE.FindStringSubmatch(line); m != nil {
-			if cur != nil {
-				infos = append(infos, *cur)
-			}
-			cur = &displayInfo{Output: m[1]}
-			continue
-		}
-		if cur == nil {
-			continue
-		}
-		if m := xrandrModeRE.FindStringSubmatch(line); m != nil {
-			isCurrent := xrandrCurrentRE.MatchString(line)
-			mode := displayMode{Output: cur.Output, Mode: m[1], Current: isCurrent}
-			cur.Modes = append(cur.Modes, mode)
-			if isCurrent {
-				cur.Current = m[1]
-			}
-		}
-	}
-	if cur != nil {
-		infos = append(infos, *cur)
-	}
-	return infos
-}
-
-func xrandrCommand(args ...string) *exec.Cmd {
-	cmd := exec.Command("xrandr", args...)
-	env := append([]string{}, os.Environ()...)
-	hasDisplay := false
-	hasXAuthority := false
-	for _, kv := range env {
-		if strings.HasPrefix(kv, "DISPLAY=") && strings.TrimPrefix(kv, "DISPLAY=") != "" {
-			hasDisplay = true
-		}
-		if strings.HasPrefix(kv, "XAUTHORITY=") && strings.TrimPrefix(kv, "XAUTHORITY=") != "" {
-			hasXAuthority = true
-		}
-	}
-	if !hasDisplay {
-		env = append(env, "DISPLAY=:0")
-	}
-	if !hasXAuthority {
-		env = append(env, "XAUTHORITY=/home/bee/.Xauthority")
-	}
-	cmd.Env = env
-	return cmd
-}
-
-func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
-	out, err := xrandrCommand().Output()
-	if err != nil {
-		writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
-		return
-	}
-	writeJSON(w, parseXrandrOutput(string(out)))
-}
-
-func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
-	var req struct {
-		Output string `json:"output"`
-		Mode   string `json:"mode"`
-	}
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Output == "" || req.Mode == "" {
-		writeError(w, http.StatusBadRequest, "output and mode are required")
-		return
-	}
-	// Validate mode looks like WxH to prevent injection
-	if !regexp.MustCompile(`^\d+x\d+$`).MatchString(req.Mode) {
-		writeError(w, http.StatusBadRequest, "invalid mode format")
-		return
-	}
-	// Validate output name (no special chars)
-	if !regexp.MustCompile(`^[A-Za-z0-9_\-]+$`).MatchString(req.Output) {
-		writeError(w, http.StatusBadRequest, "invalid output name")
-		return
-	}
-	if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
-		writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
-		return
-	}
-	writeJSON(w, map[string]string{"status": "ok", "output": req.Output, "mode": req.Mode})
-}
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -10,30 +10,6 @@ import (
 	"bee/audit/internal/platform"
 )

-func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
-	t.Setenv("DISPLAY", "")
-	t.Setenv("XAUTHORITY", "")
-
-	cmd := xrandrCommand("--query")
-
-	var hasDisplay bool
-	var hasXAuthority bool
-	for _, kv := range cmd.Env {
-		if kv == "DISPLAY=:0" {
-			hasDisplay = true
-		}
-		if kv == "XAUTHORITY=/home/bee/.Xauthority" {
-			hasXAuthority = true
-		}
-	}
-	if !hasDisplay {
-		t.Fatalf("DISPLAY not injected: %v", cmd.Env)
-	}
-	if !hasXAuthority {
-		t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
-	}
-}
-
 func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	globalQueue.mu.Lock()
 	originalTasks := globalQueue.tasks
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -83,6 +83,10 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
 		}
 	}

+	// Downsample to at most ~1400 points (one per pixel) before building SVG.
+	times, datasets = downsampleTimeSeries(times, datasets, 1400)
+	pointCount = len(times)
+
 	statsLabel := chartStatsLabel(datasets)

 	legendItems := []metricChartSeries{}
@@ -196,6 +200,19 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
 		}
 	}

+	// Downsample to at most ~1400 points before building SVG.
+	{
+		datasets := make([][]float64, len(series))
+		for i := range series {
+			datasets[i] = series[i].Values
+		}
+		times, datasets = downsampleTimeSeries(times, datasets, 1400)
+		pointCount = len(times)
+		for i := range series {
+			series[i].Values = datasets[i]
+		}
+	}
+
 	scales := make([]chartScale, len(series))
 	for i := range series {
 		min, max := chartSeriesBounds(series[i].Values)
@@ -626,6 +643,87 @@ func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end
 	b.WriteString(`</g>` + "\n")
 }

+// downsampleTimeSeries reduces the time series to at most maxPts points using
+// min-max bucketing: each of maxPts/2 buckets keeps the indices of its min and max
+// value in the first dataset whose length equals len(times) (first/last index if none).
+// Those indices are applied to every full-length dataset so series stay aligned; others
+// pass through unchanged. If len(times) <= maxPts or maxPts <= 0, inputs are returned as-is.
+func downsampleTimeSeries(times []time.Time, datasets [][]float64, maxPts int) ([]time.Time, [][]float64) {
+	n := len(times)
+	if n <= maxPts || maxPts <= 0 {
+		return times, datasets
+	}
+	buckets := maxPts / 2
+	if buckets < 1 {
+		buckets = 1
+	}
+	// Use the first dataset that has the same length as times as the reference
+	// for deciding which two indices to keep per bucket.
+	var ref []float64
+	for _, ds := range datasets {
+		if len(ds) == n {
+			ref = ds
+			break
+		}
+	}
+	selected := make([]int, 0, maxPts)
+	bucketSize := float64(n) / float64(buckets)
+	for b := 0; b < buckets; b++ {
+		lo := int(math.Round(float64(b) * bucketSize))
+		hi := int(math.Round(float64(b+1) * bucketSize))
+		if hi > n {
+			hi = n
+		}
+		if lo >= hi {
+			continue
+		}
+		if ref == nil {
+			selected = append(selected, lo)
+			if hi-1 != lo {
+				selected = append(selected, hi-1)
+			}
+			continue
+		}
+		minIdx, maxIdx := lo, lo
+		for i := lo + 1; i < hi; i++ {
+			if ref[i] < ref[minIdx] {
+				minIdx = i
+			}
+			if ref[i] > ref[maxIdx] {
+				maxIdx = i
+			}
+		}
+		if minIdx <= maxIdx {
+			selected = append(selected, minIdx)
+			if maxIdx != minIdx {
+				selected = append(selected, maxIdx)
+			}
+		} else {
+			selected = append(selected, maxIdx)
+			if minIdx != maxIdx {
+				selected = append(selected, minIdx)
+			}
+		}
+	}
+	outTimes := make([]time.Time, len(selected))
+	for i, idx := range selected {
+		outTimes[i] = times[idx]
+	}
+	outDatasets := make([][]float64, len(datasets))
+	for d, ds := range datasets {
+		if len(ds) != n {
+			outDatasets[d] = ds
+			continue
+		}
+		out := make([]float64, len(selected))
+		for i, idx := range selected {
+			out[i] = ds[idx]
+		}
+		outDatasets[d] = out
+	}
+	return outTimes, outDatasets
+}
+
 func chartXForTime(ts, start, end time.Time, left, right int) float64 {
 	if !end.After(start) {
 		return float64(left+right) / 2
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -295,10 +295,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	// Tools
 	mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)

-	// Display
-	mux.HandleFunc("GET /api/display/resolutions", h.handleAPIDisplayResolutions)
-	mux.HandleFunc("POST /api/display/set", h.handleAPIDisplaySet)
-
 	// GPU presence / tools
 	mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
 	mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -693,8 +693,8 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	for _, needle := range []string{
 		`Benchmark Results`,
 		`Composite score by saved benchmark run and GPU.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
-		`GPU #1 — NVIDIA H100 PCIe`,
+		`GPU 0`,
+		`GPU 1`,
 		`#1`,
 		wantTime,
 		`1176.25`,
@@ -1094,6 +1094,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 	}
 	body := rec.Body.String()
 	for _, needle := range []string{
+		// Runtime Health card — LiveCD checks only
 		`Runtime Health`,
 		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
 		`Export Directory`,
@@ -1102,16 +1103,18 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 		`CUDA / ROCm`,
 		`Required Utilities`,
 		`Bee Services`,
-		`<td>CPU</td>`,
-		`<td>Memory</td>`,
-		`<td>Storage</td>`,
-		`<td>GPU</td>`,
 		`CUDA runtime is not ready for GPU SAT.`,
 		`Missing: nvidia-smi`,
 		`bee-nvidia=inactive`,
-		`cpu SAT: FAILED`,
-		`storage SAT: FAILED`,
-		`sat:nvidia`,
+		// Hardware Summary card — component health badges
+		`Hardware Summary`,
+		`>CPU<`,
+		`>Memory<`,
+		`>Storage<`,
+		`>GPU<`,
+		`>PSU<`,
+		`badge-warn`,   // cpu Warning badge
+		`badge-err`,    // storage Critical badge
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("dashboard missing %q: %s", needle, body)
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -39,7 +39,6 @@ var taskNames = map[string]string{
 	"nvidia-interconnect":    "NVIDIA Interconnect Test (NCCL all_reduce_perf)",
 	"nvidia-bandwidth":       "NVIDIA Bandwidth Test (NVBandwidth)",
 	"nvidia-stress":          "NVIDIA GPU Stress",
-	"hpl":                    "LINPACK (HPL)",
 	"memory":                 "Memory SAT",
 	"storage":                "Storage SAT",
 	"cpu":                    "CPU SAT",
@@ -119,6 +118,7 @@ type taskParams struct {
 	StressMode         bool     `json:"stress_mode,omitempty"`
 	GPUIndices         []int    `json:"gpu_indices,omitempty"`
 	ExcludeGPUIndices  []int    `json:"exclude_gpu_indices,omitempty"`
+	StaggerGPUStart    bool     `json:"stagger_gpu_start,omitempty"`
 	SizeMB             int      `json:"size_mb,omitempty"`
 	Passes             int      `json:"passes,omitempty"`
 	Loader             string   `json:"loader,omitempty"`
@@ -126,6 +126,9 @@ type taskParams struct {
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
+	RampStep           int      `json:"ramp_step,omitempty"`
+	RampTotal          int      `json:"ramp_total,omitempty"`
+	RampRunID          string   `json:"ramp_run_id,omitempty"`
 	DisplayName        string   `json:"display_name,omitempty"`
 	Device             string   `json:"device,omitempty"` // for install
 	PlatformComponents []string `json:"platform_components,omitempty"`
@@ -152,6 +155,12 @@ type burnPreset struct {
 	DurationSec int
 }

+// nvidiaRampSpec is the timing plan produced by resolveNvidiaRampPlan for an
+// NVIDIA burn run, with or without staggered GPU start. All values are seconds.
+type nvidiaRampSpec struct {
+	// DurationSec is the burn duration from the profile preset; for staggered
+	// "overnight" runs it is the total runtime minus the ramp steps.
+	DurationSec      int
+	// StaggerSeconds is the ramp step applied per additional GPU. It stays zero
+	// when staggering is disabled or only one GPU is selected.
+	StaggerSeconds   int
+	// TotalDurationSec is the overall runtime: DurationSec plus one
+	// StaggerSeconds step for every GPU after the first.
+	TotalDurationSec int
+}
+
 func resolveBurnPreset(profile string) burnPreset {
 	switch profile {
 	case "overnight":
@@ -163,6 +172,45 @@ func resolveBurnPreset(profile string) burnPreset {
 	}
 }

+// resolveNvidiaRampPlan derives the timing plan for an NVIDIA burn run.
+//
+// With staggering disabled, or with exactly one selected GPU, the plan is the
+// profile's preset duration and no stagger. With staggering enabled an explicit
+// GPU selection is required. The "acceptance" profile ramps in 10-minute steps
+// and any other non-overnight profile in 2-minute steps, both on top of the
+// preset duration. The "overnight" profile ramps in 1-hour steps inside an
+// 8-hour window that is stretched to one hour per GPU when needed and rejected
+// once it would exceed 10 hours.
+func resolveNvidiaRampPlan(profile string, enabled bool, selected []int) (nvidiaRampSpec, error) {
+	presetSec := resolveBurnPreset(profile).DurationSec
+	spec := nvidiaRampSpec{DurationSec: presetSec, TotalDurationSec: presetSec}
+
+	gpus := len(selected)
+	switch {
+	case !enabled:
+		return spec, nil
+	case gpus == 0:
+		return nvidiaRampSpec{}, fmt.Errorf("staggered NVIDIA burn requires explicit GPU selection")
+	case gpus == 1:
+		return spec, nil
+	}
+
+	// Every GPU after the first adds one ramp step.
+	steps := gpus - 1
+
+	if profile == "overnight" {
+		const hour = 60 * 60
+		total := 8 * hour
+		// Keep at least one hour of runtime per selected GPU.
+		if floor := gpus * hour; total < floor {
+			total = floor
+		}
+		if total > 10*hour {
+			return nvidiaRampSpec{}, fmt.Errorf("overnight staggered NVIDIA burn supports at most 10 GPUs")
+		}
+		spec.StaggerSeconds = hour
+		spec.TotalDurationSec = total
+		spec.DurationSec = total - hour*steps
+		return spec, nil
+	}
+
+	spec.StaggerSeconds = 2 * 60
+	if profile == "acceptance" {
+		spec.StaggerSeconds = 10 * 60
+	}
+	spec.TotalDurationSec = spec.DurationSec + spec.StaggerSeconds*steps
+	return spec, nil
+}
+
 func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
 	acceptanceCycles := []platform.PlatformStressCycle{
 		{LoadSec: 85, IdleSec: 5},
@@ -592,6 +640,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RunNCCL:           t.params.RunNCCL,
 			ParallelGPUs:      t.params.ParallelGPUs,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-compute":
 		if a == nil {
@@ -602,7 +653,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
-		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
+		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, rampPlan.StaggerSeconds, j.append)
 	case "nvidia-targeted-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -652,11 +714,23 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
 		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
 			DurationSec:       dur,
 			Loader:            t.params.Loader,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			StaggerSeconds:    rampPlan.StaggerSeconds,
 		}, j.append)
 	case "memory":
 		if a == nil {
@@ -740,19 +814,6 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
-	case "hpl":
-		if a == nil {
-			err = fmt.Errorf("app not configured")
-			break
-		}
-		opts := platform.HPLOptions{
-			MemFraction: 0.80,
-			NB:          256,
-		}
-		archive, err = func() (string, error) {
-			path, _, runErr := a.RunHPL(ctx, "", opts, j.append)
-			return path, runErr
-		}()
 	case "platform-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -422,7 +422,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	for _, needle := range []string{
 		`Benchmark Results`,
 		`Composite score for this benchmark task.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
+		`GPU 0`,
 		`1176.25`,
 	} {
 		if !strings.Contains(html, needle) {
@@ -491,6 +491,83 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }

+// TestResolveNvidiaRampPlan checks the ramp plan for each burn profile, both
+// with staggering disabled and enabled, plus the two rejection paths (missing
+// GPU selection and too many GPUs for an overnight run).
+func TestResolveNvidiaRampPlan(t *testing.T) {
+	const (
+		minute = 60
+		hour   = 60 * minute
+	)
+	type rampCase struct {
+		name     string
+		profile  string
+		enabled  bool
+		selected []int
+		want     nvidiaRampSpec
+		wantErr  string
+	}
+	cases := []rampCase{
+		{name: "disabled uses base preset", profile: "acceptance", selected: []int{0, 1}, want: nvidiaRampSpec{DurationSec: hour, TotalDurationSec: hour}},
+		{name: "smoke ramp uses two minute steps", profile: "smoke", enabled: true, selected: []int{0, 1, 2}, want: nvidiaRampSpec{DurationSec: 5 * minute, StaggerSeconds: 2 * minute, TotalDurationSec: 9 * minute}},
+		{name: "acceptance ramp uses ten minute steps", profile: "acceptance", enabled: true, selected: []int{0, 1, 2}, want: nvidiaRampSpec{DurationSec: hour, StaggerSeconds: 10 * minute, TotalDurationSec: 80 * minute}},
+		{name: "overnight stays at eight hours when possible", profile: "overnight", enabled: true, selected: []int{0, 1, 2}, want: nvidiaRampSpec{DurationSec: 6 * hour, StaggerSeconds: hour, TotalDurationSec: 8 * hour}},
+		{name: "overnight extends to keep one hour after final gpu", profile: "overnight", enabled: true, selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8}, want: nvidiaRampSpec{DurationSec: hour, StaggerSeconds: hour, TotalDurationSec: 9 * hour}},
+		{name: "overnight rejects impossible gpu count", profile: "overnight", enabled: true, selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, wantErr: "at most 10 GPUs"},
+		{name: "enabled requires explicit selection", profile: "smoke", enabled: true, wantErr: "requires explicit GPU selection"},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got, err := resolveNvidiaRampPlan(c.profile, c.enabled, c.selected)
+			switch {
+			case c.wantErr != "":
+				// Error cases only assert on the message; the returned plan is ignored.
+				if err == nil || !strings.Contains(err.Error(), c.wantErr) {
+					t.Fatalf("err=%v want substring %q", err, c.wantErr)
+				}
+			case err != nil:
+				t.Fatalf("resolveNvidiaRampPlan error: %v", err)
+			case got != c.want:
+				t.Fatalf("resolveNvidiaRampPlan(%q, %t, %v)=%+v want %+v", c.profile, c.enabled, c.selected, got, c.want)
+			}
+		})
+	}
+}
+
 func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
 	tests := []struct {
 		loader string
--- a/bible-local/docs/gpu-model-propagation.md
+++ b/bible-local/docs/gpu-model-propagation.md
@@ -0,0 +1,117 @@
+# GPU Model Name Propagation
+
+How GPU model names are detected, stored, and displayed throughout the project.
+
+---
+
+## Detection Sources
+
+There are **three separate pipelines** for GPU model names — they use different structs and don't share state.
+
+### Pipeline A — Live / SAT (nvidia-smi query at runtime)
+
+**File:** `audit/internal/platform/sat.go`
+
+- `ListNvidiaGPUs()` → `NvidiaGPU.Name` (field: `name`, from `nvidia-smi --query-gpu=index,name,...`)
+- `ListNvidiaGPUStatuses()` → `NvidiaGPUStatus.Name`
+- Used by: GPU selection UI, live metrics labels, burn/stress test logic
+
+### Pipeline B — Benchmark results
+
+**File:** `audit/internal/platform/benchmark.go`, line 124
+
+- `queryBenchmarkGPUInfo(selected)` → `benchmarkGPUInfo.Name`
+- Stored in `BenchmarkGPUResult.Name` (`json:"name,omitempty"`)
+- Used by: benchmark history table, benchmark report
+
+### Pipeline C — Hardware audit JSON (PCIe schema)
+
+**File:** `audit/internal/schema/hardware.go`
+
+- `HardwarePCIeDevice.Model *string` (field name is **Model**, not Name)
+- For AMD GPUs: populated by `audit/internal/collector/amdgpu.go` from `info.Product`
+- For NVIDIA GPUs: **NOT populated** by `audit/internal/collector/nvidia.go` — the NVIDIA enricher sets telemetry/status but skips the Model field
+- Used by: hardware summary page (`hwDescribeGPU` in `pages.go:487`)
+
+---
+
+## Key Inconsistency: NVIDIA PCIe Model is Never Set
+
+`audit/internal/collector/nvidia.go` — `enrichPCIeWithNVIDIAData()` enriches NVIDIA PCIe devices with telemetry and status but does **not** populate `HardwarePCIeDevice.Model`.
+
+This means:
+- Hardware summary page shows "Unknown GPU" for all NVIDIA devices (falls back at `pages.go:486`)
+- AMD GPUs do have their model populated
+
+The fix would be: copy `gpu.Name` from the SAT pipeline into `dev.Model` inside `enrichPCIeWithNVIDIAData`.
+
+---
+
+## Benchmark History "Unknown GPU" Issue
+
+**Symptom:** Benchmark history table shows "GPU #N — Unknown GPU" columns instead of real GPU model names.
+
+**Root cause:** `BenchmarkGPUResult.Name` has tag `json:"name,omitempty"`. If `queryBenchmarkGPUInfo()` fails (warns at `benchmark.go:126`) or returns empty names, the Name field is never set and is omitted from JSON. Loaded results have empty Name → falls back to "Unknown GPU" at `pages.go:2226, 2237`.
+
+This happens for:
+- Older result files saved before the `Name` field was added
+- Runs where nvidia-smi query failed before the benchmark started
+
+---
+
+## Fallback Strings — Current State
+
+| Location | File | Fallback string |
+|---|---|---|
+| Hardware summary (PCIe) | `pages.go:486` | `"Unknown GPU"` |
+| Benchmark report summary | `benchmark_report.go:43` | `"Unknown GPU"` |
+| Benchmark report scorecard | `benchmark_report.go:93` | `"Unknown"` ← inconsistent |
+| Benchmark report detail | `benchmark_report.go:122` | `"Unknown GPU"` |
+| Benchmark history per-GPU col | `pages.go:2226` | `"Unknown GPU"` |
+| Benchmark history parallel col | `pages.go:2237` | `"Unknown GPU"` |
+| SAT status file write | `sat.go:922` | `"unknown"` ← lowercase, inconsistent |
+| GPU selection API | `api.go:163` | `"GPU N"` (no "Unknown") |
+
+**Rule:** all UI fallbacks should use `"Unknown GPU"`. The two outliers are `benchmark_report.go:93` (`"Unknown"`) and `sat.go:922` (`"unknown"`).
+
+---
+
+## GPU Selection UI
+
+**File:** `audit/internal/webui/pages.go`
+
+- Source: `GET /api/gpus` → `api.go` → `ListNvidiaGPUs()` → live nvidia-smi
+- Render: `'GPU ' + gpu.index + ' — ' + gpu.name + ' · ' + mem`
+- Fallback: `gpu.name || 'GPU ' + idx` (JS, line ~1432)
+
+This always shows the correct model because it queries nvidia-smi live. It is **not** connected to benchmark result data.
+
+---
+
+## Data Flow Summary
+
+```
+nvidia-smi (live)
+  └─ ListNvidiaGPUs() → NvidiaGPU.Name
+       ├─ GPU selection UI (always correct)
+       ├─ Live metrics labels (charts_svg.go)
+       └─ SAT/burn status file (sat.go)
+
+nvidia-smi (at benchmark start)
+  └─ queryBenchmarkGPUInfo() → benchmarkGPUInfo.Name
+       └─ BenchmarkGPUResult.Name (json:"name,omitempty")
+            ├─ Benchmark report
+            └─ Benchmark history table columns
+
+nvidia-smi / lspci (audit collection)
+  └─ HardwarePCIeDevice.Model (NVIDIA: NOT populated; AMD: populated)
+       └─ Hardware summary page hwDescribeGPU()
+```
+
+---
+
+## What Needs Fixing
+
+1. **NVIDIA PCIe Model** — `enrichPCIeWithNVIDIAData()` should set `dev.Model = &gpu.Name`
+2. **Fallback consistency** — `benchmark_report.go:93` should say `"Unknown GPU"` not `"Unknown"`; `sat.go:922` should say `"Unknown GPU"` not `"unknown"`
+3. **Old benchmark JSONs** — no fix possible for already-saved results with missing names (display-only issue)
--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -19,7 +19,5 @@ ROCRAND_VERSION=3.2.0.60304-76~22.04
 HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04
 HIPBLASLT_VERSION=0.10.0.60304-76~22.04
 COMGR_VERSION=2.8.0.60304-76~22.04
-HPL_VERSION=2.3
-HPL_SHA256=32c5c17d22330e6f2337b681aded51637fb6008d3f0eb7c277b163fadd612830
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -1121,6 +1121,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int cc_minor,
                               int seconds,
                               int size_mb,
+                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
@@ -1159,7 +1160,8 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    }

    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
-        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc &&
+            (precision_filter == NULL || strcmp(k_profiles[i].block_label, precision_filter) == 0)) {
            planned++;
        }
    }
@@ -1218,6 +1220,13 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
+        if (precision_filter != NULL && strcmp(desc->block_label, precision_filter) != 0) {
+            append_detail(report->details,
+                          sizeof(report->details),
+                          "%s=SKIPPED precision_filter\n",
+                          desc->name);
+            continue;
+        }
        for (int lane = 0; lane < stream_count; lane++) {
            CUstream stream = streams[lane];
            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
@@ -1339,6 +1348,7 @@ int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
    int device_index = 0;
+    const char *precision_filter = NULL; /* NULL = all; else block_label to match */
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
@@ -1346,8 +1356,12 @@ int main(int argc, char **argv) {
            size_mb = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
            device_index = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "--precision") == 0 && i + 1 < argc) {
+            precision_filter = argv[++i];
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
+            fprintf(stderr,
+                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision fp8|fp16|fp32|fp64|fp4]\n",
+                    argv[0]);
            return 2;
        }
    }
@@ -1407,7 +1421,7 @@ int main(int argc, char **argv) {
    int ok = 0;

 #if HAVE_CUBLASLT_HEADERS
-    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report);
+    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, precision_filter, &report);
 #endif
    if (!ok) {
        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report)) {
--- a/iso/builder/build-hpl.sh
+++ b/iso/builder/build-hpl.sh
@@ -1,331 +0,0 @@
-#!/bin/sh
-# build-hpl.sh — build HPL (High Performance LINPACK) for the bee LiveCD.
-#
-# Downloads HPL 2.3 from netlib, downloads OpenBLAS runtime from the Debian 12
-# apt repo, and compiles xhpl using a minimal single-process MPI stub so that
-# no MPI package is required inside the ISO.
-#
-# The resulting xhpl binary is a standard HPL binary whose output is compatible
-# with the accepted HPL format (WR... Gflops lines).
-#
-# Output:
-#   $CACHE_DIR/bin/xhpl
-#   $CACHE_DIR/lib/libopenblas.so*   (runtime, injected into ISO /usr/lib/)
-
-set -e
-
-HPL_VERSION="$1"
-HPL_SHA256="$2"
-DIST_DIR="$3"
-
-[ -n "$HPL_VERSION" ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
-[ -n "$HPL_SHA256"  ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
-[ -n "$DIST_DIR"    ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
-
-echo "=== HPL ${HPL_VERSION} ==="
-
-CACHE_DIR="${DIST_DIR}/hpl-${HPL_VERSION}"
-CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
-DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/hpl-downloads"
-
-if [ -x "${CACHE_DIR}/bin/xhpl" ]; then
-    echo "=== HPL cached, skipping build ==="
-    echo "binary: ${CACHE_DIR}/bin/xhpl"
-    exit 0
-fi
-
-mkdir -p "${DOWNLOAD_CACHE_DIR}" "${CACHE_DIR}/bin" "${CACHE_DIR}/lib"
-
-# ── download HPL source ────────────────────────────────────────────────────────
-HPL_TAR="${DOWNLOAD_CACHE_DIR}/hpl-${HPL_VERSION}.tar.gz"
-DEFAULT_HPL_URLS="
-https://www.netlib.org/benchmark/hpl/hpl-${HPL_VERSION}.tar.gz
-https://fossies.org/linux/privat/hpl-${HPL_VERSION}.tar.gz
-"
-HPL_GIT_URL="${HPL_GIT_URL:-https://github.com/icl-utk-edu/hpl.git}"
-DEFAULT_HPL_GIT_REFS="v${HPL_VERSION} ${HPL_VERSION} main"
-HPL_SOURCE_MODE="tarball"
-
-download_to_file() {
-    url="$1"
-    out="$2"
-
-    if command -v curl >/dev/null 2>&1; then
-        curl -fL \
-            --connect-timeout 15 \
-            --max-time 180 \
-            --retry 2 \
-            --retry-delay 2 \
-            --output "${out}" \
-            "${url}"
-        return $?
-    fi
-
-    wget \
-        --show-progress \
-        --tries=2 \
-        --timeout=30 \
-        -O "${out}" \
-        "${url}"
-}
-
-download_hpl_tarball() {
-    out="$1"
-    tmp="${out}.part"
-    urls="${HPL_URLS:-$DEFAULT_HPL_URLS}"
-
-    rm -f "${tmp}"
-    for url in ${urls}; do
-        [ -n "${url}" ] || continue
-        echo "=== trying HPL source: ${url} ==="
-        if download_to_file "${url}" "${tmp}"; then
-            mv "${tmp}" "${out}"
-            return 0
-        fi
-        rm -f "${tmp}"
-        echo "=== failed: ${url} ==="
-    done
-
-    echo "ERROR: failed to download HPL ${HPL_VERSION} from all configured URLs" >&2
-    return 1
-}
-
-download_hpl_from_git_archive() {
-    out="$1"
-    refs="${HPL_GIT_REFS:-$DEFAULT_HPL_GIT_REFS}"
-    tmp_root="$(mktemp -d)"
-    repo_dir="${tmp_root}/repo"
-    archive_dir="${tmp_root}/hpl-${HPL_VERSION}"
-    archive_tmp="${out}.part"
-
-    for ref in ${refs}; do
-        [ -n "${ref}" ] || continue
-        echo "=== trying HPL git source: ${HPL_GIT_URL} ref ${ref} ==="
-        rm -rf "${repo_dir}" "${archive_dir}" "${archive_tmp}"
-        if git clone --depth 1 --branch "${ref}" "${HPL_GIT_URL}" "${repo_dir}"; then
-            mv "${repo_dir}" "${archive_dir}"
-            tar czf "${archive_tmp}" -C "${tmp_root}" "hpl-${HPL_VERSION}"
-            mv "${archive_tmp}" "${out}"
-            rm -rf "${tmp_root}"
-            HPL_SOURCE_MODE="git"
-            return 0
-        fi
-        echo "=== failed git ref: ${ref} ==="
-    done
-
-    rm -rf "${tmp_root}" "${archive_tmp}"
-    echo "ERROR: failed to obtain HPL ${HPL_VERSION} from all configured sources" >&2
-    echo "  looked for cache: ${out}" >&2
-    echo "  tarball mirrors: ${HPL_URLS:-$DEFAULT_HPL_URLS}" >&2
-    echo "  git fallback: ${HPL_GIT_URL} refs ${refs}" >&2
-    echo "  override mirrors with HPL_URLS=\"https://mirror1/...\"" >&2
-    echo "  override git refs with HPL_GIT_REFS=\"v${HPL_VERSION} ${HPL_VERSION} main\"" >&2
-    return 1
-}
-
-if [ ! -f "${HPL_TAR}" ]; then
-    echo "=== downloading HPL ${HPL_VERSION} ==="
-    download_hpl_tarball "${HPL_TAR}" || download_hpl_from_git_archive "${HPL_TAR}"
-fi
-
-if [ "${HPL_SOURCE_MODE}" = "tarball" ]; then
-    actual_sha="$(sha256sum "${HPL_TAR}" | awk '{print $1}')"
-    if [ "${actual_sha}" != "${HPL_SHA256}" ]; then
-        echo "ERROR: sha256 mismatch for hpl-${HPL_VERSION}.tar.gz" >&2
-        echo "  expected: ${HPL_SHA256}" >&2
-        echo "  actual:   ${actual_sha}" >&2
-        rm -f "${HPL_TAR}"
-        exit 1
-    fi
-    echo "sha256 OK: hpl-${HPL_VERSION}.tar.gz"
-else
-    echo "=== HPL source obtained from git fallback; skipping tarball sha256 check ==="
-fi
-
-# ── download OpenBLAS from Debian 12 apt repo ─────────────────────────────────
-REPO_BASE="https://deb.debian.org/debian/pool/main/o/openblas"
-PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz"
-OPENBLAS_PKG="libopenblas0-openmp"
-
-echo "=== fetching Debian 12 Packages.gz ==="
-wget -q -O "${PACKAGES_GZ}" \
-    "https://deb.debian.org/debian/dists/bookworm/main/binary-amd64/Packages.gz"
-
-lookup_deb() {
-    pkg="$1"
-    gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" '
-        /^Package: / { cur=$2 }
-        /^Filename: / { file=$2 }
-        /^SHA256: /  { sha=$2 }
-        /^$/ {
-            if (cur == pkg) { print file " " sha; exit }
-            cur=""; file=""; sha=""
-        }
-        END {
-            if (cur == pkg) print file " " sha
-        }'
-}
-
-meta="$(lookup_deb "${OPENBLAS_PKG}")"
-[ -n "$meta" ] || { echo "ERROR: ${OPENBLAS_PKG} not found in Packages.gz"; exit 1; }
-repo_file="$(printf '%s' "$meta" | awk '{print $1}')"
-repo_sha="$(printf '%s'  "$meta" | awk '{print $2}')"
-
-OPENBLAS_DEB="${DOWNLOAD_CACHE_DIR}/$(basename "${repo_file}")"
-if [ -f "${OPENBLAS_DEB}" ]; then
-    actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')"
-    [ "$actual" = "$repo_sha" ] || rm -f "${OPENBLAS_DEB}"
-fi
-if [ ! -f "${OPENBLAS_DEB}" ]; then
-    echo "=== downloading ${OPENBLAS_PKG} ==="
-    wget --show-progress -O "${OPENBLAS_DEB}" "https://deb.debian.org/debian/${repo_file}"
-    actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')"
-    [ "$actual" = "$repo_sha" ] || { echo "ERROR: sha256 mismatch for ${OPENBLAS_PKG}"; rm -f "${OPENBLAS_DEB}"; exit 1; }
-fi
-
-# extract libopenblas shared libs
-TMP_DEB=$(mktemp -d)
-trap 'rm -rf "${TMP_DEB}" "${BUILD_TMP:-}"' EXIT INT TERM
-(
-    cd "${TMP_DEB}"
-    ar x "${OPENBLAS_DEB}"
-    tar xf data.tar.*
-)
-find "${TMP_DEB}" \( -name 'libopenblas*.so*' \) \( -type f -o -type l \) \
-    -exec cp -a {} "${CACHE_DIR}/lib/" \;
-echo "=== OpenBLAS libs: $(ls "${CACHE_DIR}/lib/" | wc -l) files ==="
-
-# also need libopenblas-dev header for compilation (we only need the .so symlink)
-OPENBLAS_SO="$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libopenblas.so.*' -type f | sort | head -1)"
-[ -n "${OPENBLAS_SO}" ] || { echo "ERROR: libopenblas.so not extracted"; exit 1; }
-SONAME="$(basename "${OPENBLAS_SO}")"
-ln -sf "${SONAME}" "${CACHE_DIR}/lib/libopenblas.so" 2>/dev/null || true
-ln -sf "${SONAME}" "${CACHE_DIR}/lib/libblas.so" 2>/dev/null || true
-
-# ── build HPL ─────────────────────────────────────────────────────────────────
-BUILD_TMP=$(mktemp -d)
-
-cd "${BUILD_TMP}"
-tar xf "${HPL_TAR}"
-SRC_DIR="$(find . -maxdepth 1 -type d -name 'hpl-*' | head -1)"
-[ -n "${SRC_DIR}" ] || { echo "ERROR: HPL source dir not found"; exit 1; }
-cd "${SRC_DIR}"
-
-# Write a minimal single-process MPI stub so we don't need an MPI package.
-# HPL only needs these functions for single-process execution.
-cat > "${BUILD_TMP}/mpi_stub.c" <<'MPISTUB'
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-
-typedef int MPI_Comm;
-typedef int MPI_Datatype;
-typedef int MPI_Op;
-typedef int MPI_Status;
-typedef int MPI_Request;
-
-#define MPI_COMM_WORLD 0
-#define MPI_SUCCESS    0
-#define MPI_DOUBLE     6
-#define MPI_INT        5
-#define MPI_SUM        0
-#define MPI_MAX        1
-#define MPI_MIN        2
-#define MPI_BYTE       1
-#define MPI_ANY_SOURCE -1
-#define MPI_ANY_TAG    -1
-#define MPI_STATUS_IGNORE ((MPI_Status*)0)
-
-int MPI_Init(int *argc, char ***argv)          { (void)argc; (void)argv; return MPI_SUCCESS; }
-int MPI_Finalize(void)                          { return MPI_SUCCESS; }
-int MPI_Comm_rank(MPI_Comm c, int *rank)        { (void)c; *rank = 0; return MPI_SUCCESS; }
-int MPI_Comm_size(MPI_Comm c, int *size)        { (void)c; *size = 1; return MPI_SUCCESS; }
-int MPI_Bcast(void *b, int n, MPI_Datatype t, int r, MPI_Comm c)
-    { (void)b;(void)n;(void)t;(void)r;(void)c; return MPI_SUCCESS; }
-int MPI_Reduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, int root, MPI_Comm c) {
-    (void)op;(void)root;(void)c;
-    size_t sz = (t==MPI_DOUBLE)?sizeof(double):(t==MPI_INT)?sizeof(int):1;
-    memcpy(r, s, (size_t)n * sz);
-    return MPI_SUCCESS;
-}
-int MPI_Allreduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, MPI_Comm c)
-    { return MPI_Reduce(s,r,n,t,op,0,c); }
-int MPI_Send(const void *b, int n, MPI_Datatype t, int d, int tag, MPI_Comm c)
-    { (void)b;(void)n;(void)t;(void)d;(void)tag;(void)c; return MPI_SUCCESS; }
-int MPI_Recv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Status *st)
-    { (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)st; return MPI_SUCCESS; }
-int MPI_Sendrecv(const void *sb, int sn, MPI_Datatype st2, int dest, int stag,
-                 void *rb, int rn, MPI_Datatype rt, int src, int rtag,
-                 MPI_Comm c, MPI_Status *status)
-    { (void)sb;(void)sn;(void)st2;(void)dest;(void)stag;
-      (void)rb;(void)rn;(void)rt;(void)src;(void)rtag;(void)c;(void)status;
-      return MPI_SUCCESS; }
-int MPI_Irecv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Request *req)
-    { (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)req; return MPI_SUCCESS; }
-int MPI_Wait(MPI_Request *req, MPI_Status *st)
-    { (void)req;(void)st; return MPI_SUCCESS; }
-int MPI_Abort(MPI_Comm c, int code) { (void)c; exit(code); }
-double MPI_Wtime(void) {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
-}
-MPISTUB
-
-# Write Make.bee — HPL makefile configuration
-cat > Make.bee <<MAKEFILE
-SHELL        = /bin/sh
-CD           = cd
-CP           = cp
-LN_S         = ln -s
-MKDIR        = mkdir -p
-RM           = /bin/rm -f
-TOUCH        = touch
-ARCH         = bee
-
-# Directories
-TOPdir       = \$(shell pwd)
-INCdir       = \$(TOPdir)/include
-BINdir       = \$(TOPdir)/bin/\$(ARCH)
-LIBdir       = \$(TOPdir)/lib/\$(ARCH)
-HPLlib       = \$(LIBdir)/libhpl.a
-
-# Compiler
-CC           = gcc
-CCNOOPT      = \$(HPL_DEFS)
-CCFLAGS      = \$(HPL_DEFS) -O3 -march=native -funroll-loops -fomit-frame-pointer
-
-# Linker
-LINKER       = gcc
-LINKFLAGS    = \$(CCFLAGS)
-
-# MPI (single-process stub — no actual MPI needed)
-MPdir        =
-MPinc        = -I${BUILD_TMP}
-MPlib        = ${BUILD_TMP}/mpi_stub.o
-
-# BLAS (OpenBLAS)
-LAdir        = ${CACHE_DIR}/lib
-LAinc        =
-LAlib        = -L\$(LAdir) -Wl,-rpath,/usr/lib -lopenblas
-
-HPL_OPTS     =
-HPL_DEFS     = \$(HPL_OPTS) -DHPL_CALL_CBLAS
-MAKEFILE
-echo "=== Make.bee written ==="
-
-# compile MPI stub
-gcc -O2 -c -o "${BUILD_TMP}/mpi_stub.o" "${BUILD_TMP}/mpi_stub.c"
-
-# build HPL
-echo "=== building HPL ${HPL_VERSION} ==="
-make -j"$(nproc)" arch=bee 2>&1 | tail -20
-
-XHPL_BIN="bin/bee/xhpl"
-[ -x "${XHPL_BIN}" ] || { echo "ERROR: xhpl not found after build"; exit 1; }
-
-cp "${XHPL_BIN}" "${CACHE_DIR}/bin/xhpl"
-chmod +x "${CACHE_DIR}/bin/xhpl"
-echo "=== HPL build complete ==="
-echo "binary: ${CACHE_DIR}/bin/xhpl"
-echo "libs:   $(ls "${CACHE_DIR}/lib/")"
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -1148,19 +1148,6 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    echo "=== john injected ==="
 fi

-# --- build HPL (CPU LINPACK) — runs on all variants ---
-run_step "build HPL ${HPL_VERSION}" "80-hpl" \
-    sh "${BUILDER_DIR}/build-hpl.sh" "${HPL_VERSION}" "${HPL_SHA256}" "${DIST_DIR}"
-
-HPL_CACHE="${DIST_DIR}/hpl-${HPL_VERSION}"
-mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee"
-cp "${HPL_CACHE}/bin/xhpl" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/xhpl"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/xhpl"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-hpl" 2>/dev/null || true
-# Inject OpenBLAS runtime libs needed by xhpl
-cp "${HPL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
-echo "=== HPL injected: xhpl + $(ls "${HPL_CACHE}/lib/" | wc -l) OpenBLAS libs ==="
-
 # --- embed build metadata ---
 mkdir -p "${OVERLAY_STAGE_DIR}/etc"
 BUILD_DATE="$(date +%Y-%m-%d)"
@@ -1193,7 +1180,6 @@ BUILD_DATE=${BUILD_DATE}
 GIT_COMMIT=${GIT_COMMIT}
 DEBIAN_VERSION=${DEBIAN_VERSION}
 DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
-HPL_VERSION=${HPL_VERSION}
 ${GPU_VERSION_LINE}
 EOF

--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -11,18 +11,18 @@ echo "  Hardware Audit LiveCD"
 echo ""

 menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

 submenu "EASY-BEE (advanced options) -->" {
    menuentry "EASY-BEE — GSP=off" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }

    menuentry "EASY-BEE — KMS (no nomodeset)" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }

--- a/iso/builder/config/bootloaders/grub-pc/theme.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/theme.cfg
@@ -1,9 +1,9 @@
 set color_normal=light-gray/black
-set color_highlight=white/dark-gray
+set color_highlight=yellow/black

 if [ -e /boot/grub/splash.png ]; then
    set theme=/boot/grub/live-theme/theme.txt
 else
-    set menu_color_normal=cyan/black
-    set menu_color_highlight=white/dark-gray
+    set menu_color_normal=yellow/black
+    set menu_color_highlight=white/brown
 fi
--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -3,31 +3,31 @@ label live-@FLAVOUR@-normal
    menu default
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=normal
+    append @APPEND_LIVE@ bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-kms
    menu label EASY-BEE (^graphics/KMS)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal
+    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ toram bee.nvidia.mode=normal
+    append @APPEND_LIVE@ toram bee.nvidia.mode=normal pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-kms-gsp-off
    menu label EASY-BEE (g^raphics/KMS, GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1

 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -25,6 +25,7 @@ ensure_bee_console_user() {
 ensure_bee_console_user

 # Enable common bee services
+systemctl enable bee-hpc-tuning.service
 systemctl enable bee-network.service
 systemctl enable bee-preflight.service
 systemctl enable bee-audit.service
@@ -55,6 +56,7 @@ fi
 # nogpu: no GPU services needed

 # Ensure scripts are executable
+chmod +x /usr/local/bin/bee-hpc-tuning  2>/dev/null || true
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
--- a/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
+++ b/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
@@ -82,16 +82,22 @@ glow_draw.ellipse((520, 340, 1400, 760), fill=(255, 190, 40, 36))
 glow = glow.filter(ImageFilter.GaussianBlur(60))
 img = Image.alpha_composite(img.convert('RGBA'), glow)

-font_logo = load_font(MONO_FONT_CANDIDATES, 64)
+TARGET_LOGO_W = 400
+max_chars = max(len(line) for line in ASCII_ART)
+_probe_font = load_font(MONO_FONT_CANDIDATES, 64)
+_probe_cw, _ = mono_metrics(_probe_font)
+font_size_logo = max(6, int(64 * TARGET_LOGO_W / (_probe_cw * max_chars)))
+font_logo = load_font(MONO_FONT_CANDIDATES, font_size_logo)
 char_w, char_h = mono_metrics(font_logo)
-logo_mask = render_ascii_mask(font_logo, ASCII_ART, char_w, char_h, 8)
+logo_mask = render_ascii_mask(font_logo, ASCII_ART, char_w, char_h, 2)
 logo_w, logo_h = logo_mask.size
 logo_x = (W - logo_w) // 2
-logo_y = 270
+logo_y = 380

-shadow_mask = logo_mask.filter(ImageFilter.GaussianBlur(2))
-img.paste(SHADOW, (logo_x + 16, logo_y + 14), shadow_mask)
-img.paste(FG_DIM, (logo_x + 8, logo_y + 7), logo_mask)
+sh_off = max(1, font_size_logo // 6)
+shadow_mask = logo_mask.filter(ImageFilter.GaussianBlur(1))
+img.paste(SHADOW, (logo_x + sh_off * 2, logo_y + sh_off * 2), shadow_mask)
+img.paste(FG_DIM, (logo_x + sh_off, logo_y + sh_off), logo_mask)
 img.paste(FG, (logo_x, logo_y), logo_mask)

 font_sub = load_font(SUB_FONT_CANDIDATES, 30)
--- a/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
+++ b/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Bee: HPC tuning (CPU governor, C-states)
+After=local-fs.target
+Before=bee-nvidia.service bee-audit.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-hpc-tuning.log /usr/local/bin/bee-hpc-tuning
+StandardOutput=journal
+StandardError=journal
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
--- a/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
+++ b/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
@@ -0,0 +1,153 @@
+#!/bin/sh
+# bee-dcgmproftester-staggered — run dcgmproftester on each selected NVIDIA GPU,
+# starting the per-GPU workers STAGGER_SECONDS apart.
+#
+# A worker that starts earlier runs longer by the stagger time still to come, so
+# all workers are scheduled to finish at roughly the same moment.
+#
+# Usage: bee-dcgmproftester-staggered [--seconds N] [--stagger-seconds N]
+#                                     [--devices 0,1] [--exclude 2,3]
+# Exit status: 0 when every worker succeeded; 1 when a worker failed or no
+# tool/GPU was found; 2 on usage errors; 130/143 when stopped by INT/TERM.
+set -eu
+
+# Per-GPU load duration in seconds. Deliberately not named SECONDS: bash and ksh
+# treat SECONDS as a live elapsed-time counter, so reading it after the stagger
+# sleeps would silently inflate the duration of every later worker.
+DURATION_SEC=300
+STAGGER_SECONDS=180
+DEVICES=""
+EXCLUDE=""
+
+usage() {
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+# is_uint VALUE — succeed only when VALUE is a non-empty string of decimal digits.
+is_uint() {
+    case "${1:-}" in
+        ''|*[!0-9]*) return 1 ;;
+    esac
+    return 0
+}
+
+# normalize_list CSV — strip whitespace and empty items, sort numerically, de-duplicate.
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+# contains_csv NEEDLE CSV — succeed when NEEDLE is an item of the comma-separated CSV.
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+# resolve_dcgmproftester — print the first dcgmproftester variant found in PATH.
+resolve_dcgmproftester() {
+    for candidate in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
+        if command -v "${candidate}" >/dev/null 2>&1; then
+            command -v "${candidate}"
+            return 0
+        fi
+    done
+    return 1
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+# Reject non-numeric durations up front instead of failing later inside $(( )).
+is_uint "${DURATION_SEC}" || { echo "--seconds must be a non-negative integer" >&2; usage; }
+is_uint "${STAGGER_SECONDS}" || { echo "--stagger-seconds must be a non-negative integer" >&2; usage; }
+
+PROF=$(resolve_dcgmproftester) || { echo "dcgmproftester not found in PATH" >&2; exit 1; }
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+# Keep the selected GPUs that are not excluded, preserving their order.
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+echo "loader=dcgmproftester-staggered"
+echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"
+
+TMP_DIR=$(mktemp -d)
+WORKERS=""
+
+# terminate_workers — send SIGTERM to every worker started so far.
+terminate_workers() {
+    for spec in ${WORKERS}; do
+        kill "${spec%%:*}" 2>/dev/null || true
+    done
+}
+
+# Always remove the temp dir; on INT/TERM also stop the background workers so no
+# GPU load keeps running after this script has gone away.
+trap 'rm -rf "${TMP_DIR}"' EXIT
+trap 'terminate_workers; exit 130' INT
+trap 'terminate_workers; exit 143' TERM
+
+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    gpu_pos=$((gpu_pos + 1))
+    log="${TMP_DIR}/gpu-${id}.log"
+    # Earlier workers run longer by the stagger time still ahead of them.
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+    gpu_seconds=$(( DURATION_SEC + extra_sec ))
+    echo "starting gpu ${id} seconds=${gpu_seconds}"
+    CUDA_VISIBLE_DEVICES="${id}" "${PROF}" --no-dcgm-validation -t 1004 -d "${gpu_seconds}" >"${log}" 2>&1 &
+    pid=$!
+    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
+done
+
+# Collect each worker's exit status and replay its log, prefixed with the GPU id.
+status=0
+for spec in ${WORKERS}; do
+    pid=${spec%%:*}
+    rest=${spec#*:}
+    id=${rest%%:*}
+    log=${rest#*:}
+    if wait "${pid}"; then
+        echo "gpu ${id} finished: OK"
+    else
+        rc=$?
+        echo "gpu ${id} finished: FAILED rc=${rc}"
+        status=1
+    fi
+    sed "s/^/[gpu ${id}] /" "${log}" || true
+done
+
+exit "${status}"
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -2,13 +2,15 @@
 set -eu

 SECONDS=5
+STAGGER_SECONDS=0
 SIZE_MB=0
 DEVICES=""
 EXCLUDE=""
+PRECISION=""
 WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"

 usage() {
-    echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision fp8|fp16|fp32|fp64|fp4]" >&2
    exit 2
 }

@@ -25,9 +27,11 @@ contains_csv() {
 while [ "$#" -gt 0 ]; do
    case "$1" in
        --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
        --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        --precision) [ "$#" -ge 2 ] || usage; PRECISION="$2"; shift 2 ;;
        *) usage ;;
    esac
 done
@@ -61,14 +65,18 @@ done

 echo "loader=bee-gpu-burn"
 echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"

 export CUDA_DEVICE_ORDER="PCI_BUS_ID"

 TMP_DIR=$(mktemp -d)
 trap 'rm -rf "${TMP_DIR}"' EXIT INT TERM

+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
 WORKERS=""
 for id in $(echo "${FINAL}" | tr ',' ' '); do
+    gpu_pos=$((gpu_pos + 1))
    log="${TMP_DIR}/gpu-${id}.log"
    gpu_size_mb="${SIZE_MB}"
    if [ "${gpu_size_mb}" -le 0 ] 2>/dev/null; then
@@ -79,11 +87,18 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
            gpu_size_mb=512
        fi
    fi
-    echo "starting gpu ${id} size=${gpu_size_mb}MB"
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+    gpu_seconds=$(( SECONDS + extra_sec ))
+    echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
+    precision_arg=""
+    [ -n "${PRECISION}" ] && precision_arg="--precision ${PRECISION}"
    CUDA_VISIBLE_DEVICES="${id}" \
-        "${WORKER}" --device 0 --seconds "${SECONDS}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
+        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} >"${log}" 2>&1 &
    pid=$!
    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
 done

 status=0
--- a/iso/overlay/usr/local/bin/bee-hpc-tuning
+++ b/iso/overlay/usr/local/bin/bee-hpc-tuning
@@ -0,0 +1,41 @@
+#!/bin/sh
+# bee-hpc-tuning — apply HPC tuning for deterministic benchmarking
+# Called by bee-hpc-tuning.service at boot.
+
+log() { echo "[bee-hpc-tuning] $*"; }
+
+# ── CPU governor ────────────────────────────────────────────────────────────
+# Switch every CPU core to the "performance" cpufreq governor via sysfs.
+# cpupower is not available; write directly to scaling_governor and tally results.
+governor_ok=0    # cores whose governor write succeeded
+governor_fail=0  # cores whose governor write failed
+for gov_path in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
+    [ -f "$gov_path" ] || continue  # an unmatched glob stays literal; skip it
+    if echo performance > "$gov_path" 2>/dev/null; then
+        governor_ok=$((governor_ok + 1))
+    else
+        governor_fail=$((governor_fail + 1))
+    fi
+done
+
+if [ "$governor_ok" -gt 0 ] && [ "$governor_fail" -eq 0 ]; then
+    log "CPU governor set to performance on ${governor_ok} core(s)"
+elif [ "$governor_ok" -gt 0 ]; then
+    log "WARN: CPU governor: ${governor_ok} OK, ${governor_fail} failed"
+elif [ "$governor_fail" -gt 0 ]; then
+    log "WARN: failed to set CPU governor on ${governor_fail} core(s)"
+else
+    log "WARN: no cpufreq scaling_governor paths found (C-state governor or HW-controlled)"
+fi
+
+# ── Transparent Huge Pages ───────────────────────────────────────────────────
+# Kernel cmdline sets transparent_hugepage=always at boot, but confirm and log.
+thp_path=/sys/kernel/mm/transparent_hugepage/enabled
+if [ -f "$thp_path" ]; then
+    current=$(cat "$thp_path" 2>/dev/null)
+    log "transparent_hugepage: ${current}"
+else
+    log "WARN: transparent_hugepage sysfs path not found"
+fi
+
+log "done"
--- a/iso/overlay/usr/local/bin/bee-hpl
+++ b/iso/overlay/usr/local/bin/bee-hpl
@@ -1,97 +0,0 @@
-#!/bin/sh
-# bee-hpl — run HPL (High Performance LINPACK) with auto-sized problem.
-#
-# Generates HPL.dat based on available RAM, runs xhpl, and prints standard
-# HPL output. The WR... line with Gflops is parsed by the bee audit tool.
-#
-# Usage: bee-hpl [--mem-fraction 0.80] [--nb 256] [--seconds N]
-#
-# --mem-fraction   fraction of total RAM to use for the matrix (default 0.80)
-# --nb             block size; 256 is good for modern CPUs (default 256)
-# --seconds        ignored — HPL runtime is determined by problem size; kept
-#                  for interface compatibility with other bee stress tools
-
-set -eu
-
-XHPL="/usr/local/lib/bee/xhpl"
-MEM_FRACTION="0.80"
-NB=256
-
-usage() {
-    echo "usage: $0 [--mem-fraction 0.80] [--nb 256] [--seconds N]" >&2
-    exit 2
-}
-
-while [ "$#" -gt 0 ]; do
-    case "$1" in
-        --mem-fraction) [ "$#" -ge 2 ] || usage; MEM_FRACTION="$2"; shift 2 ;;
-        --nb)           [ "$#" -ge 2 ] || usage; NB="$2"; shift 2 ;;
-        --seconds)      [ "$#" -ge 2 ] || usage; shift 2 ;;  # accepted, ignored
-        *) usage ;;
-    esac
-done
-
-[ -x "${XHPL}" ] || { echo "ERROR: xhpl not found at ${XHPL}" >&2; exit 1; }
-
-# Detect total RAM in bytes
-TOTAL_KB=$(grep MemTotal /proc/meminfo | awk '{print $2}')
-[ -n "${TOTAL_KB}" ] || { echo "ERROR: cannot read MemTotal from /proc/meminfo" >&2; exit 1; }
-TOTAL_BYTES=$(( TOTAL_KB * 1024 ))
-
-# N = floor(sqrt(fraction * total_bytes / 8)) rounded down to multiple of NB
-# Use awk for floating-point sqrt
-N=$(awk -v total="${TOTAL_BYTES}" -v frac="${MEM_FRACTION}" -v nb="${NB}" '
-BEGIN {
-    raw = int(sqrt(total * frac / 8.0))
-    n   = int(raw / nb) * nb
-    if (n < nb) n = nb
-    print n
-}')
-
-echo "loader=bee-hpl"
-echo "total_ram_mb=$(( TOTAL_KB / 1024 ))"
-echo "matrix_n=${N}"
-echo "block_nb=${NB}"
-echo "mem_fraction=${MEM_FRACTION}"
-
-# Generate HPL.dat in a temp directory and run from there
-RUNDIR=$(mktemp -d)
-trap 'rm -rf "${RUNDIR}"' EXIT INT TERM
-
-cat > "${RUNDIR}/HPL.dat" <<DAT
-HPLinpack benchmark input file
-Innovative Computing Laboratory, University of Tennessee
-HPL.out        output file name (if any)
-6              device out (6=stdout, 7=stderr, file)
-1              # of problems sizes (N)
-${N}           Ns
-1              # of NBs
-${NB}          NBs
-0              PMAP process mapping (0=Row-,1=Column-major)
-1              # of process grids (P x Q)
-1              Ps
-1              Qs
-16.0           threshold
-1              # of panel fact
-2              PFACTs (0=left, 1=Crout, 2=Right)
-1              # of recursive stopping criterium
-4              NBMINs (>= 1)
-1              # of panels in recursion
-2              NDIVs
-1              # of recursive panel fact.
-1              RFACTs (0=left, 1=Crout, 2=Right)
-1              # of broadcast
-1              BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
-1              # of lookahead depth
-1              DEPTHs (>=0)
-2              SWAP (0=bin-exch,1=long,2=mix)
-64             swapping threshold
-0              L1 in (0=transposed,1=no-transposed) form
-0              U  in (0=transposed,1=no-transposed) form
-1              Equilibration (0=no,1=yes)
-8              memory alignment in double (> 0)
-DAT
-
-cd "${RUNDIR}"
-echo "---"
-"${XHPL}"
--- a/iso/overlay/usr/local/bin/bee-john-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-john-gpu-stress
@@ -2,6 +2,7 @@
 set -eu

 DURATION_SEC=300
+STAGGER_SECONDS=0
 DEVICES=""
 EXCLUDE=""
 FORMAT=""
@@ -12,7 +13,7 @@ export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
 export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

 usage() {
-    echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
    exit 2
 }

@@ -118,6 +119,7 @@ ensure_opencl_ready() {
 while [ "$#" -gt 0 ]; do
    case "$1" in
        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
        --format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
@@ -170,6 +172,7 @@ done
 echo "loader=john"
 echo "selected_gpus=${FINAL}"
 echo "john_devices=${JOHN_DEVICES}"
+echo "stagger_seconds=${STAGGER_SECONDS}"

 cd "${JOHN_DIR}"

@@ -232,14 +235,21 @@ trap cleanup EXIT INT TERM
 echo "format=${CHOSEN_FORMAT}"
 echo "target_seconds=${DURATION_SEC}"
 echo "slice_seconds=${TEST_SLICE_SECONDS}"
-DEADLINE=$(( $(date +%s) + DURATION_SEC ))
+TOTAL_DEVICES=$(echo "${JOHN_DEVICES}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
 _first=1
+pos=0
 for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
+    pos=$((pos + 1))
    [ "${_first}" = "1" ] || sleep 3
    _first=0
-    run_john_loop "${opencl_id}" "${DEADLINE}" &
+    extra_sec=$(( STAGGER_SECONDS * (TOTAL_DEVICES - pos) ))
+    deadline=$(( $(date +%s) + DURATION_SEC + extra_sec ))
+    run_john_loop "${opencl_id}" "${deadline}" &
    pid=$!
    PIDS="${PIDS} ${pid}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${pos}" -lt "${TOTAL_DEVICES}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
 done
 FAIL=0
 for pid in ${PIDS}; do
--- a/iso/overlay/usr/local/bin/bee-nvidia-load
+++ b/iso/overlay/usr/local/bin/bee-nvidia-load
@@ -21,8 +21,13 @@ read_nvidia_modules_flavor() {

 log "kernel: $(uname -r)"

-# Skip if no NVIDIA GPU present (PCI vendor 10de)
-if ! lspci -nn 2>/dev/null | grep -qi '10de:'; then
+# Skip if no NVIDIA display/compute GPU is present.
+# Match only display-class PCI functions (0300 VGA, 0302 3D controller) from vendor 10de.
+have_nvidia_gpu() {
+    lspci -Dn 2>/dev/null | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ { found=1; exit } END { exit(found ? 0 : 1) }'
+}
+
+if ! have_nvidia_gpu; then
    log "no NVIDIA GPU detected — skipping module load"
    exit 0
 fi
--- a/iso/overlay/usr/local/bin/bee-selfheal
+++ b/iso/overlay/usr/local/bin/bee-selfheal
@@ -14,7 +14,7 @@ log() {
 }

 have_nvidia_gpu() {
-    lspci -nn 2>/dev/null | grep -qi '10de:'
+    lspci -Dn 2>/dev/null | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ { found=1; exit } END { exit(found ? 0 : 1) }'
 }

 service_active() {
Author	SHA1	Message	Date
Michael Chus	457ea1cf04	Unify benchmark exports and drop ASCII charts	2026-04-13 21:38:28 +03:00
Michael Chus	bf6ecab4f0	Add per-precision benchmark phases, weighted TOPS scoring, and ECC tracking - Split steady window into 6 equal slots: fp8/fp16/fp32/fp64/fp4 + combined - Each precision phase runs bee-gpu-burn with --precision filter so PowerCVPct reflects single-kernel stability (not round-robin artifact) - Add fp4 support in bee-gpu-stress.c for Blackwell (cc>=100) via existing CUDA_R_4F_E2M1 guard - Weighted TOPS: fp64×2.0, fp32×1.0, fp16×0.5, fp8×0.25, fp4×0.125 - SyntheticScore = sum of weighted TOPS from per-precision phases - MixedScore = sum from combined phase; MixedEfficiency = Mixed/Synthetic - ComputeScore = SyntheticScore × (1 + MixedEfficiency × 0.3) - ECC volatile counters sampled before/after each phase and overall - DegradationReasons: ecc_uncorrected_errors, ecc_corrected_errors - Report: per-precision stability table with ECC columns, methodology section - Ramp-up history table redesign: GPU indices as columns, runs as rows Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-13 10:49:49 +03:00
Michael Chus	02e44b1172	Fix USB/RAM status checks; add server model+S/N to dashboard; remove cycles USB Export Drive: lsblk reports TRAN only for whole disks, not partitions (/dev/sdc1). Strip trailing partition digits to get parent disk before transport check. LiveCD in RAM: When RunInstallToRAM copies squashfs to /dev/shm/bee-live/ but bind-mount of /run/live/medium fails (CD-ROM boots), /run/live/medium still shows the CD-ROM fstype. Add fallback: if /dev/shm/bee-live/*.squashfs exists, the data is in RAM — report status OK. Dashboard Hardware Summary: Show server Manufacturer + ProductName as heading and S/N as subline above the component table, sourced from hw.Board (dmidecode system-type data). Validate: Remove Cycles input — always run once. cycles=1 hardcoded in runAllSAT(). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:46:42 +03:00
Michael Chus	2ceaa0d0ca	Include profile and mode in benchmark task names for task list clarity Task names now follow the pattern: NVIDIA Benchmark · <profile> · <mode> [· GPU <indices>] Examples: NVIDIA Benchmark · standard · sequential (GPU 0, RTX 6000 Pro) NVIDIA Benchmark · stability · parallel NVIDIA Benchmark · standard · ramp 1/4 · GPU 0 NVIDIA Benchmark · standard · ramp 2/4 · GPU 0,1 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:36:51 +03:00
Michael Chus	9482ba20a2	Remove NCCL checkbox — auto-enable interconnect step when >1 GPU selected NCCL all_reduce is always attempted when 2+ GPUs are selected; a failure leaves InterconnectScore=0 (no bonus, no penalty) and OverallStatus unaffected. Exposing the checkbox implied NCCL is optional and made a failed run look like a deliberate skip. - Remove benchmark-run-nccl checkbox and its change listener from pages.go - Client sends run_nccl: selected.length > 1 (automatic) - api.go default runNCCL=true is unchanged - Selection note now mentions NCCL automatically for multi-GPU runs Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:33:17 +03:00
Michael Chus	813e2f86a9	Add scalability/ramp-up labeling, ServerPower penalty in scoring, and report improvements - Add RampStep/RampTotal/RampRunID to NvidiaBenchmarkOptions, taskParams, and NvidiaBenchmarkResult so ramp-up steps can be correlated across result.json files - Add ScalabilityScore field to NvidiaBenchmarkResult (placeholder; computed externally by comparing ramp-up step results sharing the same ramp_run_id) - Propagate ramp fields through api.go (generates shared ramp_run_id at spawn time), tasks.go handler, and benchmark.go result population - Apply ServerPower penalty to CompositeScore when IPMI reporting_ratio < 0.75: factor = ratio/0.75, applied per-GPU with a note explaining the reduction - Add finding when server power delta exceeds GPU-reported sum by >25% (non-GPU draw) - Report header now shows ramp step N/M and run ID instead of "parallel" when in ramp mode; shows scalability_score when non-zero Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:30:47 +03:00
Michael Chus	58a6da9b44	Recover power limits and SM count from nvidia-smi -q in enrichGPUInfo When --query-gpu CSV fields fail (exit status 2 on some Blackwell + driver combos), enrichGPUInfoWithMaxClocks now also parses from the verbose nvidia-smi -q output already collected at benchmark start: - Default Power Limit → DefaultPowerLimitW - Current Power Limit → PowerLimitW (fallback) - Multiprocessor Count → MultiprocessorCount Fixes PowerSustainScore=0 on systems where all three CSV query variants fail but nvidia-smi -q succeeds (confirmed on RTX PRO 6000 Blackwell + driver 590.48.01). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:17:56 +03:00
Michael Chus	f4a19c0a00	Add power calibration step to benchmark; fix PowerSustainScore reference Before the per-GPU compute phases, run `dcgmi diag -r targeted_power` for 45 s while collecting nvidia-smi power metrics in parallel. The p95 power per GPU is stored as calibrated_peak_power_w and used as the denominator for PowerSustainScore instead of the hardware default limit, which bee-gpu-burn cannot reach because it is compute-only. Fallback chain: calibrated peak → default limit → enforced limit. If dcgmi is absent or the run fails, calibration is skipped silently. Adjust composite score weights to match the new honest power reference: base 0.35, thermal 0.25, stability 0.25, power 0.15, NCCL bonus 0.10. Power weight reduced (0.20→0.15) because even with a calibrated reference bee-gpu-burn reaches ~60-75% of TDP by design (no concurrent mem stress). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 22:06:46 +03:00
Michael Chus	9e3dcf9b4d	Record host CPU/RAM config in benchmark results; check CPU load - BenchmarkHostConfig captures CPU model, sockets, cores, threads, and total RAM from /proc/cpuinfo and /proc/meminfo at benchmark start. - BenchmarkCPULoad samples host CPU utilisation every 10 s throughout the GPU steady-state phase (sequential and parallel paths). - Summarises avg/max/p95 and classifies status as ok / high / unstable. - Adds a finding when CPU load is elevated (avg >20% or max >40%) or erratic (stddev >12%), with a plain-English description in the report. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 20:02:04 +03:00
Michael Chus	098e19f760	Add ramp-up mode to NVIDIA GPU benchmark Adds a new checkbox (enabled by default) in the benchmark section. In ramp-up mode N tasks are spawned simultaneously: 1 GPU, then 2, then 3, up to all selected GPUs — each step runs its GPUs in parallel. NCCL runs only on the final step. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 18:34:19 +03:00
Michael Chus	e16d0f34b5	Adjust burn GPU ramp timing by profile	2026-04-12 15:58:30 +03:00
Mikhail Chusavitin	525ed8b8fc	Fix GPU clock lock normalization for Blackwell (clocks.max.* unsupported) clocks.max.graphics / clocks.max.memory CSV fields return exit status 2 on RTX PRO 6000 Blackwell (driver 98.x), causing the entire gpu inventory query to fail and clock lock to be skipped → normalization: partial. Fix: - Add minimal fallback query (index,uuid,name,pci.bus_id,vbios_version, power.limit) that succeeds even without clock fields - Add enrichGPUInfoWithMaxClocks: parses "Max Clocks" section of nvidia-smi -q verbose output to fill MaxGraphicsClockMHz / MaxMemoryClockMHz when CSV fields fail - Move nvidia-smi -q execution before queryBenchmarkGPUInfo so its output is available for clock enrichment immediately after - Tests: cover enrichment and skip-if-populated cases Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 13:33:54 +03:00
Mikhail Chusavitin	4f94ebcb2c	Add HPC tuning: PCIe ASPM off, C-states, performance CPU governor - grub.cfg + isolinux/live.cfg.in: add pcie_aspm=off, intel_idle.max_cstate=1 and processor.max_cstate=1 to all non-failsafe boot entries - bee-hpc-tuning: new script that sets all CPU cores to performance governor via sysfs and logs THP state at boot - bee-hpc-tuning.service: runs before bee-nvidia and bee-audit - 9000-bee-setup.hook.chroot: enable service and mark script executable Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 13:07:32 +03:00
Mikhail Chusavitin	05c1fde233	Warn on PCIe link speed degradation and collect lspci -vvv in techdump - collector/pcie: add applyPCIeLinkSpeedWarning that sets status=Warning and ErrorDescription when current link speed is below maximum negotiated speed (e.g. Gen1 running on a Gen5 slot) - collector/pcie: add pcieLinkSpeedRank helper for Gen string comparison - collector/pcie_filter_test: cover degraded and healthy link speed cases - platform/techdump: collect lspci -vvv → lspci-vvv.txt for LnkCap/LnkSta Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-12 12:42:17 +03:00
Michael Chus	825ef6b98a	Add USB export drive and LiveCD-in-RAM checks to Runtime Health - schema: add ToRAMStatus and USBExportPath fields to RuntimeHealth - platform/runtime.go: collectToRAMHealth (ok/warning/failed based on IsLiveMediaInRAM + toramActive) and collectUSBExportHealth (scans /proc/mounts + lsblk for writable USB-backed filesystems) - pages.go: add USB Export Drive and LiveCD in RAM rows to the health table Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-11 10:05:27 +03:00
Michael Chus	ba16021cdb	Fix GPU model propagation, export filenames, PSU/service status, and chart perf - nvidia.go: add Name field to nvidiaGPUInfo, include model name in nvidia-smi query, set dev.Model in enrichPCIeWithNVIDIAData - pages.go: fix duplicate GPU count in validate card summary (4 GPU: 4 x … → 4 x … GPU); fix PSU UNKNOWN fallback from hw.PowerSupplies; treat activating/deactivating/reloading service states as OK in Runtime Health - support_bundle.go: use "150405" time format (no colons) for exFAT compat - sat.go / benchmark.go / platform_stress.go / sat_fan_stress.go: remove .tar.gz archive creation from export dirs — export packs everything itself - charts_svg.go: add min-max downsampling (1400 pt cap) for SVG chart perf - benchmark_report.go / sat.go: normalize GPU fallback to "Unknown GPU" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-11 10:05:27 +03:00
Mikhail Chusavitin	bb1218ddd4	Fix GPU inventory: exclude BMC virtual VGA, show real NVIDIA model names Two issues: 1. BMC/management VGA chips (e.g. Huawei iBMC Hi171x, ASPEED) were included in GPU inventory because shouldIncludePCIeDevice only checked the PCI class, not the device name. Added a name-based filter for known BMC/management patterns when the class is VGA/display/3d. 2. New NVIDIA GPUs (e.g. RTX PRO 6000 Blackwell, device ID 2bb5) showed as "Device 2bb5" because lspci's database lags behind. Added "name" to the nvidia-smi query and use it to override dev.Model during enrichment. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-10 13:57:26 +03:00
Mikhail Chusavitin	65faae8ede	Remove hpl from SAT run-all targets — no backend route exists hpl was listed in baseTargets and stressOnlyTargets but /api/sat/hpl/run was never registered, causing a 405 Method Not Allowed (not valid JSON) error when Validate one by one was triggered in stress mode. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-10 13:30:32 +03:00
Michael Chus	05241f2e0e	Redesign dashboard: split Runtime Health and Hardware Summary - Runtime Health now shows only LiveCD system status (services, tools, drivers, network, CUDA/ROCm) — hardware component rows removed - Hardware Summary now shows server components with readable descriptions (model, count×size) and component-status.json health badges - Add Network Adapters row to Hardware Summary - SFP module static info (vendor, PN, SN, connector, type, wavelength) now collected via ethtool -m regardless of carrier state - PSU statuses from IPMI audit written to component-status.json so PSU badge shows actual status after first audit instead of UNKNOWN Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-09 23:41:23 +03:00
Mikhail Chusavitin	c1690a084b	Fix app tests that mutate global defaults	2026-04-09 15:28:25 +03:00
Mikhail Chusavitin	9481ca2805	Add staged NVIDIA burn ramp-up mode	2026-04-09 15:21:14 +03:00
Mikhail Chusavitin	a78fdadd88	Refine validate and burn profile layout	2026-04-09 15:14:48 +03:00
Mikhail Chusavitin	4ef403898f	Tighten NVIDIA GPU PCI detection	2026-04-09 15:14:48 +03:00
Michael Chus	025548ab3c	UI: amber accents, smaller wallpaper logo, new support bundle name, drop display resolution - Bootloader: GRUB fallback text colors → yellow/brown (amber tone) - CLI charts: all GPU metric series use single amber color (xterm-256 #214) - Wallpaper: logo width scaled to 400 px dynamically, shadow scales with font size - Support bundle: renamed to YYYY-MM-DD (BEE-SP vX.X) SRV_MODEL SRV_SN ToD.tar.gz using dmidecode for server model (spaces→underscores) and serial number - Remove display resolution feature (UI card, API routes, handlers, tests) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-08 21:37:01 +03:00
Mikhail Chusavitin	e0d94d7f47	Remove HPL from build and audit flows	2026-04-08 10:00:23 +03:00
Mikhail Chusavitin	13899aa864	Drop incompatible HPL git fallback	2026-04-08 09:50:58 +03:00
Mikhail Chusavitin	f345d8a89d	Build HPL serially to avoid upstream make races	2026-04-08 09:47:35 +03:00
Mikhail Chusavitin	4715059ac0	Fix HPL MPI stub header and keep full build logs	2026-04-08 09:45:14 +03:00
Mikhail Chusavitin	0660a40287	Harden HPL builder cache and runtime libs	2026-04-08 09:40:18 +03:00
Mikhail Chusavitin	67369d9b7b	Fix OpenBLAS package lookup in HPL build	2026-04-08 09:32:49 +03:00