Compare commits

...

11 Commits
v3.10 ... v3.15

26 changed files with 831 additions and 186 deletions

View File

@@ -16,7 +16,7 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
return "", err return "", err
} }
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-stress", []satJob{ return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), []satJob{
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}}, {name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}}, {name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
job, job,
@@ -24,6 +24,17 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
}, logFunc) }, logFunc)
} }
// nvidiaStressArchivePrefix maps the requested stress loader to the archive
// name prefix used for the NVIDIA acceptance pack. Unknown or empty loaders
// fall back to the default bee-gpu-burn prefix.
func nvidiaStressArchivePrefix(loader string) string {
	normalized := strings.ToLower(strings.TrimSpace(loader))
	if normalized == NvidiaStressLoaderJohn {
		return "gpu-nvidia-john"
	}
	if normalized == NvidiaStressLoaderNCCL {
		return "gpu-nvidia-nccl"
	}
	return "gpu-nvidia-burn"
}
func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) { func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices) selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
if err != nil { if err != nil {

View File

@@ -10,9 +10,11 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"runtime"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"syscall"
"time" "time"
) )
@@ -374,10 +376,17 @@ func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
return nil, fmt.Errorf("stressapptest not found: %w", err) return nil, fmt.Errorf("stressapptest not found: %w", err)
} }
// Use a very long duration; the context timeout will kill it at the right time. // Use a very long duration; the context timeout will kill it at the right time.
cmd := exec.CommandContext(ctx, path, "-s", "86400", "-W", "--cc_test") cmdArgs := []string{"-s", "86400", "-W", "--cc_test"}
if threads := platformStressCPUThreads(); threads > 0 {
cmdArgs = append(cmdArgs, "-m", strconv.Itoa(threads))
}
if mb := platformStressMemoryMB(); mb > 0 {
cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb))
}
cmd := exec.CommandContext(ctx, path, cmdArgs...)
cmd.Stdout = nil cmd.Stdout = nil
cmd.Stderr = nil cmd.Stderr = nil
if err := cmd.Start(); err != nil { if err := startLowPriorityCmd(cmd, 15); err != nil {
return nil, fmt.Errorf("stressapptest start: %w", err) return nil, fmt.Errorf("stressapptest start: %w", err)
} }
return cmd, nil return cmd, nil
@@ -418,7 +427,7 @@ func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd {
cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile) cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
cmd.Stdout = nil cmd.Stdout = nil
cmd.Stderr = nil cmd.Stderr = nil
_ = cmd.Start() _ = startLowPriorityCmd(cmd, 10)
return cmd return cmd
} }
@@ -433,10 +442,50 @@ func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd {
cmd := exec.CommandContext(ctx, path, "--seconds", "86400", "--size-mb", "64") cmd := exec.CommandContext(ctx, path, "--seconds", "86400", "--size-mb", "64")
cmd.Stdout = nil cmd.Stdout = nil
cmd.Stderr = nil cmd.Stderr = nil
_ = cmd.Start() _ = startLowPriorityCmd(cmd, 10)
return cmd return cmd
} }
func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
if err := cmd.Start(); err != nil {
return err
}
if cmd.Process != nil {
_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
}
return nil
}
// platformStressCPUThreads decides how many stressapptest worker threads to
// run. BEE_PLATFORM_STRESS_THREADS overrides the default when positive;
// otherwise one or two CPUs are left free so the host stays responsive
// during the burn.
func platformStressCPUThreads() int {
	if override := envInt("BEE_PLATFORM_STRESS_THREADS", 0); override > 0 {
		return override
	}
	cpus := runtime.NumCPU()
	if cpus <= 2 {
		return 1
	}
	if cpus <= 8 {
		return cpus - 1
	}
	return cpus - 2
}
// platformStressMemoryMB decides how much memory (in MiB) stressapptest may
// claim. BEE_PLATFORM_STRESS_MB overrides the default when positive;
// otherwise ~60% of the currently free memory is used, clamped to at least
// 1 GiB. Returns 0 when free memory cannot be determined, telling the
// caller to omit the -M flag entirely.
func platformStressMemoryMB() int {
	if override := envInt("BEE_PLATFORM_STRESS_MB", 0); override > 0 {
		return override
	}
	free := freeMemBytes()
	if free <= 0 {
		return 0
	}
	mb := int(free * 60 / 100 / (1024 * 1024))
	if mb < 1024 {
		// Floor at 1 GiB so the stress run is still meaningful on small hosts.
		return 1024
	}
	return mb
}
func packPlatformDir(dir, dest string) error { func packPlatformDir(dir, dest string) error {
f, err := os.Create(dest) f, err := os.Create(dest)
if err != nil { if err != nil {

View File

@@ -0,0 +1,34 @@
package platform
import (
"runtime"
"testing"
)
// TestPlatformStressCPUThreadsOverride verifies the env var override wins.
func TestPlatformStressCPUThreadsOverride(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
	got := platformStressCPUThreads()
	if got != 7 {
		t.Fatalf("platformStressCPUThreads=%d want 7", got)
	}
}
// TestPlatformStressCPUThreadsDefaultLeavesHeadroom checks that the default
// thread count stays within [1, NumCPU] and leaves headroom on machines
// with more than two CPUs.
func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
	got := platformStressCPUThreads()
	cpus := runtime.NumCPU()
	switch {
	case got < 1:
		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
	case got > cpus:
		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, cpus)
	case cpus > 2 && got >= cpus:
		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, cpus)
	}
}
// TestPlatformStressMemoryMBOverride verifies the env var override wins.
func TestPlatformStressMemoryMBOverride(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
	got := platformStressMemoryMB()
	if got != 8192 {
		t.Fatalf("platformStressMemoryMB=%d want 8192", got)
	}
}

View File

@@ -684,7 +684,11 @@ func resolveSATCommand(cmd []string) ([]string, error) {
case "rvs": case "rvs":
return resolveRVSCommand(cmd[1:]...) return resolveRVSCommand(cmd[1:]...)
} }
return cmd, nil path, err := satLookPath(cmd[0])
if err != nil {
return nil, fmt.Errorf("%s not found in PATH: %w", cmd[0], err)
}
return append([]string{path}, cmd[1:]...), nil
} }
func resolveRVSCommand(args ...string) ([]string, error) { func resolveRVSCommand(args ...string) ([]string, error) {

View File

@@ -162,6 +162,25 @@ func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
} }
} }
// TestNvidiaStressArchivePrefixByLoader checks the loader→archive-prefix
// mapping, including the empty-loader fallback.
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
	t.Parallel()
	type testCase struct {
		loader string
		want   string
	}
	cases := []testCase{
		{NvidiaStressLoaderBuiltin, "gpu-nvidia-burn"},
		{NvidiaStressLoaderJohn, "gpu-nvidia-john"},
		{NvidiaStressLoaderNCCL, "gpu-nvidia-nccl"},
		{"", "gpu-nvidia-burn"},
	}
	for _, tc := range cases {
		got := nvidiaStressArchivePrefix(tc.loader)
		if got != tc.want {
			t.Fatalf("loader=%q prefix=%q want %q", tc.loader, got, tc.want)
		}
	}
}
func TestEnvIntFallback(t *testing.T) { func TestEnvIntFallback(t *testing.T) {
os.Unsetenv("BEE_MEMTESTER_SIZE_MB") os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 { if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -237,6 +256,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
} }
} }
// TestResolveSATCommandUsesLookPathForGenericTools verifies that generic
// tools are resolved to absolute paths via the satLookPath hook.
func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
	original := satLookPath
	t.Cleanup(func() { satLookPath = original })
	satLookPath = func(file string) (string, error) {
		if file != "stress-ng" {
			return "", exec.ErrNotFound
		}
		return "/usr/bin/stress-ng", nil
	}
	cmd, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
	if err != nil {
		t.Fatalf("resolveSATCommand error: %v", err)
	}
	if len(cmd) != 3 {
		t.Fatalf("cmd len=%d want 3 (%v)", len(cmd), cmd)
	}
	if cmd[0] != "/usr/bin/stress-ng" {
		t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", cmd[0])
	}
}
// TestResolveSATCommandFailsForMissingGenericTool verifies that a tool
// absent from PATH produces a descriptive error instead of a raw command.
func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
	original := satLookPath
	t.Cleanup(func() { satLookPath = original })
	satLookPath = func(string) (string, error) { return "", exec.ErrNotFound }
	_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
	if err == nil {
		t.Fatal("expected error")
	}
	if !strings.Contains(err.Error(), "stress-ng not found in PATH") {
		t.Fatalf("error=%q", err)
	}
}
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) { func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
tmp := t.TempDir() tmp := t.TempDir()
execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi") execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")

View File

@@ -4,9 +4,11 @@ import (
"bufio" "bufio"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"regexp" "regexp"
@@ -179,19 +181,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
Profile string `json:"profile"` Profile string `json:"profile"`
DisplayName string `json:"display_name"` DisplayName string `json:"display_name"`
} }
if r.ContentLength > 0 { if r.Body != nil {
_ = json.NewDecoder(r.Body).Decode(&body) if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
} writeError(w, http.StatusBadRequest, "invalid request body")
return
name := taskNames[target]
if body.Profile != "" {
if n, ok := burnNames[target]; ok {
name = n
} }
} }
if name == "" {
name = target name := taskDisplayName(target, body.Profile, body.Loader)
}
t := &Task{ t := &Task{
ID: newJobID("sat-" + target), ID: newJobID("sat-" + target),
Name: name, Name: name,
@@ -667,6 +664,22 @@ func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request)
// ── Metrics SSE ─────────────────────────────────────────────────────────────── // ── Metrics SSE ───────────────────────────────────────────────────────────────
// handleAPIMetricsLatest serves the most recent metrics sample as JSON.
// When no sample has been collected yet it returns an empty JSON object so
// clients can treat the response uniformly.
func (h *handler) handleAPIMetricsLatest(w http.ResponseWriter, r *http.Request) {
	sample, ok := h.latestMetric()
	if !ok {
		w.Header().Set("Content-Type", "application/json")
		_, _ = io.WriteString(w, "{}")
		return
	}
	payload, err := json.Marshal(sample)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	_, _ = w.Write(payload)
}
func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
if !sseStart(w) { if !sseStart(w) {
return return
@@ -917,8 +930,31 @@ func parseXrandrOutput(out string) []displayInfo {
return infos return infos
} }
// xrandrCommand builds an xrandr invocation that also works outside an X
// session (e.g. under a systemd service) by injecting default DISPLAY and
// XAUTHORITY values when the current environment lacks non-empty ones.
func xrandrCommand(args ...string) *exec.Cmd {
	cmd := exec.Command("xrandr", args...)
	env := append([]string{}, os.Environ()...)
	// os.Getenv returns "" both when the variable is unset and when it is
	// set to the empty string — exactly the cases we want to patch.
	if os.Getenv("DISPLAY") == "" {
		env = append(env, "DISPLAY=:0")
	}
	if os.Getenv("XAUTHORITY") == "" {
		env = append(env, "XAUTHORITY=/home/bee/.Xauthority")
	}
	cmd.Env = env
	return cmd
}
func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) { func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
out, err := exec.Command("xrandr").Output() out, err := xrandrCommand().Output()
if err != nil { if err != nil {
writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error()) writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
return return
@@ -945,7 +981,7 @@ func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
writeError(w, http.StatusBadRequest, "invalid output name") writeError(w, http.StatusBadRequest, "invalid output name")
return return
} }
if out, err := exec.Command("xrandr", "--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil { if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out))) writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
return return
} }

View File

@@ -0,0 +1,64 @@
package webui
import (
"net/http/httptest"
"strings"
"testing"
"bee/audit/internal/app"
)
// TestXrandrCommandAddsDefaultX11Env verifies that empty DISPLAY/XAUTHORITY
// values are replaced with usable defaults in the command environment.
func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
	t.Setenv("DISPLAY", "")
	t.Setenv("XAUTHORITY", "")
	cmd := xrandrCommand("--query")
	seen := map[string]bool{}
	for _, kv := range cmd.Env {
		seen[kv] = true
	}
	if !seen["DISPLAY=:0"] {
		t.Fatalf("DISPLAY not injected: %v", cmd.Env)
	}
	if !seen["XAUTHORITY=/home/bee/.Xauthority"] {
		t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
	}
}
// TestHandleAPISATRunDecodesBodyWithoutContentLength verifies that the SAT
// run endpoint decodes a JSON body even when ContentLength is unknown
// (chunked transfer), instead of gating the decode on ContentLength > 0.
func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
	globalQueue.mu.Lock()
	savedTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = savedTasks
		globalQueue.mu.Unlock()
	})

	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	req := httptest.NewRequest("POST", "/api/sat/cpu/run", strings.NewReader(`{"profile":"smoke"}`))
	req.ContentLength = -1 // simulate an unknown-length (chunked) body
	rec := httptest.NewRecorder()
	h.handleAPISATRun("cpu").ServeHTTP(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	if len(globalQueue.tasks) != 1 {
		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
	}
	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
		t.Fatalf("burn profile=%q want smoke", got)
	}
}

View File

@@ -289,7 +289,7 @@ func renderAudit() string {
func renderHardwareSummaryCard(opts HandlerOptions) string { func renderHardwareSummaryCard(opts HandlerOptions) string {
data, err := loadSnapshot(opts.AuditPath) data, err := loadSnapshot(opts.AuditPath)
if err != nil { if err != nil {
return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><span class="badge badge-unknown">No audit data</span></div></div>` return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><button class="btn btn-primary" onclick="auditModalRun()">&#9654; Run Audit</button></div></div>`
} }
// Parse just enough fields for the summary banner // Parse just enough fields for the summary banner
var snap struct { var snap struct {
@@ -532,16 +532,10 @@ function refreshCharts() {
} }
setInterval(refreshCharts, 3000); setInterval(refreshCharts, 3000);
const es = new EventSource('/api/metrics/stream'); fetch('/api/metrics/latest').then(r => r.json()).then(d => {
es.addEventListener('metrics', e => {
const d = JSON.parse(e.data);
// Show/hide Fan RPM card based on data availability
const fanCard = document.getElementById('card-server-fans'); const fanCard = document.getElementById('card-server-fans');
if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none'; if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
}).catch(() => {});
});
es.onerror = () => {};
</script>` </script>`
} }

View File

@@ -270,6 +270,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export // Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream) mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
mux.HandleFunc("GET /api/metrics/latest", h.handleAPIMetricsLatest)
mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG) mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV) mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV)
@@ -1230,13 +1231,6 @@ probe();
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) { func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
page := strings.TrimPrefix(r.URL.Path, "/") page := strings.TrimPrefix(r.URL.Path, "/")
if page == "" { if page == "" {
// Serve loading page until audit snapshot exists
if _, err := os.Stat(h.opts.AuditPath); err != nil {
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, _ = w.Write([]byte(loadingPageHTML))
return
}
page = "dashboard" page = "dashboard"
} }
// Redirect old routes to new names // Redirect old routes to new names

View File

@@ -136,6 +136,33 @@ func TestRootRendersDashboard(t *testing.T) {
} }
} }
// TestRootShowsRunAuditButtonWhenSnapshotMissing verifies the dashboard
// offers a "Run Audit" action (instead of an empty badge) when the audit
// snapshot file does not exist yet.
func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
	dir := t.TempDir()
	exportDir := filepath.Join(dir, "export")
	if err := os.MkdirAll(exportDir, 0755); err != nil {
		t.Fatal(err)
	}
	h := NewHandler(HandlerOptions{
		Title:     "Bee Hardware Audit",
		AuditPath: filepath.Join(dir, "missing-audit.json"),
		ExportDir: exportDir,
	})
	rec := httptest.NewRecorder()
	h.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d", rec.Code)
	}
	body := rec.Body.String()
	switch {
	case !strings.Contains(body, `Run Audit`):
		t.Fatalf("dashboard missing run audit button: %s", body)
	case strings.Contains(body, `No audit data`):
		t.Fatalf("dashboard still shows empty audit badge: %s", body)
	}
}
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) { func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
path := filepath.Join(dir, "audit.json") path := filepath.Join(dir, "audit.json")

View File

@@ -8,6 +8,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"sort" "sort"
"strings"
"sync" "sync"
"time" "time"
@@ -51,6 +52,33 @@ var burnNames = map[string]string{
"amd": "AMD GPU Burn-in", "amd": "AMD GPU Burn-in",
} }
// nvidiaStressTaskName returns the human-readable queue name for an NVIDIA
// stress task based on the requested loader; empty or unrecognized loaders
// map to the default bee-gpu-burn name.
func nvidiaStressTaskName(loader string) string {
	normalized := strings.ToLower(strings.TrimSpace(loader))
	if normalized == platform.NvidiaStressLoaderJohn {
		return "NVIDIA GPU Stress (John/OpenCL)"
	}
	if normalized == platform.NvidiaStressLoaderNCCL {
		return "NVIDIA GPU Stress (NCCL)"
	}
	return "NVIDIA GPU Stress (bee-gpu-burn)"
}
func taskDisplayName(target, profile, loader string) string {
name := taskNames[target]
if profile != "" {
if n, ok := burnNames[target]; ok {
name = n
}
}
if target == "nvidia-stress" {
name = nvidiaStressTaskName(loader)
}
if name == "" {
name = target
}
return name
}
// Task represents one unit of work in the queue. // Task represents one unit of work in the queue.
type Task struct { type Task struct {
ID string `json:"id"` ID string `json:"id"`
@@ -440,6 +468,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
if dur <= 0 { if dur <= 0 {
dur = 60 dur = 60
} }
j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append) archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
case "amd": case "amd":
archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append) archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)

View File

@@ -95,6 +95,23 @@ func TestResolveBurnPreset(t *testing.T) {
} }
} }
// TestTaskDisplayNameUsesNvidiaStressLoader checks loader-specific naming
// for nvidia-stress tasks, including the builtin/empty fallback.
func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
	cases := []struct {
		loader string
		want   string
	}{
		{"", "NVIDIA GPU Stress (bee-gpu-burn)"},
		{"builtin", "NVIDIA GPU Stress (bee-gpu-burn)"},
		{"john", "NVIDIA GPU Stress (John/OpenCL)"},
		{"nccl", "NVIDIA GPU Stress (NCCL)"},
	}
	for _, c := range cases {
		got := taskDisplayName("nvidia-stress", "acceptance", c.loader)
		if got != c.want {
			t.Fatalf("taskDisplayName(loader=%q)=%q want %q", c.loader, got, c.want)
		}
	}
}
func TestRunTaskHonorsCancel(t *testing.T) { func TestRunTaskHonorsCancel(t *testing.T) {
t.Parallel() t.Parallel()
@@ -154,3 +171,34 @@ func TestRunTaskHonorsCancel(t *testing.T) {
t.Fatal("runTask did not return after cancel") t.Fatal("runTask did not return after cancel")
} }
} }
// TestRunTaskUsesBurnProfileDurationForCPU verifies that runTask derives the
// CPU burn duration from the task's burn profile ("smoke" => 5 minutes).
//
// This test must NOT call t.Parallel(): it swaps the package-level
// runCPUAcceptancePackCtx hook, and mutating that global concurrently with
// other parallel tests that exercise runTask would be a data race.
func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
	var gotDuration int
	q := &taskQueue{
		opts: &HandlerOptions{App: &app.App{}},
	}
	tk := &Task{
		ID:        "cpu-burn-1",
		Name:      "CPU Burn-in",
		Target:    "cpu",
		Status:    TaskRunning,
		CreatedAt: time.Now(),
		params:    taskParams{BurnProfile: "smoke"},
	}
	j := &jobState{}
	orig := runCPUAcceptancePackCtx
	runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
		gotDuration = durationSec
		return "/tmp/cpu-burn.tar.gz", nil
	}
	t.Cleanup(func() { runCPUAcceptancePackCtx = orig })
	q.runTask(tk, j, context.Background())
	if gotDuration != 5*60 {
		t.Fatalf("duration=%d want %d", gotDuration, 5*60)
	}
}

2
bible

Submodule bible updated: 688b87e98d...456c1f022c

View File

@@ -13,9 +13,10 @@ Use one of:
This applies to: This applies to:
- `iso/builder/config/package-lists/*.list.chroot` - `iso/builder/config/package-lists/*.list.chroot`
- Any package referenced in `grub.cfg`, hooks, or overlay scripts (e.g. file paths like `/boot/memtest86+x64.bin`) - Any package referenced in bootloader configs, hooks, or overlay scripts
## Example of what goes wrong without this ## Memtest rule
`memtest86+` in Debian bookworm installs `/boot/memtest86+x64.bin`, not `/boot/memtest86+.bin`. Prefer live-build's built-in memtest integration over custom hooks or hardcoded
Guessing the filename caused a broken GRUB entry that only surfaced at boot time, after a full rebuild. bootloader paths. If you ever need to reference memtest files manually, verify
the exact package file list first for the target Debian release.

View File

@@ -29,7 +29,7 @@ lb config noauto \
--security true \ --security true \
--linux-flavours "amd64" \ --linux-flavours "amd64" \
--linux-packages "${LB_LINUX_PACKAGES}" \ --linux-packages "${LB_LINUX_PACKAGES}" \
--memtest none \ --memtest memtest86+ \
--iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \ --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
--iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \ --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \ --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \

View File

@@ -36,6 +36,7 @@ typedef void *CUstream;
#define MAX_CUBLAS_PROFILES 5 #define MAX_CUBLAS_PROFILES 5
#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u) #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u) #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
#define STRESS_LAUNCH_DEPTH 8
static const char *ptx_source = static const char *ptx_source =
".version 6.0\n" ".version 6.0\n"
@@ -422,24 +423,31 @@ static int run_ptx_fallback(struct cuda_api *api,
double deadline = start + (double)seconds; double deadline = start + (double)seconds;
while (now_seconds() < deadline) { while (now_seconds() < deadline) {
launches_per_wave = 0; launches_per_wave = 0;
for (int lane = 0; lane < stream_count; lane++) { for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads); int launched_this_batch = 0;
if (!check_rc(api, for (int lane = 0; lane < stream_count; lane++) {
"cuLaunchKernel", unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
api->cuLaunchKernel(kernel, if (!check_rc(api,
blocks, "cuLaunchKernel",
1, api->cuLaunchKernel(kernel,
1, blocks,
threads, 1,
1, 1,
1, threads,
0, 1,
streams[lane], 1,
params[lane], 0,
NULL))) { streams[lane],
goto fail; params[lane],
NULL))) {
goto fail;
}
launches_per_wave++;
launched_this_batch++;
}
if (launched_this_batch <= 0) {
break;
} }
launches_per_wave++;
} }
if (launches_per_wave <= 0) { if (launches_per_wave <= 0) {
goto fail; goto fail;
@@ -460,10 +468,11 @@ static int run_ptx_fallback(struct cuda_api *api,
report->iterations = iterations; report->iterations = iterations;
snprintf(report->details, snprintf(report->details,
sizeof(report->details), sizeof(report->details),
"fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d per_stream_mb=%zu iterations=%lu\n", "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
size_mb, size_mb,
report->buffer_mb, report->buffer_mb,
report->stream_count, report->stream_count,
STRESS_LAUNCH_DEPTH,
bytes_per_stream[0] / (1024u * 1024u), bytes_per_stream[0] / (1024u * 1024u),
iterations); iterations);
@@ -1184,10 +1193,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
report->buffer_mb = (int)(total_budget / (1024u * 1024u)); report->buffer_mb = (int)(total_budget / (1024u * 1024u));
append_detail(report->details, append_detail(report->details,
sizeof(report->details), sizeof(report->details),
"requested_mb=%d actual_mb=%d streams=%d mp_count=%d per_worker_mb=%zu\n", "requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
size_mb, size_mb,
report->buffer_mb, report->buffer_mb,
report->stream_count, report->stream_count,
STRESS_LAUNCH_DEPTH,
mp_count, mp_count,
per_profile_budget / (1024u * 1024u)); per_profile_budget / (1024u * 1024u));
@@ -1239,26 +1249,33 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
double deadline = now_seconds() + (double)seconds; double deadline = now_seconds() + (double)seconds;
while (now_seconds() < deadline) { while (now_seconds() < deadline) {
wave_launches = 0; wave_launches = 0;
for (int i = 0; i < prepared_count; i++) { for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
if (!prepared[i].ready) { int launched_this_batch = 0;
continue; for (int i = 0; i < prepared_count; i++) {
} if (!prepared[i].ready) {
if (!run_cublas_profile(handle, &cublas, &prepared[i])) { continue;
append_detail(report->details,
sizeof(report->details),
"%s=FAILED runtime\n",
prepared[i].desc.name);
for (int j = 0; j < prepared_count; j++) {
destroy_profile(&cublas, cuda, &prepared[j]);
} }
cublas.cublasLtDestroy(handle); if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
destroy_streams(cuda, streams, stream_count); append_detail(report->details,
cuda->cuCtxDestroy(ctx); sizeof(report->details),
return 0; "%s=FAILED runtime\n",
prepared[i].desc.name);
for (int j = 0; j < prepared_count; j++) {
destroy_profile(&cublas, cuda, &prepared[j]);
}
cublas.cublasLtDestroy(handle);
destroy_streams(cuda, streams, stream_count);
cuda->cuCtxDestroy(ctx);
return 0;
}
prepared[i].iterations++;
report->iterations++;
wave_launches++;
launched_this_batch++;
}
if (launched_this_batch <= 0) {
break;
} }
prepared[i].iterations++;
report->iterations++;
wave_launches++;
} }
if (wave_launches <= 0) { if (wave_launches <= 0) {
break; break;

View File

@@ -111,8 +111,231 @@ resolve_iso_version() {
resolve_audit_version resolve_audit_version
} }
# iso_list_files ISO
# Print the file listing of an ISO image, preferring bsdtar and falling
# back to xorriso. Returns 127 when neither tool is installed.
iso_list_files() {
    iso_path="$1"
    if command -v bsdtar >/dev/null 2>&1; then
        bsdtar -tf "$iso_path"
    elif command -v xorriso >/dev/null 2>&1; then
        # xorriso prints absolute paths; strip the leading slash so the
        # output matches bsdtar's relative listing.
        xorriso -indev "$iso_path" -find / -type f -print 2>/dev/null | sed 's#^/##'
    else
        return 127
    fi
}
# iso_extract_file ISO MEMBER
# Write a single file from the ISO to stdout, preferring bsdtar and falling
# back to xorriso. Returns 127 when neither tool is installed.
iso_extract_file() {
    iso_path="$1"
    iso_member="$2"
    if command -v bsdtar >/dev/null 2>&1; then
        bsdtar -xOf "$iso_path" "$iso_member"
    elif command -v xorriso >/dev/null 2>&1; then
        # xorriso addresses members by absolute path inside the image.
        xorriso -osirrox on -indev "$iso_path" -cat "/$iso_member" 2>/dev/null
    else
        return 127
    fi
}
# require_iso_reader [ISO_PATH]
# Abort the build (via memtest_fail) unless an ISO reading tool is present.
require_iso_reader() {
    if ! command -v bsdtar >/dev/null 2>&1 && ! command -v xorriso >/dev/null 2>&1; then
        memtest_fail "ISO reader is required for validation/debug (expected bsdtar or xorriso)" "${1:-}"
    fi
}
# dump_memtest_debug PHASE [LB_DIR] [ISO_PATH]
# Print a diagnostic report of everything memtest-related: the live-build
# auto/config, the source bootloader templates, the live-build workdir
# (package lists, chroot/binary boot dirs, package cache) and the final ISO
# contents. Output is mirrored to ${LOG_DIR}/memtest-<phase>.log when
# LOG_DIR exists, otherwise it just goes to stdout.
dump_memtest_debug() {
phase="$1"
lb_dir="${2:-}"
iso_path="${3:-}"
# Make the phase string filename-safe: spaces and slashes -> underscores.
phase_slug="$(printf '%s' "${phase}" | tr ' /' '__')"
memtest_log="${LOG_DIR:-}/memtest-${phase_slug}.log"
# The whole report runs in a subshell so it can be piped to tee/cat below.
(
echo "=== memtest debug: ${phase} ==="
echo "-- auto/config --"
if [ -f "${BUILDER_DIR}/auto/config" ]; then
grep -n -- '--memtest' "${BUILDER_DIR}/auto/config" || echo " (no --memtest line found)"
else
echo " (missing ${BUILDER_DIR}/auto/config)"
fi
echo "-- source bootloader templates --"
for cfg in \
"${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
"${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
if [ -f "$cfg" ]; then
echo " file: $cfg"
grep -n 'Memory Test\|memtest' "$cfg" || echo " (no memtest lines)"
fi
done
# Live-build workdir state: only inspected when the caller provides it.
if [ -n "$lb_dir" ] && [ -d "$lb_dir" ]; then
echo "-- live-build workdir package lists --"
for pkg in \
"$lb_dir/config/package-lists/bee.list.chroot" \
"$lb_dir/config/package-lists/bee-gpu.list.chroot" \
"$lb_dir/config/package-lists/bee-nvidia.list.chroot"; do
if [ -f "$pkg" ]; then
echo " file: $pkg"
grep -n 'memtest' "$pkg" || echo " (no memtest lines)"
fi
done
echo "-- live-build chroot/boot --"
if [ -d "$lb_dir/chroot/boot" ]; then
find "$lb_dir/chroot/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/ /' || true
else
echo " (missing $lb_dir/chroot/boot)"
fi
echo "-- live-build binary/boot --"
if [ -d "$lb_dir/binary/boot" ]; then
find "$lb_dir/binary/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/ /' || true
else
echo " (missing $lb_dir/binary/boot)"
fi
echo "-- live-build package cache --"
if [ -d "$lb_dir/cache/packages.chroot" ]; then
find "$lb_dir/cache/packages.chroot" -maxdepth 1 -name 'memtest86+*.deb' -print | sed 's/^/ /' || true
else
echo " (missing $lb_dir/cache/packages.chroot)"
fi
fi
# Final ISO contents: only inspected when an ISO path is provided and exists.
if [ -n "$iso_path" ] && [ -f "$iso_path" ]; then
echo "-- ISO memtest files --"
iso_list_files "$iso_path" | grep 'memtest' | sed 's/^/ /' || echo " (no memtest files in ISO)"
echo "-- ISO GRUB memtest lines --"
iso_extract_file "$iso_path" boot/grub/grub.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in boot/grub/grub.cfg)"
echo "-- ISO isolinux memtest lines --"
iso_extract_file "$iso_path" isolinux/live.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in isolinux/live.cfg)"
fi
echo "=== end memtest debug: ${phase} ==="
# Mirror to the log file only when LOG_DIR is a real directory.
) | {
if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ]; then
tee "${memtest_log}"
else
cat
fi
}
}
# memtest_fail MESSAGE [ISO_PATH]
# Print an error to stderr, dump the full memtest debug report (also to
# stderr), and abort the script with exit status 1.
memtest_fail() {
msg="$1"
iso_path="${2:-}"
echo "ERROR: ${msg}" >&2
dump_memtest_debug "failure" "${LB_DIR:-}" "$iso_path" >&2
exit 1
}
# validate_iso_memtest ISO
# Fail the build unless the ISO ships both memtest86+ binaries (BIOS .bin
# and EFI .efi) and both bootloader configs expose a working memtest entry.
# All failures route through memtest_fail, which dumps debug state and exits.
validate_iso_memtest() {
    iso_path="$1"
    echo "=== validating memtest in ISO ==="
    [ -f "$iso_path" ] || memtest_fail "ISO not found for validation: $iso_path" "$iso_path"
    require_iso_reader "$iso_path"
    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || \
        memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path"
    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || \
        memtest_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi" "$iso_path"
    grub_cfg="$(mktemp)"
    isolinux_cfg="$(mktemp)"
    # Record the first failure instead of exiting immediately, so the mktemp
    # files are always removed (memtest_fail exits the whole script, which
    # previously leaked both temp files on every failing check).
    fail_msg=""
    if ! iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg"; then
        fail_msg="failed to extract boot/grub/grub.cfg from ISO"
    elif ! iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg"; then
        fail_msg="failed to extract isolinux/live.cfg from ISO"
    elif ! grep -q 'Memory Test (memtest86+)' "$grub_cfg"; then
        fail_msg="GRUB menu entry for memtest is missing"
    elif ! grep -q '/boot/memtest86+x64\.efi' "$grub_cfg"; then
        fail_msg="GRUB memtest EFI path is missing"
    elif ! grep -q '/boot/memtest86+x64\.bin' "$grub_cfg"; then
        fail_msg="GRUB memtest BIOS path is missing"
    elif ! grep -q 'Memory Test (memtest86+)' "$isolinux_cfg"; then
        fail_msg="isolinux menu entry for memtest is missing"
    elif ! grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg"; then
        fail_msg="isolinux memtest path is missing"
    fi
    rm -f "$grub_cfg" "$isolinux_cfg"
    [ -z "$fail_msg" ] || memtest_fail "$fail_msg" "$iso_path"
    echo "=== memtest validation OK ==="
}
AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)" AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
ISO_VERSION_EFFECTIVE="$(resolve_iso_version)" ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
LOG_DIR="${DIST_DIR}/${ISO_BASENAME}.logs"
LOG_ARCHIVE="${DIST_DIR}/${ISO_BASENAME}.logs.tar.gz"
ISO_OUT="${DIST_DIR}/${ISO_BASENAME}.iso"
LOG_OUT="${LOG_DIR}/build.log"
# cleanup_build_log [STATUS]
# EXIT/INT/TERM/HUP trap handler installed by start_build_log. Restores the
# original stdout/stderr, waits for the background tee to drain, removes the
# log FIFO, archives the log directory, and exits with the original status.
cleanup_build_log() {
status="${1:-$?}"
# Clear the traps first so this handler cannot re-enter itself.
trap - EXIT INT TERM HUP
if [ "${BUILD_LOG_ACTIVE:-0}" = "1" ]; then
BUILD_LOG_ACTIVE=0
# Restore stdout/stderr from the fds saved in start_build_log (3 and 4),
# then close the saved copies.
exec 1>&3 2>&4
exec 3>&- 4>&-
# Let tee finish flushing everything still buffered in the FIFO.
if [ -n "${BUILD_TEE_PID:-}" ]; then
wait "${BUILD_TEE_PID}" 2>/dev/null || true
fi
rm -f "${BUILD_LOG_PIPE}"
fi
# Best-effort: pack the log directory next to the ISO; never fail cleanup
# because archiving failed.
if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
rm -f "${LOG_ARCHIVE}"
tar -czf "${LOG_ARCHIVE}" -C "${DIST_DIR}" "$(basename "${LOG_DIR}")" 2>/dev/null || true
fi
# Preserve the original exit status of the build.
exit "${status}"
}
# start_build_log
# Redirect all build output through a FIFO into `tee`, so everything is both
# shown on the console and captured in ${LOG_OUT}. Saves the original
# stdout/stderr on fds 3/4 and installs cleanup_build_log as the trap that
# undoes the redirection and archives the logs on exit.
start_build_log() {
command -v tee >/dev/null 2>&1 || {
echo "ERROR: tee is required for build logging" >&2
exit 1
}
# Start from a clean log directory/archive for this build.
rm -rf "${LOG_DIR}"
rm -f "${LOG_ARCHIVE}"
mkdir -p "${LOG_DIR}"
# NOTE(review): mktemp -u only reserves a name, not the file, so there is a
# small window before mkfifo where the path could be taken — acceptable for
# a build script, but worth confirming if this ever runs multi-instance.
BUILD_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-build-log.XXXXXX")"
mkfifo "${BUILD_LOG_PIPE}"
# Save the real stdout/stderr so cleanup_build_log can restore them.
exec 3>&1 4>&2
# tee reads from the FIFO in the background and mirrors to the log file.
tee "${LOG_OUT}" < "${BUILD_LOG_PIPE}" &
BUILD_TEE_PID=$!
# From here on, everything the script prints flows through the FIFO.
exec > "${BUILD_LOG_PIPE}" 2>&1
BUILD_LOG_ACTIVE=1
trap 'cleanup_build_log "$?"' EXIT INT TERM HUP
echo "=== build log dir: ${LOG_DIR} ==="
echo "=== build log: ${LOG_OUT} ==="
echo "=== build log archive: ${LOG_ARCHIVE} ==="
}
start_build_log
# Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency. # Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency.
# If headers for the detected ABI are not yet installed (kernel updated since image build), # If headers for the detected ABI are not yet installed (kernel updated since image build),
@@ -245,13 +468,13 @@ rm -f \
"${OVERLAY_STAGE_DIR}/etc/bee-release" \ "${OVERLAY_STAGE_DIR}/etc/bee-release" \
"${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \ "${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \ "${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \ "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/john" \ "${OVERLAY_STAGE_DIR}/usr/local/bin/john" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \ "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest" \ "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf" "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
rm -rf \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
# Remove NVIDIA-specific overlay files for non-nvidia variants # Remove NVIDIA-specific overlay files for non-nvidia variants
if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then
@@ -304,7 +527,6 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ] && [ -f "$GPU_BURN_WORKER_BIN" ]; then
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn" 2>/dev/null || true chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress" 2>/dev/null || true chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" 2>/dev/null || true chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" 2>/dev/null || true
ln -sfn bee-gpu-burn "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
fi fi
# --- inject smoketest into overlay so it runs directly on the live CD --- # --- inject smoketest into overlay so it runs directly on the live CD ---
@@ -510,6 +732,7 @@ export BEE_GPU_VENDOR_UPPER
cd "${LB_DIR}" cd "${LB_DIR}"
lb clean 2>&1 | tail -3 lb clean 2>&1 | tail -3
lb config 2>&1 | tail -5 lb config 2>&1 | tail -5
dump_memtest_debug "pre-build" "${LB_DIR}"
lb build 2>&1 lb build 2>&1
# --- persist deb package cache back to shared location --- # --- persist deb package cache back to shared location ---
@@ -521,8 +744,9 @@ fi
# live-build outputs live-image-amd64.hybrid.iso in LB_DIR # live-build outputs live-image-amd64.hybrid.iso in LB_DIR
ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso" ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
ISO_OUT="${DIST_DIR}/easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
if [ -f "$ISO_RAW" ]; then if [ -f "$ISO_RAW" ]; then
dump_memtest_debug "post-build" "${LB_DIR}" "$ISO_RAW"
validate_iso_memtest "$ISO_RAW"
cp "$ISO_RAW" "$ISO_OUT" cp "$ISO_RAW" "$ISO_OUT"
echo "" echo ""
echo "=== done (${BEE_GPU_VENDOR}) ===" echo "=== done (${BEE_GPU_VENDOR}) ==="

View File

@@ -22,3 +22,7 @@ label live-@FLAVOUR@-failsafe
linux @LINUX@ linux @LINUX@
initrd @INITRD@ initrd @INITRD@
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
label memtest
menu label ^Memory Test (memtest86+)
linux /boot/memtest86+x64.bin

View File

@@ -1,76 +0,0 @@
#!/bin/sh
# Copy memtest86+ binaries from chroot /boot into the ISO boot directory
# so GRUB can chainload them directly (they must be on the ISO filesystem,
# not inside the squashfs).
#
# Primary: copy from chroot/boot/ (populated by package postinst).
# Naming fallbacks:
# Debian Bookworm: /boot/memtest86+ — EFI PE64 (no extension)
# /boot/memtest86+.bin — legacy binary
# Upstream/Ubuntu: /boot/memtest86+x64.efi, /boot/memtest86+x64.bin, etc.
# Last resort: extract directly from the cached .deb if postinst didn't place
# the files (happens in chroot environments without grub triggers).
set -e
# Upstream/Ubuntu file names that should end up on the ISO.
MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi memtest86+ia32.bin memtest86+ia32.efi"
# Ensure destination directory exists (absence caused silent copy failures).
mkdir -p binary/boot
echo "memtest: scanning chroot/boot/ for memtest files:"
ls chroot/boot/memtest* 2>/dev/null || echo "memtest: WARNING: no memtest files in chroot/boot/"
# Primary path: copy upstream-named files from chroot/boot/
for f in ${MEMTEST_FILES}; do
src="chroot/boot/${f}"
if [ -f "${src}" ]; then
cp "${src}" "binary/boot/${f}"
echo "memtest: copied ${f} from chroot/boot/"
fi
done
# Debian Bookworm naming fallback: /boot/memtest86+ (no extension) is the EFI binary.
if [ ! -f "binary/boot/memtest86+x64.efi" ] && [ -f "chroot/boot/memtest86+" ]; then
cp "chroot/boot/memtest86+" "binary/boot/memtest86+x64.efi"
echo "memtest: copied /boot/memtest86+ as memtest86+x64.efi (Debian naming)"
fi
if [ ! -f "binary/boot/memtest86+x64.bin" ] && [ -f "chroot/boot/memtest86+.bin" ]; then
cp "chroot/boot/memtest86+.bin" "binary/boot/memtest86+x64.bin"
echo "memtest: copied /boot/memtest86+.bin as memtest86+x64.bin (Debian naming)"
fi
# Last resort: if EFI binary still missing, extract from cached .deb
if [ ! -f "binary/boot/memtest86+x64.efi" ]; then
echo "memtest: EFI binary missing — attempting extraction from .deb cache"
# Search the chroot apt caches first, then live-build's own cache/ tree.
deb=$(find chroot/var/cache/apt/archives/ chroot/var/lib/apt/lists/ \
-name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' 2>/dev/null \
| head -1)
if [ -z "$deb" ]; then
deb=$(find cache/ -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' 2>/dev/null | head -1)
fi
if [ -n "$deb" ]; then
echo "memtest: extracting from ${deb}"
EXTRACT_DIR="$(mktemp -d)"
# dpkg-deb -x unpacks the payload only; no maintainer scripts run.
dpkg-deb -x "${deb}" "${EXTRACT_DIR}"
echo "memtest: files found in .deb:"
find "${EXTRACT_DIR}/boot" -type f 2>/dev/null || echo " (none in /boot)"
for f in ${MEMTEST_FILES}; do
src="${EXTRACT_DIR}/boot/${f}"
if [ -f "${src}" ]; then
cp "${src}" "binary/boot/${f}"
echo "memtest: extracted ${f} from .deb"
fi
done
# Debian naming fallback inside .deb as well
if [ ! -f "binary/boot/memtest86+x64.efi" ] && [ -f "${EXTRACT_DIR}/boot/memtest86+" ]; then
cp "${EXTRACT_DIR}/boot/memtest86+" "binary/boot/memtest86+x64.efi"
echo "memtest: extracted /boot/memtest86+ as memtest86+x64.efi from .deb"
fi
rm -rf "${EXTRACT_DIR}"
else
echo "memtest: WARNING: no memtest86+ .deb found in cache — memtest will not be available"
fi
fi
echo "memtest: binary/boot/ contents:"
ls binary/boot/memtest* 2>/dev/null || echo " (none)"

View File

@@ -21,14 +21,15 @@ openssh-server
# Disk installer # Disk installer
squashfs-tools squashfs-tools
parted parted
# grub-pc / grub-efi-amd64 provide grub-install + grub2-common (required for chroot install). # Keep GRUB install tools without selecting a single active platform package.
# The -bin variants only carry binary modules and do NOT include grub-install itself. # grub-pc and grub-efi-amd64 conflict with each other, but grub2-common
grub-pc # provides grub-install/update-grub and the *-bin packages provide BIOS/UEFI modules.
grub2-common
grub-pc-bin grub-pc-bin
grub-efi-amd64
grub-efi-amd64-bin grub-efi-amd64-bin
grub-efi-amd64-signed grub-efi-amd64-signed
shim-signed shim-signed
efibootmgr
# Filesystem support for USB export targets # Filesystem support for USB export targets
exfatprogs exfatprogs
@@ -50,7 +51,6 @@ sudo
zstd zstd
mstflint mstflint
memtester memtester
memtest86+
stress-ng stress-ng
stressapptest stressapptest

View File

@@ -1,25 +1,9 @@
[Unit] [Unit]
Description=Bee: schedule startup hardware audit via task queue Description=Bee: on-demand hardware audit (not started automatically)
# Start AFTER bee-web, not before — bee-web must not wait for audit.
After=bee-web.service
Wants=bee-web.service
[Service] [Service]
Type=oneshot Type=oneshot
RemainAfterExit=yes RemainAfterExit=yes
# Wait up to 90s for bee-web to respond on /healthz, then sleep 60s for ExecStart=/bin/sh -c 'curl -sf -X POST http://localhost/api/audit/run >/dev/null'
# the system to settle (GPU drivers, sensors), then enqueue the audit as
# a background task so it appears in the task list and logs.
ExecStart=/bin/sh -c '\
i=0; \
while [ $i -lt 90 ]; do \
if curl -sf http://localhost/healthz >/dev/null 2>&1; then break; fi; \
sleep 1; i=$((i+1)); \
done; \
sleep 60; \
curl -sf -X POST http://localhost/api/audit/run >/dev/null'
StandardOutput=journal StandardOutput=journal
StandardError=journal StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -12,17 +12,55 @@
set -euo pipefail set -euo pipefail
# Print the CLI help text to stderr and exit non-zero. Invoked when the
# device argument is missing or when -h/--help is given.
usage() {
cat >&2 <<'EOF'
Usage: bee-install <device> [logfile]
Installs the live system to a local disk (WIPES the target).
device Target block device, e.g. /dev/sda or /dev/nvme0n1
Must be a hard disk or NVMe — NOT a CD-ROM (/dev/sr*)
logfile Optional path for progress log (default: /tmp/bee-install.log)
Examples:
bee-install /dev/sda
bee-install /dev/nvme0n1
bee-install /dev/sdb /tmp/my-install.log
WARNING: ALL DATA ON <device> WILL BE ERASED.
Layout (UEFI): GPT — partition 1: EFI 512MB vfat, partition 2: root ext4
Layout (BIOS): MBR — partition 1: root ext4
EOF
exit 1
}
DEVICE="${1:-}" DEVICE="${1:-}"
LOGFILE="${2:-/tmp/bee-install.log}" LOGFILE="${2:-/tmp/bee-install.log}"
if [ -z "$DEVICE" ]; then if [ -z "$DEVICE" ] || [ "$DEVICE" = "--help" ] || [ "$DEVICE" = "-h" ]; then
echo "Usage: bee-install <device> [logfile]" >&2 usage
exit 1
fi fi
if [ ! -b "$DEVICE" ]; then if [ ! -b "$DEVICE" ]; then
echo "ERROR: $DEVICE is not a block device" >&2 echo "ERROR: $DEVICE is not a block device" >&2
echo "Run 'lsblk' to list available disks." >&2
exit 1 exit 1
fi fi
# Block CD-ROM devices
case "$DEVICE" in
/dev/sr*|/dev/scd*)
echo "ERROR: $DEVICE is a CD-ROM/optical device — cannot install to it." >&2
echo "Run 'lsblk' to find the target disk (e.g. /dev/sda, /dev/nvme0n1)." >&2
exit 1
;;
esac
# Check required tools
for tool in parted mkfs.vfat mkfs.ext4 unsquashfs grub-install update-grub; do
if ! command -v "$tool" >/dev/null 2>&1; then
echo "ERROR: required tool not found: $tool" >&2
exit 1
fi
done
SQUASHFS="/run/live/medium/live/filesystem.squashfs" SQUASHFS="/run/live/medium/live/filesystem.squashfs"
if [ ! -f "$SQUASHFS" ]; then if [ ! -f "$SQUASHFS" ]; then

View File

@@ -7,6 +7,8 @@ EXCLUDE=""
FORMAT="" FORMAT=""
JOHN_DIR="/usr/local/lib/bee/john/run" JOHN_DIR="/usr/local/lib/bee/john/run"
JOHN_BIN="${JOHN_DIR}/john" JOHN_BIN="${JOHN_DIR}/john"
export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
usage() { usage() {
echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2 echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
@@ -23,6 +25,95 @@ contains_csv() {
echo ",${haystack}," | grep -q ",${needle}," echo ",${haystack}," | grep -q ",${needle},"
} }
# Dump OpenCL/NVIDIA runtime state to stderr so a failed device probe can
# be debugged: ICD registry contents, NVIDIA device nodes, loader cache
# entries, clinfo output (when installed) and John's own device listing.
# Every step is best-effort; the function itself always succeeds.
show_opencl_diagnostics() {
    echo "-- OpenCL ICD vendors --" >&2
    if [ ! -d /etc/OpenCL/vendors ]; then
        echo " /etc/OpenCL/vendors is missing" >&2
    else
        ls -l /etc/OpenCL/vendors >&2 || true
        # Show each registered ICD file along with its contents.
        for vendor_icd in /etc/OpenCL/vendors/*.icd; do
            if [ -f "${vendor_icd}" ]; then
                echo " file: ${vendor_icd}" >&2
                sed 's/^/ /' "${vendor_icd}" >&2 || true
            fi
        done
    fi
    echo "-- NVIDIA device nodes --" >&2
    ls -l /dev/nvidia* >&2 || true
    echo "-- ldconfig OpenCL/NVIDIA --" >&2
    ldconfig -p 2>/dev/null | grep 'libOpenCL\|libcuda\|libnvidia-opencl' >&2 || true
    if command -v clinfo >/dev/null 2>&1; then
        echo "-- clinfo -l --" >&2
        clinfo -l >&2 || true
    fi
    echo "-- john --list=opencl-devices --" >&2
    ./john --list=opencl-devices >&2 || true
}
# Best-effort refresh of the NVIDIA userspace runtime. Requires root
# (returns 1 otherwise); re-runs the bee-nvidia-load helper when present
# and rebuilds the dynamic linker cache, ignoring failures of either.
refresh_nvidia_runtime() {
    [ "$(id -u)" = "0" ] || return 1
    if command -v bee-nvidia-load >/dev/null 2>&1; then
        bee-nvidia-load >/dev/null 2>&1 || true
    fi
    ldconfig >/dev/null 2>&1 || true
    return 0
}
# Ensure the nvidia_uvm kernel module is loaded and its device nodes
# exist. No-op when the module is already present; otherwise requires
# root and the packaged .ko under /usr/local/lib/nvidia. Returns 1 when
# the module cannot be loaded.
ensure_nvidia_uvm() {
    if lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
        return 0
    fi
    [ "$(id -u)" = "0" ] || return 1
    uvm_ko="/usr/local/lib/nvidia/nvidia-uvm.ko"
    [ -f "${uvm_ko}" ] || return 1
    insmod "${uvm_ko}" >/dev/null 2>&1 || return 1
    # The major number is assigned dynamically; read it from /proc/devices
    # and create /dev/nvidia-uvm{,-tools} by hand (no udev on the live CD).
    uvm_major=$(awk '/ nvidia-uvm$/ {print $1; exit}' /proc/devices)
    if [ -n "${uvm_major}" ]; then
        mknod -m 666 /dev/nvidia-uvm c "${uvm_major}" 0 2>/dev/null || true
        mknod -m 666 /dev/nvidia-uvm-tools c "${uvm_major}" 1 2>/dev/null || true
    fi
    return 0
}
# Confirm John can enumerate at least one OpenCL device, retrying the
# probe after each recovery step (userspace runtime refresh, then
# nvidia_uvm load). On failure, prints a summary plus full diagnostics
# to stderr and returns 1.
ensure_opencl_ready() {
    probe=$(./john --list=opencl-devices 2>&1 || true)
    case "${probe}" in
    *"Device #"*) return 0 ;;
    esac
    # Recovery 1: reload NVIDIA userspace runtime, then re-probe.
    if refresh_nvidia_runtime; then
        probe=$(./john --list=opencl-devices 2>&1 || true)
        case "${probe}" in
        *"Device #"*) return 0 ;;
        esac
    fi
    # Recovery 2: load nvidia_uvm and create device nodes, then re-probe.
    if ensure_nvidia_uvm; then
        probe=$(./john --list=opencl-devices 2>&1 || true)
        case "${probe}" in
        *"Device #"*) return 0 ;;
        esac
    fi
    echo "OpenCL devices are not available for John." >&2
    if ! lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
        echo "nvidia_uvm is not loaded." >&2
    fi
    if [ ! -e /dev/nvidia-uvm ]; then
        echo "/dev/nvidia-uvm is missing." >&2
    fi
    show_opencl_diagnostics
    return 1
}
while [ "$#" -gt 0 ]; do while [ "$#" -gt 0 ]; do
case "$1" in case "$1" in
--seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;; --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
@@ -76,6 +167,8 @@ echo "john_devices=${JOHN_DEVICES}"
cd "${JOHN_DIR}" cd "${JOHN_DIR}"
ensure_opencl_ready || exit 1
choose_format() { choose_format() {
if [ -n "${FORMAT}" ]; then if [ -n "${FORMAT}" ]; then
echo "${FORMAT}" echo "${FORMAT}"

View File

@@ -17,7 +17,7 @@ mkdir -p "$(dirname "$log_file")"
serial_sink() { serial_sink() {
local tty="$1" local tty="$1"
if [ -w "$tty" ]; then if [ -w "$tty" ]; then
cat > "$tty" cat > "$tty" 2>/dev/null || true
else else
cat > /dev/null cat > /dev/null
fi fi

View File

@@ -59,11 +59,24 @@ load_module() {
return 1 return 1
} }
# Try to load a module shipped with the host kernel via modprobe (as
# opposed to the bee-packaged NVIDIA modules). Logs on success and
# returns 0; returns 1 when modprobe fails.
load_host_module() {
    mod="$1"
    modprobe "$mod" >/dev/null 2>&1 || return 1
    log "host module loaded: $mod"
    return 0
}
case "$nvidia_mode" in case "$nvidia_mode" in
normal|full) normal|full)
if ! load_module nvidia; then if ! load_module nvidia; then
exit 1 exit 1
fi fi
# nvidia-modeset on some server kernels needs ACPI video helper symbols
# exported by the generic "video" module. Best-effort only; compute paths
# remain functional even if display-related modules stay absent.
load_host_module video || true
load_module nvidia-modeset || true load_module nvidia-modeset || true
load_module nvidia-uvm || true load_module nvidia-uvm || true
;; ;;