fix(iso): recover memtest after live-build

refactor(webui): queue install and bundle tasks - v3.18
fix(iso): make memtest non-blocking by default
2026-04-01 08:55:57 +03:00 · 2026-04-01 08:46:46 +03:00 · 2026-04-01 08:33:36 +03:00 · 2026-04-01 08:23:39 +03:00 · 2026-04-01 08:19:11 +03:00 · 2026-04-01 07:31:52 +03:00
19 changed files with 1554 additions and 161 deletions
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -36,6 +36,8 @@ var supportBundleCommands = []struct {
 	{name: "system/dmesg-tail.txt", cmd: []string{"sh", "-c", "dmesg | tail -n 200"}},
 }

+const supportBundleGlob = "bee-support-*.tar.gz"
+
 func BuildSupportBundle(exportDir string) (string, error) {
 	exportDir = strings.TrimSpace(exportDir)
 	if exportDir == "" {
@@ -86,34 +88,64 @@ func BuildSupportBundle(exportDir string) (string, error) {
 	return archivePath, nil
 }

+func LatestSupportBundlePath() (string, error) {
+	return latestSupportBundlePath(os.TempDir())
+}
+
 func cleanupOldSupportBundles(dir string) error {
-	matches, err := filepath.Glob(filepath.Join(dir, "bee-support-*.tar.gz"))
+	matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
 	if err != nil {
 		return err
 	}
-	type entry struct {
-		path string
-		mod  time.Time
+	entries := supportBundleEntries(matches)
+	for path, mod := range entries {
+		if time.Since(mod) > 24*time.Hour {
+			_ = os.Remove(path)
+			delete(entries, path)
+		}
 	}
-	list := make([]entry, 0, len(matches))
+	ordered := orderSupportBundles(entries)
+	if len(ordered) > 3 {
+		for _, old := range ordered[3:] {
+			_ = os.Remove(old)
+		}
+	}
+	return nil
+}
+
+func latestSupportBundlePath(dir string) (string, error) {
+	matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
+	if err != nil {
+		return "", err
+	}
+	ordered := orderSupportBundles(supportBundleEntries(matches))
+	if len(ordered) == 0 {
+		return "", os.ErrNotExist
+	}
+	return ordered[0], nil
+}
+
+func supportBundleEntries(matches []string) map[string]time.Time {
+	entries := make(map[string]time.Time, len(matches))
 	for _, match := range matches {
 		info, err := os.Stat(match)
 		if err != nil {
 			continue
 		}
-		if time.Since(info.ModTime()) > 24*time.Hour {
-			_ = os.Remove(match)
-			continue
-		}
-		list = append(list, entry{path: match, mod: info.ModTime()})
+		entries[match] = info.ModTime()
 	}
-	sort.Slice(list, func(i, j int) bool { return list[i].mod.After(list[j].mod) })
-	if len(list) > 3 {
-		for _, old := range list[3:] {
-			_ = os.Remove(old.path)
-		}
+	return entries
+}
+
+func orderSupportBundles(entries map[string]time.Time) []string {
+	ordered := make([]string, 0, len(entries))
+	for path := range entries {
+		ordered = append(ordered, path)
 	}
-	return nil
+	sort.Slice(ordered, func(i, j int) bool {
+		return entries[ordered[i]].After(entries[ordered[j]])
+	})
+	return ordered
 }

 func writeJournalDump(dst string) error {
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -16,7 +16,7 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
 		return "", err
 	}

-	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-stress", []satJob{
+	return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), []satJob{
 		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
 		job,
@@ -24,6 +24,17 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
 	}, logFunc)
 }

+func nvidiaStressArchivePrefix(loader string) string {
+	switch strings.TrimSpace(strings.ToLower(loader)) {
+	case NvidiaStressLoaderJohn:
+		return "gpu-nvidia-john"
+	case NvidiaStressLoaderNCCL:
+		return "gpu-nvidia-nccl"
+	default:
+		return "gpu-nvidia-burn"
+	}
+}
+
 func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 	selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
 	if err != nil {
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -684,7 +684,11 @@ func resolveSATCommand(cmd []string) ([]string, error) {
 	case "rvs":
 		return resolveRVSCommand(cmd[1:]...)
 	}
-	return cmd, nil
+	path, err := satLookPath(cmd[0])
+	if err != nil {
+		return nil, fmt.Errorf("%s not found in PATH: %w", cmd[0], err)
+	}
+	return append([]string{path}, cmd[1:]...), nil
 }

 func resolveRVSCommand(args ...string) ([]string, error) {
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -162,6 +162,25 @@ func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
 	}
 }

+func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		loader string
+		want   string
+	}{
+		{loader: NvidiaStressLoaderBuiltin, want: "gpu-nvidia-burn"},
+		{loader: NvidiaStressLoaderJohn, want: "gpu-nvidia-john"},
+		{loader: NvidiaStressLoaderNCCL, want: "gpu-nvidia-nccl"},
+		{loader: "", want: "gpu-nvidia-burn"},
+	}
+	for _, tt := range tests {
+		if got := nvidiaStressArchivePrefix(tt.loader); got != tt.want {
+			t.Fatalf("loader=%q prefix=%q want %q", tt.loader, got, tt.want)
+		}
+	}
+}
+
 func TestEnvIntFallback(t *testing.T) {
 	os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
 	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -237,6 +256,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
 	}
 }

+func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
+	oldLookPath := satLookPath
+	satLookPath = func(file string) (string, error) {
+		if file == "stress-ng" {
+			return "/usr/bin/stress-ng", nil
+		}
+		return "", exec.ErrNotFound
+	}
+	t.Cleanup(func() { satLookPath = oldLookPath })
+
+	cmd, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	if err != nil {
+		t.Fatalf("resolveSATCommand error: %v", err)
+	}
+	if len(cmd) != 3 {
+		t.Fatalf("cmd len=%d want 3 (%v)", len(cmd), cmd)
+	}
+	if cmd[0] != "/usr/bin/stress-ng" {
+		t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", cmd[0])
+	}
+}
+
+func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
+	oldLookPath := satLookPath
+	satLookPath = func(file string) (string, error) {
+		return "", exec.ErrNotFound
+	}
+	t.Cleanup(func() { satLookPath = oldLookPath })
+
+	_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	if !strings.Contains(err.Error(), "stress-ng not found in PATH") {
+		t.Fatalf("error=%q", err)
+	}
+}
+
 func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
 	tmp := t.TempDir()
 	execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -2,11 +2,12 @@ package webui

 import (
 	"bufio"
-	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
+	"os"
 	"os/exec"
 	"path/filepath"
 	"regexp"
@@ -85,15 +86,16 @@ func streamJob(w http.ResponseWriter, r *http.Request, j *jobState) {
 	}
 }

-// runCmdJob runs an exec.Cmd as a background job, streaming stdout+stderr lines.
-func runCmdJob(j *jobState, cmd *exec.Cmd) {
+// streamCmdJob runs cmd to completion, streams its stdout+stderr lines into j, and returns any start, wait, or scan error.
+func streamCmdJob(j *jobState, cmd *exec.Cmd) error {
 	pr, pw := io.Pipe()
 	cmd.Stdout = pw
 	cmd.Stderr = pw

 	if err := cmd.Start(); err != nil {
-		j.finish(err.Error())
-		return
+		_ = pw.Close()
+		_ = pr.Close()
+		return err
 	}
 	// Lower the CPU scheduling priority of stress/audit subprocesses to nice+10
 	// so the X server and kernel interrupt handling remain responsive under load
@@ -102,8 +104,10 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
 		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, 10)
 	}

+	scanDone := make(chan error, 1)
 	go func() {
 		scanner := bufio.NewScanner(pr)
+		scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
 		for scanner.Scan() {
 			// Split on \r to handle progress-bar style output (e.g. \r overwrites)
 			// and strip ANSI escape codes so logs are readable in the browser.
@@ -115,15 +119,21 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
 				}
 			}
 		}
+		if err := scanner.Err(); err != nil && !errors.Is(err, io.ErrClosedPipe) {
+			scanDone <- err
+			_, _ = io.Copy(io.Discard, pr) // keep draining: an unread pipe would block the exec copier and hang cmd.Wait
+		}
+		close(scanDone) // receiver sees the buffered scan error if one was sent, otherwise nil
 	}()

 	err := cmd.Wait()
 	_ = pw.Close()
+	scanErr := <-scanDone
+	_ = pr.Close()
 	if err != nil {
-		j.finish(err.Error())
-	} else {
-		j.finish("")
+		return err
 	}
+	return scanErr
 }

 // ── Audit ─────────────────────────────────────────────────────────────────────
@@ -179,19 +189,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			Profile           string `json:"profile"`
 			DisplayName       string `json:"display_name"`
 		}
-		if r.ContentLength > 0 {
-			_ = json.NewDecoder(r.Body).Decode(&body)
-		}
-
-		name := taskNames[target]
-		if body.Profile != "" {
-			if n, ok := burnNames[target]; ok {
-				name = n
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
 			}
 		}
-		if name == "" {
-			name = target
-		}
+
+		name := taskDisplayName(target, body.Profile, body.Loader)
 		t := &Task{
 			ID:        newJobID("sat-" + target),
 			Name:      name,
@@ -420,15 +425,23 @@ func (h *handler) handleAPIExportList(w http.ResponseWriter, r *http.Request) {
 }

 func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request) {
-	archive, err := app.BuildSupportBundle(h.opts.ExportDir)
-	if err != nil {
-		writeError(w, http.StatusInternalServerError, err.Error())
+	if globalQueue.hasActiveTarget("support-bundle") {
+		writeError(w, http.StatusConflict, "support bundle task is already pending or running")
 		return
 	}
+	t := &Task{
+		ID:        newJobID("support-bundle"),
+		Name:      "Support Bundle",
+		Target:    "support-bundle",
+		Status:    TaskPending,
+		CreatedAt: time.Now(),
+	}
+	globalQueue.enqueue(t)
 	writeJSON(w, map[string]string{
-		"status": "ok",
-		"path":   archive,
-		"url":    "/export/support.tar.gz",
+		"status":  "queued",
+		"task_id": t.ID,
+		"job_id":  t.ID,
+		"url":     "/export/support.tar.gz",
 	})
 }

@@ -516,10 +529,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	h.installMu.Lock()
-	installRunning := h.installJob != nil && !h.installJob.isDone()
-	h.installMu.Unlock()
-	if installRunning {
+	if globalQueue.hasActiveTarget("install") {
 		writeError(w, http.StatusConflict, "install to disk is already running")
 		return
 	}
@@ -634,35 +644,23 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
 		return
 	}
-
-	h.installMu.Lock()
-	if h.installJob != nil && !h.installJob.isDone() {
-		h.installMu.Unlock()
-		writeError(w, http.StatusConflict, "install already running")
+	if globalQueue.hasActiveTarget("install") {
+		writeError(w, http.StatusConflict, "install task is already pending or running")
 		return
 	}
-	j := &jobState{}
-	h.installJob = j
-	h.installMu.Unlock()
-
-	logFile := platform.InstallLogPath(req.Device)
-	go runCmdJob(j, exec.CommandContext(context.Background(), "bee-install", req.Device, logFile))
-
-	w.WriteHeader(http.StatusNoContent)
-}
-
-func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request) {
-	h.installMu.Lock()
-	j := h.installJob
-	h.installMu.Unlock()
-	if j == nil {
-		if !sseStart(w) {
-			return
-		}
-		sseWrite(w, "done", "")
-		return
+	t := &Task{
+		ID:        newJobID("install"),
+		Name:      "Install to Disk",
+		Target:    "install",
+		Priority:  20,
+		Status:    TaskPending,
+		CreatedAt: time.Now(),
+		params: taskParams{
+			Device: req.Device,
+		},
 	}
-	streamJob(w, r, j)
+	globalQueue.enqueue(t)
+	writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
 }

 // ── Metrics SSE ───────────────────────────────────────────────────────────────
@@ -933,8 +931,31 @@ func parseXrandrOutput(out string) []displayInfo {
 	return infos
 }

+func xrandrCommand(args ...string) *exec.Cmd {
+	cmd := exec.Command("xrandr", args...)
+	env := append([]string{}, os.Environ()...)
+	hasDisplay := false
+	hasXAuthority := false
+	for _, kv := range env {
+		if strings.HasPrefix(kv, "DISPLAY=") && strings.TrimPrefix(kv, "DISPLAY=") != "" {
+			hasDisplay = true
+		}
+		if strings.HasPrefix(kv, "XAUTHORITY=") && strings.TrimPrefix(kv, "XAUTHORITY=") != "" {
+			hasXAuthority = true
+		}
+	}
+	if !hasDisplay {
+		env = append(env, "DISPLAY=:0")
+	}
+	if !hasXAuthority {
+		env = append(env, "XAUTHORITY=/home/bee/.Xauthority")
+	}
+	cmd.Env = env
+	return cmd
+}
+
 func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
-	out, err := exec.Command("xrandr").Output()
+	out, err := xrandrCommand().Output()
 	if err != nil {
 		writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
 		return
@@ -961,7 +982,7 @@ func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusBadRequest, "invalid output name")
 		return
 	}
-	if out, err := exec.Command("xrandr", "--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
+	if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
 		writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
 		return
 	}
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -0,0 +1,102 @@
+package webui
+
+import (
+	"encoding/json"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"bee/audit/internal/app"
+)
+
+func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
+	t.Setenv("DISPLAY", "")
+	t.Setenv("XAUTHORITY", "")
+
+	cmd := xrandrCommand("--query")
+
+	var hasDisplay bool
+	var hasXAuthority bool
+	for _, kv := range cmd.Env {
+		if kv == "DISPLAY=:0" {
+			hasDisplay = true
+		}
+		if kv == "XAUTHORITY=/home/bee/.Xauthority" {
+			hasXAuthority = true
+		}
+	}
+	if !hasDisplay {
+		t.Fatalf("DISPLAY not injected: %v", cmd.Env)
+	}
+	if !hasXAuthority {
+		t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
+	}
+}
+
+func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/sat/cpu/run", strings.NewReader(`{"profile":"smoke"}`))
+	req.ContentLength = -1
+	rec := httptest.NewRecorder()
+
+	h.handleAPISATRun("cpu").ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
+	}
+	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
+		t.Fatalf("burn profile=%q want smoke", got)
+	}
+}
+
+func TestHandleAPIExportBundleQueuesTask(t *testing.T) {
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+
+	h := &handler{opts: HandlerOptions{ExportDir: t.TempDir()}}
+	req := httptest.NewRequest("POST", "/api/export/bundle", nil)
+	rec := httptest.NewRecorder()
+
+	h.handleAPIExportBundle(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	var body map[string]string
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode response: %v", err)
+	}
+	if body["task_id"] == "" {
+		t.Fatalf("missing task_id in response: %v", body)
+	}
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
+	}
+	if got := globalQueue.tasks[0].Target; got != "support-bundle" {
+		t.Fatalf("target=%q want support-bundle", got)
+	}
+}
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -4,6 +4,8 @@ import (
 	"database/sql"
 	"encoding/csv"
 	"io"
+	"os"
+	"path/filepath"
 	"strconv"
 	"time"

@@ -20,6 +22,9 @@ type MetricsDB struct {

 // openMetricsDB opens (or creates) the metrics database at the given path.
 func openMetricsDB(path string) (*MetricsDB, error) {
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return nil, err
+	}
 	db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
 	if err != nil {
 		return nil, err
@@ -132,7 +137,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	defer rows.Close()

 	type sysRow struct {
-		ts          int64
+		ts            int64
 		cpu, mem, pwr float64
 	}
 	var sysRows []sysRow
@@ -156,7 +161,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	maxTS := sysRows[len(sysRows)-1].ts

 	// Load GPU rows in range
-	type gpuKey struct{ ts int64; idx int }
+	type gpuKey struct {
+		ts  int64
+		idx int
+	}
 	gpuData := map[gpuKey]platform.GPUMetricRow{}
 	gRows, err := m.db.Query(
 		`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
@@ -174,7 +182,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	}

 	// Load fan rows in range
-	type fanKey struct{ ts int64; name string }
+	type fanKey struct {
+		ts   int64
+		name string
+	}
 	fanData := map[fanKey]float64{}
 	fRows, err := m.db.Query(
 		`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
@@ -192,7 +203,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	}

 	// Load temp rows in range
-	type tempKey struct{ ts int64; name string }
+	type tempKey struct {
+		ts   int64
+		name string
+	}
 	tempData := map[tempKey]platform.TempReading{}
 	tRows, err := m.db.Query(
 		`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -289,7 +289,7 @@ func renderAudit() string {
 func renderHardwareSummaryCard(opts HandlerOptions) string {
 	data, err := loadSnapshot(opts.AuditPath)
 	if err != nil {
-		return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><span class="badge badge-unknown">No audit data</span></div></div>`
+		return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><button class="btn btn-primary" onclick="auditModalRun()">&#9654; Run Audit</button></div></div>`
 	}
 	// Parse just enough fields for the summary banner
 	var snap struct {
@@ -926,7 +926,7 @@ func renderExport(exportDir string) string {
 	return `<div class="grid2">
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
-<a class="btn btn-primary" href="/export/support.tar.gz">&#8595; Download Support Bundle</a>
+` + renderSupportBundleInline() + `
 </div></div>
 <div class="card"><div class="card-head">Export Files</div><div class="card-body">
 <table><tr><th>File</th></tr>` + rows.String() + `</table>
@@ -1024,6 +1024,77 @@ func listExportFiles(exportDir string) ([]string, error) {
 	return entries, nil
 }

+func renderSupportBundleInline() string {
+	return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleBuild()">Build Support Bundle</button>
+<a id="support-bundle-download" class="btn btn-secondary" href="/export/support.tar.gz" style="display:none">&#8595; Download Support Bundle</a>
+<div id="support-bundle-status" style="margin-top:12px;font-size:13px;color:var(--muted)">No support bundle built in this session.</div>
+<div id="support-bundle-log" class="terminal" style="display:none;margin-top:12px;max-height:260px"></div>
+<script>
+(function(){
+var _supportBundleES = null;
+window.supportBundleBuild = function() {
+  var btn = document.getElementById('support-bundle-btn');
+  var status = document.getElementById('support-bundle-status');
+  var log = document.getElementById('support-bundle-log');
+  var download = document.getElementById('support-bundle-download');
+  if (_supportBundleES) {
+    _supportBundleES.close();
+    _supportBundleES = null;
+  }
+  btn.disabled = true;
+  btn.textContent = 'Building...';
+  status.textContent = 'Queueing support bundle task...';
+  status.style.color = 'var(--muted)';
+  log.style.display = '';
+  log.textContent = '';
+  download.style.display = 'none';
+
+  fetch('/api/export/bundle', {method:'POST'}).then(function(r){
+    return r.json().then(function(j){
+      if (!r.ok) throw new Error(j.error || r.statusText);
+      return j;
+    });
+  }).then(function(data){
+    if (!data.task_id) throw new Error('missing task id');
+    status.textContent = 'Building support bundle...';
+    _supportBundleES = new EventSource('/api/tasks/' + data.task_id + '/stream');
+    _supportBundleES.onmessage = function(e) {
+      log.textContent += e.data + '\n';
+      log.scrollTop = log.scrollHeight;
+    };
+    _supportBundleES.addEventListener('done', function(e) {
+      _supportBundleES.close();
+      _supportBundleES = null;
+      btn.disabled = false;
+      btn.textContent = 'Build Support Bundle';
+      if (e.data) {
+        status.textContent = 'Error: ' + e.data;
+        status.style.color = 'var(--crit-fg)';
+        return;
+      }
+      status.textContent = 'Support bundle ready.';
+      status.style.color = 'var(--ok-fg)';
+      download.style.display = '';
+    });
+    _supportBundleES.onerror = function() {
+      if (_supportBundleES) _supportBundleES.close();
+      _supportBundleES = null;
+      btn.disabled = false;
+      btn.textContent = 'Build Support Bundle';
+      status.textContent = 'Support bundle stream disconnected.';
+      status.style.color = 'var(--crit-fg)';
+    };
+  }).catch(function(e){
+    btn.disabled = false;
+    btn.textContent = 'Build Support Bundle';
+    status.textContent = 'Error: ' + e;
+    status.style.color = 'var(--crit-fg)';
+  });
+};
+})();
+</script>`
+}
+
 // ── Display Resolution ────────────────────────────────────────────────────────

 func renderDisplayInline() string {
@@ -1113,7 +1184,7 @@ function installToRAM() {

 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
-<a class="btn btn-primary" href="/export/support.tar.gz">&#8595; Download Support Bundle</a>
+` + renderSupportBundleInline() + `
 </div></div>

 <div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
@@ -1292,21 +1363,23 @@ function installStart() {
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({device: _installSelected.device})
  }).then(function(r){
-    if (r.status === 204) {
-      installStreamLog();
-    } else {
-      return r.json().then(function(j){ throw new Error(j.error || r.statusText); });
-    }
+    return r.json().then(function(j){
+      if (!r.ok) throw new Error(j.error || r.statusText);
+      return j;
+    });
+  }).then(function(j){
+    if (!j.task_id) throw new Error('missing task id');
+    installStreamLog(j.task_id);
  }).catch(function(e){
    status.textContent = 'Error: ' + e;
    status.style.color = 'var(--crit-fg)';
  });
 }

-function installStreamLog() {
+function installStreamLog(taskId) {
  var term = document.getElementById('install-terminal');
  var status = document.getElementById('install-status');
-  var es = new EventSource('/api/install/stream');
+  var es = new EventSource('/api/tasks/' + taskId + '/stream');
  es.onmessage = function(e) {
    term.textContent += e.data + '\n';
    term.scrollTop = term.scrollHeight;
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"html"
+	"log/slog"
 	"mime"
 	"net/http"
 	"os"
@@ -143,9 +144,6 @@ type handler struct {
 	latest   *platform.LiveMetricSample
 	// metrics persistence (nil if DB unavailable)
 	metricsDB *MetricsDB
-	// install job (at most one at a time)
-	installJob *jobState
-	installMu  sync.Mutex
 	// pending network change (rollback on timeout)
 	pendingNet   *pendingNetChange
 	pendingNetMu sync.Mutex
@@ -180,7 +178,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
 			if len(samples) > 0 {
 				h.setLatestMetric(samples[len(samples)-1])
 			}
+		} else {
+			slog.Warn("metrics history unavailable", "path", metricsDBPath, "err", err)
 		}
+	} else {
+		slog.Warn("metrics db disabled", "path", metricsDBPath, "err", err)
 	}
 	h.startMetricsCollector()

@@ -266,7 +268,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	// Install
 	mux.HandleFunc("GET /api/install/disks", h.handleAPIInstallDisks)
 	mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
-	mux.HandleFunc("GET /api/install/stream", h.handleAPIInstallStream)

 	// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
 	mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
@@ -366,9 +367,13 @@ func (h *handler) handleRuntimeHealthJSON(w http.ResponseWriter, r *http.Request
 }

 func (h *handler) handleSupportBundleDownload(w http.ResponseWriter, r *http.Request) {
-	archive, err := app.BuildSupportBundle(h.opts.ExportDir)
+	archive, err := app.LatestSupportBundlePath()
 	if err != nil {
-		http.Error(w, fmt.Sprintf("build support bundle: %v", err), http.StatusInternalServerError)
+		if errors.Is(err, os.ErrNotExist) {
+			http.Error(w, "support bundle not built yet", http.StatusNotFound)
+			return
+		}
+		http.Error(w, fmt.Sprintf("locate support bundle: %v", err), http.StatusInternalServerError)
 		return
 	}
 	w.Header().Set("Cache-Control", "no-store")
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -136,6 +136,33 @@ func TestRootRendersDashboard(t *testing.T) {
 	}
 }

+func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
+	dir := t.TempDir()
+	exportDir := filepath.Join(dir, "export")
+	if err := os.MkdirAll(exportDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	handler := NewHandler(HandlerOptions{
+		Title:     "Bee Hardware Audit",
+		AuditPath: filepath.Join(dir, "missing-audit.json"),
+		ExportDir: exportDir,
+	})
+
+	rec := httptest.NewRecorder()
+	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	body := rec.Body.String()
+	if !strings.Contains(body, `Run Audit`) {
+		t.Fatalf("dashboard missing run audit button: %s", body)
+	}
+	if strings.Contains(body, `No audit data`) {
+		t.Fatalf("dashboard still shows empty audit badge: %s", body)
+	}
+}
+
 func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
@@ -232,6 +259,17 @@ func TestSupportBundleEndpointReturnsArchive(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.log"), []byte("audit log"), 0644); err != nil {
 		t.Fatal(err)
 	}
+	archive, err := os.CreateTemp(os.TempDir(), "bee-support-server-test-*.tar.gz")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { _ = os.Remove(archive.Name()) })
+	if _, err := archive.WriteString("support-bundle"); err != nil {
+		t.Fatal(err)
+	}
+	if err := archive.Close(); err != nil {
+		t.Fatal(err)
+	}

 	handler := NewHandler(HandlerOptions{ExportDir: exportDir})
 	rec := httptest.NewRecorder()
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -6,8 +6,10 @@ import (
 	"fmt"
 	"net/http"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"sort"
+	"strings"
 	"sync"
 	"time"

@@ -39,6 +41,7 @@ var taskNames = map[string]string{
 	"sat-stress":      "SAT Stress (stressapptest)",
 	"platform-stress": "Platform Thermal Cycling",
 	"audit":           "Audit",
+	"support-bundle":  "Support Bundle",
 	"install":         "Install to Disk",
 	"install-to-ram":  "Install to RAM",
 }
@@ -51,6 +54,33 @@ var burnNames = map[string]string{
 	"amd":    "AMD GPU Burn-in",
 }

+func nvidiaStressTaskName(loader string) string {
+	switch strings.TrimSpace(strings.ToLower(loader)) {
+	case platform.NvidiaStressLoaderJohn:
+		return "NVIDIA GPU Stress (John/OpenCL)"
+	case platform.NvidiaStressLoaderNCCL:
+		return "NVIDIA GPU Stress (NCCL)"
+	default:
+		return "NVIDIA GPU Stress (bee-gpu-burn)"
+	}
+}
+
+func taskDisplayName(target, profile, loader string) string {
+	name := taskNames[target]
+	if profile != "" {
+		if n, ok := burnNames[target]; ok {
+			name = n
+		}
+	}
+	if target == "nvidia-stress" {
+		name = nvidiaStressTaskName(loader)
+	}
+	if name == "" {
+		name = target
+	}
+	return name
+}
+
 // Task represents one unit of work in the queue.
 type Task struct {
 	ID        string     `json:"id"`
@@ -185,6 +215,10 @@ var (
 	runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 		return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
 	}
+	buildSupportBundle = app.BuildSupportBundle
+	installCommand     = func(ctx context.Context, device string, logPath string) *exec.Cmd {
+		return exec.CommandContext(ctx, "bee-install", device, logPath)
+	}
 )

 // enqueue adds a task to the queue and notifies the worker.
@@ -382,9 +416,9 @@ func setCPUGovernor(governor string) {

 // runTask executes the work for a task, writing output to j.
 func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
-	if q.opts == nil || q.opts.App == nil {
-		j.append("ERROR: app not configured")
-		j.finish("app not configured")
+	if q.opts == nil {
+		j.append("ERROR: handler options not configured")
+		j.finish("handler options not configured")
 		return
 	}
 	a := q.opts.App
@@ -401,6 +435,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {

 	switch t.Target {
 	case "nvidia":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		diagLevel := t.params.DiagLevel
 		if t.params.BurnProfile != "" && diagLevel <= 0 {
 			diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
@@ -418,6 +456,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			archive, err = a.RunNvidiaAcceptancePack("", j.append)
 		}
 	case "nvidia-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
@@ -429,10 +471,22 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 		}, j.append)
 	case "memory":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
 	case "storage":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
 	case "cpu":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
@@ -440,35 +494,68 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if dur <= 0 {
 			dur = 60
 		}
+		j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
 		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
 	case "amd":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
 	case "amd-mem":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
 	case "amd-bandwidth":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
 	case "amd-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
 	case "memory-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
 	case "sat-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
 	case "platform-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		opts := resolvePlatformStressPreset(t.params.BurnProfile)
 		archive, err = a.RunPlatformStress(ctx, "", opts, j.append)
 	case "audit":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		result, e := a.RunAuditNow(q.opts.RuntimeMode)
 		if e != nil {
 			err = e
@@ -477,7 +564,22 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 				j.append(line)
 			}
 		}
+	case "support-bundle":
+		j.append("Building support bundle...")
+		archive, err = buildSupportBundle(q.opts.ExportDir)
+	case "install":
+		if strings.TrimSpace(t.params.Device) == "" {
+			err = fmt.Errorf("device is required")
+			break
+		}
+		installLogPath := platform.InstallLogPath(t.params.Device)
+		j.append("Install log: " + installLogPath)
+		err = streamCmdJob(j, installCommand(ctx, t.params.Device, installLogPath))
 	case "install-to-ram":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
 		err = a.RunInstallToRAM(ctx, j.append)
 	default:
 		j.append("ERROR: unknown target: " + t.Target)
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -3,7 +3,9 @@ package webui
 import (
 	"context"
 	"os"
+	"os/exec"
 	"path/filepath"
+	"strings"
 	"testing"
 	"time"

@@ -95,9 +97,24 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }

-func TestRunTaskHonorsCancel(t *testing.T) {
-	t.Parallel()
+// TestTaskDisplayNameUsesNvidiaStressLoader checks that the display name of an
+// "nvidia-stress" task reflects the selected loader, and that an empty loader
+// yields the same name as "builtin".
+func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
+	tests := []struct {
+		loader string
+		want   string
+	}{
+		{loader: "", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
+		{loader: "builtin", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
+		{loader: "john", want: "NVIDIA GPU Stress (John/OpenCL)"},
+		{loader: "nccl", want: "NVIDIA GPU Stress (NCCL)"},
+	}
+	for _, tc := range tests {
+		// Errorf rather than Fatalf: report every mismatching loader in one run.
+		if got := taskDisplayName("nvidia-stress", "acceptance", tc.loader); got != tc.want {
+			t.Errorf("taskDisplayName(loader=%q)=%q want %q", tc.loader, got, tc.want)
+		}
+	}
+}

+func TestRunTaskHonorsCancel(t *testing.T) {
 	blocked := make(chan struct{})
 	released := make(chan struct{})
 	aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
@@ -154,3 +171,111 @@ func TestRunTaskHonorsCancel(t *testing.T) {
 		t.Fatal("runTask did not return after cancel")
 	}
 }
+
+// TestRunTaskUsesBurnProfileDurationForCPU runs a "cpu" task that sets only
+// BurnProfile "smoke" and expects the stubbed runCPUAcceptancePackCtx to be
+// called with a 5-minute duration.
+func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
+	var gotDuration int
+	q := &taskQueue{
+		opts: &HandlerOptions{App: &app.App{}},
+	}
+	tk := &Task{
+		ID:        "cpu-burn-1",
+		Name:      "CPU Burn-in",
+		Target:    "cpu",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+		params:    taskParams{BurnProfile: "smoke"},
+	}
+	j := &jobState{}
+
+	// Swap the package-level hook for a stub that records the duration.
+	orig := runCPUAcceptancePackCtx
+	runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
+		gotDuration = durationSec
+		return "/tmp/cpu-burn.tar.gz", nil
+	}
+	defer func() { runCPUAcceptancePackCtx = orig }()
+
+	q.runTask(tk, j, context.Background())
+
+	if gotDuration != 5*60 {
+		t.Fatalf("duration=%d want %d", gotDuration, 5*60)
+	}
+}
+
+// TestRunTaskBuildsSupportBundleWithoutApp runs a "support-bundle" task with
+// HandlerOptions that carry no App and expects the stubbed buildSupportBundle
+// to receive ExportDir, no job error, and an "Archive: <path>" output line.
+func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
+	dir := t.TempDir()
+	q := &taskQueue{
+		opts: &HandlerOptions{ExportDir: dir},
+	}
+	tk := &Task{
+		ID:        "support-bundle-1",
+		Name:      "Support Bundle",
+		Target:    "support-bundle",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+	}
+	j := &jobState{}
+
+	// Swap the package-level hook for a stub that records its argument.
+	var gotExportDir string
+	orig := buildSupportBundle
+	buildSupportBundle = func(exportDir string) (string, error) {
+		gotExportDir = exportDir
+		return filepath.Join(exportDir, "bundle.tar.gz"), nil
+	}
+	defer func() { buildSupportBundle = orig }()
+
+	q.runTask(tk, j, context.Background())
+
+	if gotExportDir != dir {
+		t.Fatalf("exportDir=%q want %q", gotExportDir, dir)
+	}
+	if j.err != "" {
+		t.Fatalf("unexpected error: %q", j.err)
+	}
+	if !strings.Contains(strings.Join(j.lines, "\n"), "Archive: "+filepath.Join(dir, "bundle.tar.gz")) {
+		t.Fatalf("lines=%v", j.lines)
+	}
+}
+
+// TestRunTaskInstallUsesSharedCommandStreaming runs an "install" task with a
+// stubbed installCommand and expects the device and a non-empty log path to be
+// passed through, the "Install log: " line, both command output lines, and no
+// job error.
+func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
+	q := &taskQueue{
+		opts: &HandlerOptions{},
+	}
+	tk := &Task{
+		ID:        "install-1",
+		Name:      "Install to Disk",
+		Target:    "install",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+		params:    taskParams{Device: "/dev/sda"},
+	}
+	j := &jobState{}
+
+	// Replace the real installer with a shell command that prints two lines.
+	var gotDevice string
+	var gotLogPath string
+	orig := installCommand
+	installCommand = func(ctx context.Context, device string, logPath string) *exec.Cmd {
+		gotDevice = device
+		gotLogPath = logPath
+		return exec.CommandContext(ctx, "sh", "-c", "printf 'line1\nline2\n'")
+	}
+	defer func() { installCommand = orig }()
+
+	q.runTask(tk, j, context.Background())
+
+	if gotDevice != "/dev/sda" {
+		t.Fatalf("device=%q want /dev/sda", gotDevice)
+	}
+	if gotLogPath == "" {
+		t.Fatal("expected install log path")
+	}
+	logs := strings.Join(j.lines, "\n")
+	if !strings.Contains(logs, "Install log: ") {
+		t.Fatalf("missing install log line: %v", j.lines)
+	}
+	if !strings.Contains(logs, "line1") || !strings.Contains(logs, "line2") {
+		t.Fatalf("missing streamed output: %v", j.lines)
+	}
+	if j.err != "" {
+		t.Fatalf("unexpected error: %q", j.err)
+	}
+}
--- a/2
+++ b/2
--- a/bible-local/decisions/2026-04-01-memtest-build-strategy.md
+++ b/bible-local/decisions/2026-04-01-memtest-build-strategy.md
@@ -0,0 +1,117 @@
+# Decision: Treat memtest as explicit ISO content, not as trusted live-build magic
+
+**Date:** 2026-04-01
+**Status:** active
+
+## Context
+
+We have already iterated on `memtest` multiple times and kept cycling between the same ideas.
+The commit history shows several distinct attempts:
+
+- `f91bce8` — fixed Bookworm memtest file names to `memtest86+x64.bin` / `memtest86+x64.efi`
+- `5857805` — added a binary hook to copy memtest files from the build tree into the ISO root
+- `f96b149` — added fallback extraction from the cached `.deb` when `chroot/boot/` stayed empty
+- `d43a9ae` — removed the custom hook and switched back to live-build built-in memtest integration
+- `60cb8f8` — restored explicit memtest menu entries and added ISO validation
+- `3dbc218` / `3869788` — added archived build logs and better memtest diagnostics
+
+Current evidence from the archived `easy-bee-nvidia-v3.14-amd64` logs dated 2026-04-01:
+
+- `lb binary_memtest` does run and installs `memtest86+`
+- but the final ISO still does **not** contain `boot/memtest86+x64.bin`
+- the final ISO also does **not** contain memtest menu entries in `boot/grub/grub.cfg` or `isolinux/live.cfg`
+
+So the assumption "live-build built-in memtest integration is enough on this stack" is currently false for this project until proven otherwise by a real built ISO.
+
+Additional evidence from the archived `easy-bee-nvidia-v3.17-dirty-amd64` logs dated 2026-04-01:
+
+- the build now completes successfully because memtest is non-blocking by default
+- `lb binary_memtest` still runs and installs `memtest86+`
+- the project-owned hook `config/hooks/normal/9100-memtest.hook.binary` does execute
+- but it executes too early for its current target paths:
+  - `binary/boot/grub/grub.cfg` is still missing at hook time
+  - `binary/isolinux/live.cfg` is still missing at hook time
+  - memtest binaries are also still absent in `binary/boot/`
+- later in the build, live-build does create intermediate bootloader configs with memtest lines in the workdir
+- but the final ISO still lacks memtest binaries and still lacks memtest lines in extracted ISO `boot/grub/grub.cfg` and `isolinux/live.cfg`
+
+So the assumption "the current normal binary hook path is late enough to patch final memtest artifacts" is also false.
+
+## Known Failed Attempts
+
+These approaches were already tried and should not be repeated blindly:
+
+1. Built-in live-build memtest only.
+Reason it failed:
+- `lb binary_memtest` runs, but the final ISO still misses memtest binaries and menu entries.
+
+2. Fixing only the memtest file names for Debian Bookworm.
+Reason it failed:
+- correct file names alone do not make the files appear in the final ISO.
+
+3. Copying memtest from `chroot/boot/` into `binary/boot/` via a binary hook.
+Reason it failed:
+- in this stack `chroot/boot/` is often empty for memtest payloads at the relevant time.
+
+4. Fallback extraction from cached `memtest86+` `.deb`.
+Reason it failed:
+- this was explored already and was not enough to stabilize the final ISO path end-to-end.
+
+5. Restoring explicit memtest menu entries in source bootloader templates only.
+Reason it failed:
+- memtest lines in source templates or intermediate workdir configs do not guarantee the final ISO contains them.
+
+6. Patching `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` from the current `config/hooks/normal/9100-memtest.hook.binary`.
+Reason it failed:
+- the hook runs before those files exist, so the hook cannot patch them there.
+
+## What This Means
+
+When revisiting memtest later, start from the constraints above rather than retrying the same patterns:
+
+- do not assume the built-in memtest stage is sufficient
+- do not assume `chroot/boot/` will contain memtest payloads
+- do not assume source bootloader templates are the last writer of final ISO configs
+- do not assume the current normal binary hook timing is late enough for final patching
+
+Any future memtest fix must explicitly identify:
+
+- where the memtest binaries are reliably available at build time
+- which exact build stage writes the final bootloader configs that land in the ISO
+- what post-build proof from a real ISO (not only from intermediate workdir files) confirms the result
+
+## Decision
+
+For `bee`, memtest must be treated as an explicit ISO artifact with explicit post-build validation.
+
+Project rules from now on:
+
+- Do **not** trust `--memtest memtest86+` by itself.
+- A memtest implementation is considered valid only if the produced ISO actually contains:
+  - `boot/memtest86+x64.bin`
+  - `boot/memtest86+x64.efi`
+  - a GRUB menu entry
+  - an isolinux menu entry
+- If live-build built-in integration does not produce those artifacts, use an explicit project-owned mechanism such as:
+  - a binary hook copying files into `binary/boot/`
+  - extraction from the cached `memtest86+` `.deb`
+  - another deterministic build-time copy step
+- Do **not** remove such explicit logic later unless a fresh real ISO build proves that built-in integration alone produces all required files and menu entries.
+
+Current implementation direction:
+
+- keep the live-build memtest stage enabled if it helps package acquisition
+- do not rely on the current early `binary_hooks` timing for final patching
+- prefer a post-`lb build` recovery step in `build.sh` that:
+  - patches the fully materialized `LB_DIR/binary` tree
+  - injects memtest binaries there
+  - ensures final bootloader entries there
+  - reruns late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) after the patch
+
+## Consequences
+
+- Future memtest changes must begin by reading this ADR and the commits listed above.
+- Future memtest changes must also begin by reading the failed-attempt list above.
+- We should stop re-introducing "prefer built-in live-build memtest" as a default assumption without new evidence.
+- Memtest validation in `build.sh` is not optional; it is the acceptance gate that prevents another silent regression.
+- If we change memtest strategy again, we must update this ADR with the exact build evidence that justified the change.
--- a/bible-local/decisions/README.md
+++ b/bible-local/decisions/README.md
@@ -5,3 +5,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
 | Date | Decision | Status |
 |---|---|---|
 | 2026-03-05 | Use NVIDIA proprietary driver | active |
+| 2026-04-01 | Treat memtest as explicit ISO content | active |
--- a/bible-local/docs/iso-build-rules.md
+++ b/bible-local/docs/iso-build-rules.md
@@ -17,6 +17,39 @@ This applies to:

 ## Memtest rule

-Prefer live-build's built-in memtest integration over custom hooks or hardcoded
-bootloader paths. If you ever need to reference memtest files manually, verify
-the exact package file list first for the target Debian release.
+Do not assume live-build's built-in memtest integration is sufficient for `bee`.
+We already tried that path and regressed again on 2026-04-01: `lb binary_memtest`
+ran, but the final ISO still lacked memtest binaries and menu entries.
+
+For this project, memtest is accepted only when the produced ISO actually
+contains all of the following:
+
+- `boot/memtest86+x64.bin`
+- `boot/memtest86+x64.efi`
+- a memtest entry in `boot/grub/grub.cfg`
+- a memtest entry in `isolinux/live.cfg`
+
+Rules:
+
+- Keep explicit post-build memtest validation in `build.sh`.
+- If built-in integration does not produce the artifacts above, use a
+  deterministic project-owned copy/extract step instead of hoping live-build
+  will "start working".
+- Do not switch back to built-in-only memtest without fresh build evidence from
+  a real ISO.
+- If you reference memtest files manually, verify the exact package file list
+  first for the target Debian release.
+
+Known bad loops for this repository:
+
+- Do not retry built-in-only memtest without new evidence. We already proved
+  that `lb binary_memtest` can run while the final ISO still has no memtest.
+- Do not assume fixing memtest file names is enough. Correct names did not fix
+  the final artifact path.
+- Do not assume `chroot/boot/` contains memtest payloads at the time hooks run.
+- Do not assume source `grub.cfg` / `live.cfg.in` are the final writers of ISO
+  bootloader configs.
+- Do not assume the current `config/hooks/normal/9100-memtest.hook.binary`
+  timing is late enough to patch final `binary/boot/grub/grub.cfg` or
+  `binary/isolinux/live.cfg`; logs from 2026-04-01 showed those files were not
+  present yet when the hook executed.
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -38,6 +38,7 @@ export BEE_GPU_VENDOR

 . "${BUILDER_DIR}/VERSIONS"
 export PATH="$PATH:/usr/local/go/bin"
+: "${BEE_REQUIRE_MEMTEST:=0}"

 # Allow git to read the bind-mounted repo (different UID inside container).
 git config --global safe.directory "${REPO_ROOT}"
@@ -111,63 +112,546 @@ resolve_iso_version() {
    resolve_audit_version
 }

+# iso_list_files ISO_PATH
+# Prints the ISO's file listing on stdout, using bsdtar when it is installed
+# and xorriso otherwise (sed strips the leading "/" from each xorriso line).
+# Returns 127 when neither tool is available.
+iso_list_files() {
+    iso_path="$1"
+
+    if command -v bsdtar >/dev/null 2>&1; then
+        bsdtar -tf "$iso_path"
+        return $?
+    fi
+
+    if command -v xorriso >/dev/null 2>&1; then
+        # NOTE(review): unless pipefail is enabled, $? below is sed's status, so
+        # an xorriso failure is reported as success - confirm this is intended.
+        # NOTE(review): confirm the installed xorriso accepts `-print` in -find
+        # and emits unquoted paths; callers grep for '^boot/...$'.
+        xorriso -indev "$iso_path" -find / -type f -print 2>/dev/null | sed 's#^/##'
+        return $?
+    fi
+
+    return 127
+}
+
+# iso_extract_file ISO_PATH MEMBER
+# Writes the content of MEMBER (a path inside the ISO without leading "/") to
+# stdout, using bsdtar when it is installed and xorriso otherwise.
+# Returns the tool's exit status, or 127 when neither tool is available.
+iso_extract_file() {
+    iso_path="$1"
+    iso_member="$2"
+
+    if command -v bsdtar >/dev/null 2>&1; then
+        bsdtar -xOf "$iso_path" "$iso_member"
+        return $?
+    fi
+
+    if command -v xorriso >/dev/null 2>&1; then
+        # NOTE(review): verify that the xorriso in the build image provides a
+        # `-cat` command; errors are hidden by 2>/dev/null.
+        xorriso -osirrox on -indev "$iso_path" -cat "/$iso_member" 2>/dev/null
+        return $?
+    fi
+
+    return 127
+}
+
+# require_iso_reader [ISO_PATH]
+# Succeeds when bsdtar or xorriso is installed. Otherwise reports the problem
+# through memtest_fail (which exits when BEE_REQUIRE_MEMTEST=1) and returns 1,
+# so callers written as `require_iso_reader ... || return 0` really stop.
+require_iso_reader() {
+    command -v bsdtar >/dev/null 2>&1 && return 0
+    command -v xorriso >/dev/null 2>&1 && return 0
+    memtest_fail "ISO reader is required for validation/debug (expected bsdtar or xorriso)" "${1:-}"
+    # memtest_fail returns 0 in non-blocking mode; signal the missing reader
+    # explicitly instead of letting the caller continue without one.
+    return 1
+}
+
+# dump_memtest_debug PHASE [LB_DIR] [ISO_PATH]
+# Prints a memtest diagnostic report: the --memtest line in auto/config, memtest
+# lines in the source bootloader templates, presence of the project hook, and,
+# when LB_DIR / ISO_PATH are given and exist, the memtest state of the
+# live-build workdir and of the ISO itself. When LOG_DIR exists the report is
+# also written to ${LOG_DIR}/memtest-<phase>.log.
+dump_memtest_debug() {
+    phase="$1"
+    lb_dir="${2:-}"
+    iso_path="${3:-}"
+    phase_slug="$(printf '%s' "${phase}" | tr ' /' '__')"
+    memtest_log="${LOG_DIR:-}/memtest-${phase_slug}.log"
+
+    (
+        echo "=== memtest debug: ${phase} ==="
+
+        echo "-- auto/config --"
+        if [ -f "${BUILDER_DIR}/auto/config" ]; then
+            grep -n -- '--memtest' "${BUILDER_DIR}/auto/config" || echo "  (no --memtest line found)"
+        else
+            echo "  (missing ${BUILDER_DIR}/auto/config)"
+        fi
+
+        echo "-- source bootloader templates --"
+        for cfg in \
+            "${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
+            "${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
+            if [ -f "$cfg" ]; then
+                echo "  file: $cfg"
+                grep -n 'Memory Test\|memtest' "$cfg" || echo "    (no memtest lines)"
+            fi
+        done
+
+        echo "-- source binary hooks --"
+        for hook in \
+            "${BUILDER_DIR}/config/hooks/normal/9100-memtest.hook.binary"; do
+            if [ -f "$hook" ]; then
+                echo "  hook: $hook"
+            else
+                echo "  (missing $hook)"
+            fi
+        done
+
+        if [ -n "$lb_dir" ] && [ -d "$lb_dir" ]; then
+            echo "-- live-build workdir package lists --"
+            for pkg in \
+                "$lb_dir/config/package-lists/bee.list.chroot" \
+                "$lb_dir/config/package-lists/bee-gpu.list.chroot" \
+                "$lb_dir/config/package-lists/bee-nvidia.list.chroot"; do
+                if [ -f "$pkg" ]; then
+                    echo "  file: $pkg"
+                    grep -n 'memtest' "$pkg" || echo "    (no memtest lines)"
+                fi
+            done
+
+            echo "-- live-build chroot/boot --"
+            if [ -d "$lb_dir/chroot/boot" ]; then
+                find "$lb_dir/chroot/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/chroot/boot)"
+            fi
+
+            echo "-- live-build binary/boot --"
+            if [ -d "$lb_dir/binary/boot" ]; then
+                find "$lb_dir/binary/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/binary/boot)"
+            fi
+
+            echo "-- live-build binary grub cfg --"
+            if [ -f "$lb_dir/binary/boot/grub/grub.cfg" ]; then
+                grep -n 'Memory Test\|memtest' "$lb_dir/binary/boot/grub/grub.cfg" || echo "  (no memtest lines)"
+            else
+                echo "  (missing $lb_dir/binary/boot/grub/grub.cfg)"
+            fi
+
+            echo "-- live-build binary isolinux cfg --"
+            if [ -f "$lb_dir/binary/isolinux/live.cfg" ]; then
+                grep -n 'Memory Test\|memtest' "$lb_dir/binary/isolinux/live.cfg" || echo "  (no memtest lines)"
+            else
+                echo "  (missing $lb_dir/binary/isolinux/live.cfg)"
+            fi
+
+            echo "-- live-build package cache --"
+            if [ -d "$lb_dir/cache/packages.chroot" ]; then
+                find "$lb_dir/cache/packages.chroot" -maxdepth 1 -name 'memtest86+*.deb' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/cache/packages.chroot)"
+            fi
+        fi
+
+        if [ -n "$iso_path" ] && [ -f "$iso_path" ]; then
+            echo "-- ISO memtest files --"
+            # grep's status is hidden behind sed in a pipeline, so collect the
+            # matches first; otherwise the "(no memtest files)" note never prints.
+            iso_memtest_files="$(iso_list_files "$iso_path" | grep 'memtest' || true)"
+            if [ -n "$iso_memtest_files" ]; then
+                printf '%s\n' "$iso_memtest_files" | sed 's/^/  /'
+            else
+                echo "  (no memtest files in ISO)"
+            fi
+
+            echo "-- ISO GRUB memtest lines --"
+            iso_extract_file "$iso_path" boot/grub/grub.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo "  (no memtest lines in boot/grub/grub.cfg)"
+
+            echo "-- ISO isolinux memtest lines --"
+            iso_extract_file "$iso_path" isolinux/live.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo "  (no memtest lines in isolinux/live.cfg)"
+        fi
+
+        echo "=== end memtest debug: ${phase} ==="
+    ) | {
+        # Mirror the report into the per-phase log only when a log dir exists.
+        if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ]; then
+            tee "${memtest_log}"
+        else
+            cat
+        fi
+    }
+}
+
+# memtest_fail MESSAGE [ISO_PATH]
+# Reports a memtest problem on stderr, followed by the memtest debug dump.
+# With BEE_REQUIRE_MEMTEST=1 the message is labelled ERROR and the script
+# exits 1; otherwise it is labelled WARNING and the function returns 0.
+memtest_fail() {
+    msg="$1"
+    iso_path="${2:-}"
+    level="WARNING"
+    if [ "${BEE_REQUIRE_MEMTEST:-0}" = "1" ]; then
+        level="ERROR"
+    fi
+    echo "${level}: ${msg}" >&2
+    dump_memtest_debug "failure" "${LB_DIR:-}" "$iso_path" >&2
+    if [ "${BEE_REQUIRE_MEMTEST:-0}" = "1" ]; then
+        exit 1
+    fi
+    return 0
+}
+
+# iso_memtest_present ISO_PATH
+# Quiet check used to decide whether the ISO already carries memtest.
+# Returns 0 only when the ISO contains both memtest binaries and the GRUB and
+# isolinux configs reference them; returns 1 otherwise (including when the ISO
+# is missing or neither bsdtar nor xorriso is installed).
+iso_memtest_present() {
+    iso_path="$1"
+
+    [ -f "$iso_path" ] || return 1
+    command -v bsdtar >/dev/null 2>&1 || command -v xorriso >/dev/null 2>&1 || return 1
+
+    # List the ISO once and reuse the listing for both binaries.
+    iso_files="$(iso_list_files "$iso_path" || true)"
+    printf '%s\n' "$iso_files" | grep -q '^boot/memtest86+x64\.bin$' || return 1
+    printf '%s\n' "$iso_files" | grep -q '^boot/memtest86+x64\.efi$' || return 1
+
+    grub_cfg="$(mktemp)"
+    isolinux_cfg="$(mktemp)"
+
+    # Any failed extraction or missing reference leaves the result at 1; the
+    # temp files are removed on a single path below.
+    memtest_present=1
+    if iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" 2>/dev/null &&
+        iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" 2>/dev/null &&
+        grep -q 'Memory Test (memtest86+)' "$grub_cfg" &&
+        grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" &&
+        grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" &&
+        grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" &&
+        grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg"; then
+        memtest_present=0
+    fi
+
+    rm -f "$grub_cfg" "$isolinux_cfg"
+    return "$memtest_present"
+}
+
 validate_iso_memtest() {
    iso_path="$1"
    echo "=== validating memtest in ISO ==="

-    [ -f "$iso_path" ] || { echo "ERROR: ISO not found for validation: $iso_path" >&2; exit 1; }
-    command -v bsdtar >/dev/null 2>&1 || { echo "ERROR: bsdtar is required for ISO validation" >&2; exit 1; }
-
-    bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || {
-        echo "ERROR: memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" >&2
-        exit 1
+    [ -f "$iso_path" ] || {
+        memtest_fail "ISO not found for validation: $iso_path" "$iso_path"
+        return 0
    }
-    bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || {
-        echo "ERROR: memtest EFI binary missing in ISO: boot/memtest86+x64.efi" >&2
-        exit 1
+    require_iso_reader "$iso_path" || return 0
+
+    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || {
+        memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path"
+        return 0
+    }
+    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || {
+        memtest_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi" "$iso_path"
+        return 0
    }

    grub_cfg="$(mktemp)"
    isolinux_cfg="$(mktemp)"
-    trap 'rm -f "$grub_cfg" "$isolinux_cfg"' EXIT INT TERM

-    bsdtar -xOf "$iso_path" boot/grub/grub.cfg > "$grub_cfg" || {
-        echo "ERROR: failed to extract boot/grub/grub.cfg from ISO" >&2
-        exit 1
+    iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" || {
+        memtest_fail "failed to extract boot/grub/grub.cfg from ISO" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }
-    bsdtar -xOf "$iso_path" isolinux/live.cfg > "$isolinux_cfg" || {
-        echo "ERROR: failed to extract isolinux/live.cfg from ISO" >&2
-        exit 1
+    iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" || {
+        memtest_fail "failed to extract isolinux/live.cfg from ISO" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }

    grep -q 'Memory Test (memtest86+)' "$grub_cfg" || {
-        echo "ERROR: GRUB menu entry for memtest is missing" >&2
-        exit 1
+        memtest_fail "GRUB menu entry for memtest is missing" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }
    grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" || {
-        echo "ERROR: GRUB memtest EFI path is missing" >&2
-        exit 1
+        memtest_fail "GRUB memtest EFI path is missing" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }
    grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" || {
-        echo "ERROR: GRUB memtest BIOS path is missing" >&2
-        exit 1
+        memtest_fail "GRUB memtest BIOS path is missing" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }
    grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" || {
-        echo "ERROR: isolinux menu entry for memtest is missing" >&2
-        exit 1
+        memtest_fail "isolinux menu entry for memtest is missing" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }
    grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg" || {
-        echo "ERROR: isolinux memtest path is missing" >&2
-        exit 1
+        memtest_fail "isolinux memtest path is missing" "$iso_path"
+        rm -f "$grub_cfg" "$isolinux_cfg"
+        return 0
    }

    rm -f "$grub_cfg" "$isolinux_cfg"
-    trap - EXIT INT TERM
    echo "=== memtest validation OK ==="
 }

+# append_memtest_grub_entry GRUB_CFG
+# Appends a marked memtest menu block (EFI chainloader entry and BIOS linux16
+# entry) to GRUB_CFG. Returns 1 when the file does not exist and 0 without
+# changes when a memtest entry or the BEE MEMTEST marker is already present.
+append_memtest_grub_entry() {
+    grub_cfg="$1"
+    [ -f "$grub_cfg" ] || return 1
+    grep -q 'Memory Test (memtest86+)' "$grub_cfg" && return 0
+    grep -q '### BEE MEMTEST ###' "$grub_cfg" && return 0
+
+    # Quoted heredoc delimiter: ${grub_platform} must reach grub.cfg unexpanded.
+    cat >> "$grub_cfg" <<'EOF'
+
+### BEE MEMTEST ###
+if [ "${grub_platform}" = "efi" ]; then
+    menuentry "Memory Test (memtest86+)" {
+        chainloader /boot/memtest86+x64.efi
+    }
+else
+    menuentry "Memory Test (memtest86+)" {
+        linux16 /boot/memtest86+x64.bin
+    }
+fi
+### /BEE MEMTEST ###
+EOF
+}
+
+# append_memtest_isolinux_entry ISOLINUX_CFG
+# Appends a marked "memtest" label that boots /boot/memtest86+x64.bin to
+# ISOLINUX_CFG. Returns 1 when the file does not exist and 0 without changes
+# when a memtest entry or the BEE MEMTEST marker is already present.
+append_memtest_isolinux_entry() {
+    isolinux_cfg="$1"
+    [ -f "$isolinux_cfg" ] || return 1
+    grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" && return 0
+    grep -q '### BEE MEMTEST ###' "$isolinux_cfg" && return 0
+
+    cat >> "$isolinux_cfg" <<'EOF'
+
+# ### BEE MEMTEST ###
+label memtest
+    menu label ^Memory Test (memtest86+)
+    linux /boot/memtest86+x64.bin
+# ### /BEE MEMTEST ###
+EOF
+}
+
+# copy_memtest_from_deb DEB DST_BOOT
+# Unpacks DEB into a temporary directory and copies memtest86+x64.bin and
+# memtest86+x64.efi from its boot/ directory into DST_BOOT when present.
+# Best effort: extraction or copy failures only print a warning, so a bad
+# package cannot abort the build under `set -e`; the temporary directory is
+# always removed. Later validation decides whether memtest is really present.
+copy_memtest_from_deb() {
+    deb="$1"
+    dst_boot="$2"
+    tmpdir="$(mktemp -d)"
+
+    if dpkg-deb -x "$deb" "$tmpdir"; then
+        for f in memtest86+x64.bin memtest86+x64.efi; do
+            if [ -f "$tmpdir/boot/$f" ]; then
+                cp "$tmpdir/boot/$f" "$dst_boot/$f" ||
+                    echo "memtest recovery: WARNING: failed to copy $f from $deb" >&2
+            fi
+        done
+    else
+        echo "memtest recovery: WARNING: failed to extract $deb" >&2
+    fi
+    rm -rf "$tmpdir"
+}
+
+# recover_iso_memtest LB_DIR ISO_PATH
+# Best-effort repair of the live-build binary tree after `lb build`:
+#   1. copy memtest binaries into LB_DIR/binary/boot from LB_DIR/chroot/boot or
+#      /boot, then from a cached memtest86+ .deb, then from a freshly
+#      downloaded .deb;
+#   2. ensure the GRUB and isolinux memtest menu entries;
+#   3. rerun the late live-build stages (checksums, ISO, zsync).
+recover_iso_memtest() {
+    lb_dir="$1"
+    iso_path="$2"
+    binary_boot="$lb_dir/binary/boot"
+    grub_cfg="$lb_dir/binary/boot/grub/grub.cfg"
+    isolinux_cfg="$lb_dir/binary/isolinux/live.cfg"
+
+    echo "=== attempting memtest recovery in binary tree ==="
+
+    mkdir -p "$binary_boot"
+
+    for root in \
+        "$lb_dir/chroot/boot" \
+        "/boot"; do
+        for f in memtest86+x64.bin memtest86+x64.efi; do
+            if [ ! -f "$binary_boot/$f" ] && [ -f "$root/$f" ]; then
+                cp "$root/$f" "$binary_boot/$f"
+                echo "memtest recovery: copied $f from $root"
+            fi
+        done
+    done
+
+    if [ ! -f "$binary_boot/memtest86+x64.bin" ] || [ ! -f "$binary_boot/memtest86+x64.efi" ]; then
+        for dir in \
+            "$lb_dir/cache/packages.binary" \
+            "$lb_dir/cache/packages.chroot" \
+            "$lb_dir/chroot/var/cache/apt/archives" \
+            "${BEE_CACHE_DIR:-${DIST_DIR}/cache}/lb-packages" \
+            "/var/cache/apt/archives"; do
+            [ -d "$dir" ] || continue
+            deb="$(find "$dir" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
+            [ -n "$deb" ] || continue
+            echo "memtest recovery: extracting payload from $deb"
+            copy_memtest_from_deb "$deb" "$binary_boot"
+            # Stop only once both binaries exist; a package that did not
+            # provide them must not prevent trying the remaining caches.
+            if [ -f "$binary_boot/memtest86+x64.bin" ] && [ -f "$binary_boot/memtest86+x64.efi" ]; then
+                break
+            fi
+        done
+    fi
+
+    if [ ! -f "$binary_boot/memtest86+x64.bin" ] || [ ! -f "$binary_boot/memtest86+x64.efi" ]; then
+        tmpdl="$(mktemp -d)"
+        if (
+            cd "$tmpdl" && apt-get download memtest86+ >/dev/null 2>&1
+        ); then
+            deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
+            if [ -n "$deb" ]; then
+                echo "memtest recovery: downloaded $deb"
+                copy_memtest_from_deb "$deb" "$binary_boot"
+            fi
+        fi
+        rm -rf "$tmpdl"
+    fi
+
+    if [ -f "$grub_cfg" ]; then
+        append_memtest_grub_entry "$grub_cfg" && echo "memtest recovery: ensured GRUB entry"
+    else
+        echo "memtest recovery: WARNING: missing $grub_cfg"
+    fi
+
+    if [ -f "$isolinux_cfg" ]; then
+        append_memtest_isolinux_entry "$isolinux_cfg" && echo "memtest recovery: ensured isolinux entry"
+    else
+        echo "memtest recovery: WARNING: missing $isolinux_cfg"
+    fi
+
+    run_optional_step_sh "rebuild live-build checksums after memtest recovery" "91-lb-checksums" "lb binary_checksums 2>&1"
+    run_optional_step_sh "rebuild ISO after memtest recovery" "92-lb-binary-iso" "rm -f '$iso_path' && lb binary_iso 2>&1"
+    run_optional_step_sh "rebuild zsync after memtest recovery" "93-lb-zsync" "lb binary_zsync 2>&1"
+}
+
 AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
 ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
+ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
+LOG_DIR="${DIST_DIR}/${ISO_BASENAME}.logs"
+LOG_ARCHIVE="${DIST_DIR}/${ISO_BASENAME}.logs.tar.gz"
+ISO_OUT="${DIST_DIR}/${ISO_BASENAME}.iso"
+LOG_OUT="${LOG_DIR}/build.log"
+
+# cleanup_build_log [STATUS]
+# Trap handler installed by start_build_log. Closes any active step log,
+# restores stdout/stderr from fds 3/4, waits for the build-log tee, removes the
+# FIFO, archives LOG_DIR into LOG_ARCHIVE, and exits with STATUS (default: $?).
+cleanup_build_log() {
+    status="${1:-$?}"
+    # Clear the traps first so the exit below does not re-enter this handler.
+    trap - EXIT INT TERM HUP
+
+    if [ "${STEP_LOG_ACTIVE:-0}" = "1" ]; then
+        cleanup_step_log "${status}" || true
+    fi
+
+    if [ "${BUILD_LOG_ACTIVE:-0}" = "1" ]; then
+        BUILD_LOG_ACTIVE=0
+        exec 1>&3 2>&4
+        exec 3>&- 4>&-
+        if [ -n "${BUILD_TEE_PID:-}" ]; then
+            wait "${BUILD_TEE_PID}" 2>/dev/null || true
+        fi
+        rm -f "${BUILD_LOG_PIPE}"
+    fi
+
+    if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
+        rm -f "${LOG_ARCHIVE}"
+        tar -czf "${LOG_ARCHIVE}" -C "${DIST_DIR}" "$(basename "${LOG_DIR}")" 2>/dev/null || true
+    fi
+
+    exit "${status}"
+}
+
+# start_build_log
+# Recreates LOG_DIR, then redirects the script's stdout and stderr through a
+# FIFO into a background tee that writes LOG_OUT. The original stdout/stderr
+# are saved on fds 3/4, and cleanup_build_log is installed as the
+# EXIT/INT/TERM/HUP trap. Exits 1 when tee is not installed.
+start_build_log() {
+    command -v tee >/dev/null 2>&1 || {
+        echo "ERROR: tee is required for build logging" >&2
+        exit 1
+    }
+
+    rm -rf "${LOG_DIR}"
+    rm -f "${LOG_ARCHIVE}"
+    mkdir -p "${LOG_DIR}"
+    BUILD_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-build-log.XXXXXX")"
+    mkfifo "${BUILD_LOG_PIPE}"
+
+    exec 3>&1 4>&2
+    tee "${LOG_OUT}" < "${BUILD_LOG_PIPE}" &
+    BUILD_TEE_PID=$!
+    exec > "${BUILD_LOG_PIPE}" 2>&1
+    BUILD_LOG_ACTIVE=1
+
+    trap 'cleanup_build_log "$?"' EXIT INT TERM HUP
+
+    echo "=== build log dir: ${LOG_DIR} ==="
+    echo "=== build log: ${LOG_OUT} ==="
+    echo "=== build log archive: ${LOG_ARCHIVE} ==="
+}
+
+# cleanup_step_log [STATUS]
+# Ends the per-step log redirection set up by run_step: restores stdout/stderr
+# from fds 5/6, waits for the step tee, and removes the step FIFO.
+# Returns STATUS (default: $?), so a failed step makes this call fail too.
+cleanup_step_log() {
+    status="${1:-$?}"
+
+    if [ "${STEP_LOG_ACTIVE:-0}" = "1" ]; then
+        STEP_LOG_ACTIVE=0
+        exec 1>&5 2>&6
+        exec 5>&- 6>&-
+        if [ -n "${STEP_TEE_PID:-}" ]; then
+            wait "${STEP_TEE_PID}" 2>/dev/null || true
+        fi
+        rm -f "${STEP_LOG_PIPE}"
+    fi
+
+    return "${status}"
+}
+
+# run_step NAME SLUG COMMAND [ARGS...]
+# Runs COMMAND with stdout/stderr teed into ${LOG_DIR}/SLUG.log while still
+# flowing to the current stdout. On failure prints an error that points at the
+# step log and exits the script with the command's status.
+run_step() {
+    step_name="$1"
+    step_slug="$2"
+    shift 2
+
+    step_log="${LOG_DIR}/${step_slug}.log"
+    echo ""
+    echo "=== step: ${step_name} ==="
+    echo "=== step log: ${step_log} ==="
+
+    STEP_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-step-log.XXXXXX")"
+    mkfifo "${STEP_LOG_PIPE}"
+
+    exec 5>&1 6>&2
+    tee "${step_log}" < "${STEP_LOG_PIPE}" >&5 &
+    STEP_TEE_PID=$!
+    exec > "${STEP_LOG_PIPE}" 2>&1
+    STEP_LOG_ACTIVE=1
+
+    # Capture the command's status ourselves instead of letting errexit fire.
+    set +e
+    "$@"
+    step_status=$?
+    set -e
+
+    # cleanup_step_log returns the status it is given; guard the call so a
+    # failed step does not trip `set -e` here and skip the error message below.
+    cleanup_step_log "${step_status}" || true
+    if [ "${step_status}" -ne 0 ]; then
+        echo "ERROR: step failed: ${step_name} (see ${step_log})" >&2
+        exit "${step_status}"
+    fi
+
+    echo "=== step OK: ${step_name} ==="
+}
+
+# run_step_sh NAME SLUG SCRIPT
+# Convenience wrapper around run_step that executes SCRIPT via `sh -c`.
+run_step_sh() {
+    step_name="$1"
+    step_slug="$2"
+    step_script="$3"
+
+    run_step "${step_name}" "${step_slug}" sh -c "${step_script}"
+}
+
+# run_optional_step_sh NAME SLUG SCRIPT
+# With BEE_REQUIRE_MEMTEST=1 this delegates to run_step_sh, so a failure ends
+# the build. Otherwise SCRIPT runs via `sh -c` with its output captured in
+# ${LOG_DIR}/SLUG.log and then printed; a failure only produces a WARNING.
+run_optional_step_sh() {
+    step_name="$1"
+    step_slug="$2"
+    step_script="$3"
+
+    if [ "${BEE_REQUIRE_MEMTEST:-0}" = "1" ]; then
+        run_step_sh "${step_name}" "${step_slug}" "${step_script}"
+        return 0
+    fi
+
+    step_log="${LOG_DIR}/${step_slug}.log"
+    echo ""
+    echo "=== optional step: ${step_name} ==="
+    echo "=== optional step log: ${step_log} ==="
+    # Disable errexit around the script so its failure can be downgraded.
+    set +e
+    sh -c "${step_script}" > "${step_log}" 2>&1
+    step_status=$?
+    set -e
+    cat "${step_log}"
+    if [ "${step_status}" -ne 0 ]; then
+        echo "WARNING: optional step failed: ${step_name} (see ${step_log})" >&2
+    else
+        echo "=== optional step OK: ${step_name} ==="
+    fi
+}
+
+start_build_log

 # Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency.
 # If headers for the detected ABI are not yet installed (kernel updated since image build),
@@ -202,8 +686,8 @@ echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERS
 echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}, ISO version: ${ISO_VERSION_EFFECTIVE}"
 echo ""

-echo "=== syncing git submodules ==="
-git -C "${REPO_ROOT}" submodule update --init --recursive
+run_step "sync git submodules" "05-git-submodules" \
+    git -C "${REPO_ROOT}" submodule update --init --recursive

 # --- compile bee binary (static, Linux amd64) ---
 # Shared between variants — built once, reused on second pass.
@@ -215,13 +699,13 @@ if [ -f "$BEE_BIN" ]; then
 fi

 if [ "$NEED_BUILD" = "1" ]; then
-    echo "=== building bee binary ==="
-    cd "${REPO_ROOT}/audit"
-    GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
-        go build \
-        -ldflags "-s -w -X main.Version=${AUDIT_VERSION_EFFECTIVE}" \
-        -o "$BEE_BIN" \
-        ./cmd/bee
+    run_step_sh "build bee binary" "10-build-bee" \
+        "cd '${REPO_ROOT}/audit' && \
+        env GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
+            go build \
+            -ldflags '-s -w -X main.Version=${AUDIT_VERSION_EFFECTIVE}' \
+            -o '${BEE_BIN}' \
+            ./cmd/bee"
    echo "binary: $BEE_BIN"
    if command -v stat >/dev/null 2>&1; then
        BEE_SIZE_BYTES="$(stat -c '%s' "$BEE_BIN" 2>/dev/null || stat -f '%z' "$BEE_BIN")"
@@ -240,9 +724,8 @@ fi
 # --- NVIDIA-only build steps ---
 GPU_BURN_WORKER_BIN="${DIST_DIR}/bee-gpu-burn-worker-linux-amd64"
 if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
-    echo ""
-    echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
-    sh "${BUILDER_DIR}/build-cublas.sh" \
+    run_step "download cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace" "20-cublas" \
+        sh "${BUILDER_DIR}/build-cublas.sh" \
        "${CUBLAS_VERSION}" \
        "${CUDA_USERSPACE_VERSION}" \
        "${NCCL_CUDA_VERSION}" \
@@ -256,8 +739,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    fi

    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
-        echo "=== building bee-gpu-burn worker ==="
-        gcc -O2 -s -Wall -Wextra \
+        run_step "build bee-gpu-burn worker" "21-gpu-burn-worker" \
+            gcc -O2 -s -Wall -Wextra \
            -I"${CUBLAS_CACHE}/include" \
            -o "$GPU_BURN_WORKER_BIN" \
            "${BUILDER_DIR}/bee-gpu-stress.c" \
@@ -378,9 +861,8 @@ done

 # --- NVIDIA kernel modules and userspace libs ---
 if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
-    echo ""
-    echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
-    sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"
+    run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \
+        sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"

    KVER="${DEBIAN_KERNEL_ABI}-amd64"
    NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
@@ -408,9 +890,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    fi

    # --- build / download NCCL ---
-    echo ""
-    echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
-    sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
+    run_step "download NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}" "50-nccl" \
+        sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"

    NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"

@@ -423,9 +904,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="

    # --- build nccl-tests ---
-    echo ""
-    echo "=== building nccl-tests ${NCCL_TESTS_VERSION} ==="
-    sh "${BUILDER_DIR}/build-nccl-tests.sh" \
+    run_step "build nccl-tests ${NCCL_TESTS_VERSION}" "60-nccl-tests" \
+        sh "${BUILDER_DIR}/build-nccl-tests.sh" \
        "${NCCL_TESTS_VERSION}" \
        "${NCCL_VERSION}" \
        "${NCCL_CUDA_VERSION}" \
@@ -439,9 +919,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    cp "${NCCL_TESTS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
    echo "=== all_reduce_perf injected ==="

-    echo ""
-    echo "=== building john jumbo ${JOHN_JUMBO_COMMIT} ==="
-    sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
+    run_step "build john jumbo ${JOHN_JUMBO_COMMIT}" "70-john" \
+        sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
    JOHN_CACHE="${DIST_DIR}/john-${JOHN_JUMBO_COMMIT}"
    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
    rsync -a --delete "${JOHN_CACHE}/run/" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/"
@@ -562,9 +1041,10 @@ BEE_GPU_VENDOR_UPPER="$(echo "${BEE_GPU_VENDOR}" | tr 'a-z' 'A-Z')"
 export BEE_GPU_VENDOR_UPPER

 cd "${LB_DIR}"
-lb clean 2>&1 | tail -3
-lb config 2>&1 | tail -5
-lb build 2>&1
+run_step_sh "live-build clean" "80-lb-clean" "lb clean 2>&1 | tail -3"
+run_step_sh "live-build config" "81-lb-config" "lb config 2>&1 | tail -5"
+dump_memtest_debug "pre-build" "${LB_DIR}"
+run_step_sh "live-build build" "90-lb-build" "lb build 2>&1"

 # --- persist deb package cache back to shared location ---
 # This allows the second variant to reuse all downloaded packages.
@@ -575,8 +1055,12 @@ fi

 # live-build outputs live-image-amd64.hybrid.iso in LB_DIR
 ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
-ISO_OUT="${DIST_DIR}/easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
 if [ -f "$ISO_RAW" ]; then
+    dump_memtest_debug "post-build" "${LB_DIR}" "$ISO_RAW"
+    if ! iso_memtest_present "$ISO_RAW"; then
+        recover_iso_memtest "${LB_DIR}" "$ISO_RAW"
+        dump_memtest_debug "post-recovery" "${LB_DIR}" "$ISO_RAW"
+    fi
    validate_iso_memtest "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
--- a/iso/builder/config/hooks/normal/9100-memtest.hook.binary
+++ b/iso/builder/config/hooks/normal/9100-memtest.hook.binary
@@ -0,0 +1,139 @@
+#!/bin/sh
+# Ensure memtest is present in the final ISO even if live-build's built-in
+# memtest stage does not copy the binaries or expose menu entries.
+# Abort on the first unguarded command failure.
+set -e
+
+# Strictness switch, defaulting to 0 when unset or empty. With the value 1,
+# fail_or_warn exits with status 1; with any other value it only logs a warning.
+: "${BEE_REQUIRE_MEMTEST:=0}"
+
+# File names looked up in the candidate boot directories and copied into
+# BINARY_BOOT_DIR (whitespace-separated; iterated unquoted in `for` loops).
+MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
+# All paths below are relative to the directory the hook is executed from.
+BINARY_BOOT_DIR="binary/boot"
+GRUB_CFG="binary/boot/grub/grub.cfg"
+ISOLINUX_CFG="binary/isolinux/live.cfg"
+
+# log MESSAGE...
+# Print the arguments on stdout, prefixed with "memtest hook: ".
+log() {
+    echo "memtest hook: $*"
+}
+
+# fail_or_warn MESSAGE
+# Report a memtest problem. In strict mode (BEE_REQUIRE_MEMTEST=1) the
+# message is logged as an ERROR and the hook exits with status 1; in every
+# other mode it is logged as a WARNING and the function returns 0.
+fail_or_warn() {
+    msg="$1"
+    if [ "${BEE_REQUIRE_MEMTEST}" != "1" ]; then
+        log "WARNING: ${msg}"
+        return 0
+    fi
+    log "ERROR: ${msg}"
+    exit 1
+}
+
+# copy_memtest_file SRC
+# Copy SRC into BINARY_BOOT_DIR under its own base name, creating the
+# directory if needed.
+# Returns 1 without touching anything when SRC is not a regular file.
+copy_memtest_file() {
+    src="$1"
+    base="$(basename "$src")"
+    dst="${BINARY_BOOT_DIR}/${base}"
+
+    [ -f "$src" ] || return 1
+    mkdir -p "${BINARY_BOOT_DIR}"
+    cp "$src" "$dst"
+    log "copied ${base} from ${src}"
+}
+
+# extract_memtest_from_deb DEB
+# Unpack DEB into a temporary directory and copy every file named in
+# MEMTEST_FILES that is found under its boot/ directory into BINARY_BOOT_DIR
+# (via copy_memtest_file). The temporary directory is removed afterwards.
+#
+# An unpack failure is reported as a warning and the function returns 0: the
+# caller re-checks which files are still missing and applies the
+# BEE_REQUIRE_MEMTEST policy there, so a broken package must not abort the
+# hook on its own under `set -e`.
+extract_memtest_from_deb() {
+    deb="$1"
+    tmpdir="$(mktemp -d)"
+
+    log "extracting memtest payload from ${deb}"
+    if ! dpkg-deb -x "$deb" "$tmpdir"; then
+        # Do not leave the partially unpacked tree behind.
+        rm -rf "$tmpdir"
+        log "WARNING: failed to extract ${deb}"
+        return 0
+    fi
+    for f in ${MEMTEST_FILES}; do
+        if [ -f "${tmpdir}/boot/${f}" ]; then
+            copy_memtest_file "${tmpdir}/boot/${f}"
+        fi
+    done
+    rm -rf "$tmpdir"
+}
+
+# ensure_memtest_binaries
+# Make sure every file in MEMTEST_FILES exists in BINARY_BOOT_DIR, trying
+# progressively more expensive sources:
+#   1. nothing to do if all files are already present;
+#   2. copy from chroot/boot or /boot;
+#   3. extract from a memtest86+ .deb found in one of the package caches.
+# Files still missing afterwards are reported through fail_or_warn, which
+# exits the hook in strict mode and only warns otherwise; in the latter case
+# the function returns 0.
+ensure_memtest_binaries() {
+    # Stage 1: are all files already in place?
+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
+    done
+    [ "$missing" -eq 1 ] || return 0
+
+    # Stage 2: copy whatever is available from the boot directories. A file
+    # already copied from an earlier root is not overwritten; a failed copy
+    # is ignored (`|| true`) so the next root can be tried.
+    for root in chroot/boot /boot; do
+        for f in ${MEMTEST_FILES}; do
+            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
+        done
+    done
+
+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
+    done
+    [ "$missing" -eq 1 ] || return 0
+
+    # Stage 3: fall back to a cached package. Only the first cache directory
+    # that contains a matching .deb is used (note the `break`).
+    # NOTE(review): the second -name pattern already matches everything the
+    # first one does.
+    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
+        [ -d "$root" ] || continue
+        deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
+        [ -n "$deb" ] || continue
+        extract_memtest_from_deb "$deb"
+        break
+    done
+
+    # Final verdict: report each file that is still missing.
+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
+            fail_or_warn "missing ${BINARY_BOOT_DIR}/${f}"
+            missing=1
+        fi
+    done
+    # Yields status 0 on both paths, so the function never trips errexit in
+    # the caller.
+    [ "$missing" -eq 0 ] || return 0
+}
+
+# ensure_grub_entry
+# Append a memtest86+ menu block to GRUB_CFG unless the file already contains
+# the "### BEE MEMTEST ###" marker. A missing GRUB_CFG is reported through
+# fail_or_warn and, in non-strict mode, the function then returns 0.
+ensure_grub_entry() {
+    if [ ! -f "$GRUB_CFG" ]; then
+        fail_or_warn "missing ${GRUB_CFG}"
+        return 0
+    fi
+
+    # The marker makes the hook safe to run more than once on the same tree.
+    if grep -q '### BEE MEMTEST ###' "$GRUB_CFG"; then
+        return 0
+    fi
+
+    cat >> "$GRUB_CFG" <<'EOF'
+
+### BEE MEMTEST ###
+if [ "${grub_platform}" = "efi" ]; then
+    menuentry "Memory Test (memtest86+)" {
+        chainloader /boot/memtest86+x64.efi
+    }
+else
+    menuentry "Memory Test (memtest86+)" {
+        linux16 /boot/memtest86+x64.bin
+    }
+fi
+### /BEE MEMTEST ###
+EOF
+
+    log "appended memtest entry to ${GRUB_CFG}"
+}
+
+# ensure_isolinux_entry
+# Append a memtest86+ label to ISOLINUX_CFG unless the file already contains
+# the "### BEE MEMTEST ###" marker. A missing ISOLINUX_CFG is reported through
+# fail_or_warn and, in non-strict mode, the function then returns 0.
+ensure_isolinux_entry() {
+    if [ ! -f "$ISOLINUX_CFG" ]; then
+        fail_or_warn "missing ${ISOLINUX_CFG}"
+        return 0
+    fi
+
+    # Skip the append when a previous run already added the block.
+    if grep -q '### BEE MEMTEST ###' "$ISOLINUX_CFG"; then
+        return 0
+    fi
+
+    cat >> "$ISOLINUX_CFG" <<'EOF'
+
+# ### BEE MEMTEST ###
+label memtest
+    menu label ^Memory Test (memtest86+)
+    linux /boot/memtest86+x64.bin
+# ### /BEE MEMTEST ###
+EOF
+
+    log "appended memtest entry to ${ISOLINUX_CFG}"
+}
+
+# Entry point: binaries first, then the boot menu entries for both loaders.
+# In non-strict mode each step only warns about problems, so the closing
+# message is printed even if some asset could not be provided.
+log "ensuring memtest binaries and menu entries in binary image"
+ensure_memtest_binaries
+ensure_grub_entry
+ensure_isolinux_entry
+log "memtest assets ready"
--- a/iso/overlay/usr/local/bin/bee-john-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-john-gpu-stress
@@ -7,6 +7,8 @@ EXCLUDE=""
 FORMAT=""
 JOHN_DIR="/usr/local/lib/bee/john/run"
 JOHN_BIN="${JOHN_DIR}/john"
+export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
+export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

 usage() {
    echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
@@ -24,6 +26,21 @@ contains_csv() {
 }

 show_opencl_diagnostics() {
+    echo "-- OpenCL ICD vendors --" >&2
+    if [ -d /etc/OpenCL/vendors ]; then
+        ls -l /etc/OpenCL/vendors >&2 || true
+        for icd in /etc/OpenCL/vendors/*.icd; do
+            [ -f "${icd}" ] || continue
+            echo "  file: ${icd}" >&2
+            sed 's/^/    /' "${icd}" >&2 || true
+        done
+    else
+        echo "  /etc/OpenCL/vendors is missing" >&2
+    fi
+    echo "-- NVIDIA device nodes --" >&2
+    ls -l /dev/nvidia* >&2 || true
+    echo "-- ldconfig OpenCL/NVIDIA --" >&2
+    ldconfig -p 2>/dev/null | grep 'libOpenCL\|libcuda\|libnvidia-opencl' >&2 || true
    if command -v clinfo >/dev/null 2>&1; then
        echo "-- clinfo -l --" >&2
        clinfo -l >&2 || true
@@ -32,6 +49,17 @@ show_opencl_diagnostics() {
    ./john --list=opencl-devices >&2 || true
 }

+# refresh_nvidia_runtime
+# Best-effort refresh of the NVIDIA runtime before OpenCL is probed again:
+# runs bee-nvidia-load when that command is available, then ldconfig.
+# Failures and output of both are ignored.
+# Returns 1 when not running as uid 0 (nothing is attempted), 0 otherwise.
+refresh_nvidia_runtime() {
+    [ "$(id -u)" = "0" ] || return 1
+
+    if command -v bee-nvidia-load >/dev/null 2>&1; then
+        bee-nvidia-load >/dev/null 2>&1 || true
+    fi
+    ldconfig >/dev/null 2>&1 || true
+
+    return 0
+}
+
 ensure_nvidia_uvm() {
    if lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
        return 0
@@ -61,6 +89,13 @@ ensure_opencl_ready() {
        return 0
    fi

+    if refresh_nvidia_runtime; then
+        out=$(./john --list=opencl-devices 2>&1 || true)
+        if echo "${out}" | grep -q "Device #"; then
+            return 0
+        fi
+    fi
+
    if ensure_nvidia_uvm; then
        out=$(./john --list=opencl-devices 2>&1 || true)
        if echo "${out}" | grep -q "Device #"; then
Author	SHA1	Message	Date
Mikhail Chusavitin	f6f4923ac9	fix(iso): recover memtest after live-build	2026-04-01 08:55:57 +03:00
Mikhail Chusavitin	c394845b34	refactor(webui): queue install and bundle tasks - v3.18	2026-04-01 08:46:46 +03:00
Mikhail Chusavitin	3472afea32	fix(iso): make memtest non-blocking by default	2026-04-01 08:33:36 +03:00
Mikhail Chusavitin	942f11937f	chore(submodule): update bible - v3.16	2026-04-01 08:23:39 +03:00
Mikhail Chusavitin	b5b34983f1	fix(webui): repair audit actions and CPU burn flow - v3.15	2026-04-01 08:19:11 +03:00
Michael Chus	45221d1e9a	fix(stress): label loaders and improve john opencl diagnostics	2026-04-01 07:31:52 +03:00
Michael Chus	3869788bac	fix(iso): validate memtest with xorriso fallback	2026-04-01 07:24:05 +03:00
Michael Chus	3dbc2184ef	fix(iso): archive build logs and memtest diagnostics	2026-04-01 07:14:53 +03:00