Compare commits

...

8 Commits
v3.12 ... v3.19

Author SHA1 Message Date
Mikhail Chusavitin
f6f4923ac9 fix(iso): recover memtest after live-build 2026-04-01 08:55:57 +03:00
Mikhail Chusavitin
c394845b34 refactor(webui): queue install and bundle tasks - v3.18 2026-04-01 08:46:46 +03:00
Mikhail Chusavitin
3472afea32 fix(iso): make memtest non-blocking by default 2026-04-01 08:33:36 +03:00
Mikhail Chusavitin
942f11937f chore(submodule): update bible - v3.16 2026-04-01 08:23:39 +03:00
Mikhail Chusavitin
b5b34983f1 fix(webui): repair audit actions and CPU burn flow - v3.15 2026-04-01 08:19:11 +03:00
45221d1e9a fix(stress): label loaders and improve john opencl diagnostics 2026-04-01 07:31:52 +03:00
3869788bac fix(iso): validate memtest with xorriso fallback 2026-04-01 07:24:05 +03:00
3dbc2184ef fix(iso): archive build logs and memtest diagnostics 2026-04-01 07:14:53 +03:00
19 changed files with 1554 additions and 161 deletions

View File

@@ -36,6 +36,8 @@ var supportBundleCommands = []struct {
{name: "system/dmesg-tail.txt", cmd: []string{"sh", "-c", "dmesg | tail -n 200"}},
}
const supportBundleGlob = "bee-support-*.tar.gz"
func BuildSupportBundle(exportDir string) (string, error) {
exportDir = strings.TrimSpace(exportDir)
if exportDir == "" {
@@ -86,34 +88,64 @@ func BuildSupportBundle(exportDir string) (string, error) {
return archivePath, nil
}
// LatestSupportBundlePath returns the path of the most recently modified
// support bundle archive in the system temporary directory (os.TempDir).
// The lookup itself is delegated to latestSupportBundlePath.
func LatestSupportBundlePath() (string, error) {
	tmpDir := os.TempDir()
	return latestSupportBundlePath(tmpDir)
}
func cleanupOldSupportBundles(dir string) error {
matches, err := filepath.Glob(filepath.Join(dir, "bee-support-*.tar.gz"))
matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
if err != nil {
return err
}
type entry struct {
path string
mod time.Time
entries := supportBundleEntries(matches)
for path, mod := range entries {
if time.Since(mod) > 24*time.Hour {
_ = os.Remove(path)
delete(entries, path)
}
}
list := make([]entry, 0, len(matches))
ordered := orderSupportBundles(entries)
if len(ordered) > 3 {
for _, old := range ordered[3:] {
_ = os.Remove(old)
}
}
return nil
}
// latestSupportBundlePath searches dir for files matching supportBundleGlob and
// returns the first entry of the list produced by orderSupportBundles (sorted
// by modification time, newest first). It returns os.ErrNotExist when no
// bundle is found and passes through any error reported by filepath.Glob.
func latestSupportBundlePath(dir string) (string, error) {
	pattern := filepath.Join(dir, supportBundleGlob)
	candidates, globErr := filepath.Glob(pattern)
	if globErr != nil {
		return "", globErr
	}

	newestFirst := orderSupportBundles(supportBundleEntries(candidates))
	if len(newestFirst) > 0 {
		return newestFirst[0], nil
	}
	return "", os.ErrNotExist
}
func supportBundleEntries(matches []string) map[string]time.Time {
entries := make(map[string]time.Time, len(matches))
for _, match := range matches {
info, err := os.Stat(match)
if err != nil {
continue
}
if time.Since(info.ModTime()) > 24*time.Hour {
_ = os.Remove(match)
continue
}
list = append(list, entry{path: match, mod: info.ModTime()})
entries[match] = info.ModTime()
}
sort.Slice(list, func(i, j int) bool { return list[i].mod.After(list[j].mod) })
if len(list) > 3 {
for _, old := range list[3:] {
_ = os.Remove(old.path)
}
return entries
}
func orderSupportBundles(entries map[string]time.Time) []string {
ordered := make([]string, 0, len(entries))
for path := range entries {
ordered = append(ordered, path)
}
return nil
sort.Slice(ordered, func(i, j int) bool {
return entries[ordered[i]].After(entries[ordered[j]])
})
return ordered
}
func writeJournalDump(dst string) error {

View File

@@ -16,7 +16,7 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
return "", err
}
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-stress", []satJob{
return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), []satJob{
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
job,
@@ -24,6 +24,17 @@ func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts N
}, logFunc)
}
// nvidiaStressArchivePrefix maps a stress loader name to the prefix passed to
// runAcceptancePackCtx. The loader is lower-cased and trimmed before matching;
// any value other than the John or NCCL loader yields the "burn" prefix.
func nvidiaStressArchivePrefix(loader string) string {
	normalized := strings.TrimSpace(strings.ToLower(loader))
	if normalized == NvidiaStressLoaderJohn {
		return "gpu-nvidia-john"
	}
	if normalized == NvidiaStressLoaderNCCL {
		return "gpu-nvidia-nccl"
	}
	return "gpu-nvidia-burn"
}
func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
if err != nil {

View File

@@ -684,7 +684,11 @@ func resolveSATCommand(cmd []string) ([]string, error) {
case "rvs":
return resolveRVSCommand(cmd[1:]...)
}
return cmd, nil
path, err := satLookPath(cmd[0])
if err != nil {
return nil, fmt.Errorf("%s not found in PATH: %w", cmd[0], err)
}
return append([]string{path}, cmd[1:]...), nil
}
func resolveRVSCommand(args ...string) ([]string, error) {

View File

@@ -162,6 +162,25 @@ func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
}
}
// TestNvidiaStressArchivePrefixByLoader checks the archive prefix selected for
// each named loader constant and for an empty loader string.
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
	t.Parallel()

	type prefixCase struct {
		loader string
		want   string
	}
	cases := []prefixCase{
		{loader: NvidiaStressLoaderBuiltin, want: "gpu-nvidia-burn"},
		{loader: NvidiaStressLoaderJohn, want: "gpu-nvidia-john"},
		{loader: NvidiaStressLoaderNCCL, want: "gpu-nvidia-nccl"},
		{loader: "", want: "gpu-nvidia-burn"},
	}

	for i := range cases {
		tc := cases[i]
		got := nvidiaStressArchivePrefix(tc.loader)
		if got == tc.want {
			continue
		}
		t.Fatalf("loader=%q prefix=%q want %q", tc.loader, got, tc.want)
	}
}
func TestEnvIntFallback(t *testing.T) {
os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -237,6 +256,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
}
}
// TestResolveSATCommandUsesLookPathForGenericTools stubs satLookPath so that
// only "stress-ng" resolves, then verifies resolveSATCommand replaces the
// command name with the resolved path and keeps the argument count.
func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
	const resolved = "/usr/bin/stress-ng"

	saved := satLookPath
	t.Cleanup(func() { satLookPath = saved })
	satLookPath = func(name string) (string, error) {
		if name != "stress-ng" {
			return "", exec.ErrNotFound
		}
		return resolved, nil
	}

	got, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
	if err != nil {
		t.Fatalf("resolveSATCommand error: %v", err)
	}
	if n := len(got); n != 3 {
		t.Fatalf("cmd len=%d want 3 (%v)", n, got)
	}
	if got[0] != resolved {
		t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", got[0])
	}
}
// TestResolveSATCommandFailsForMissingGenericTool stubs satLookPath to always
// fail and verifies resolveSATCommand reports the tool as missing from PATH.
func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
	saved := satLookPath
	t.Cleanup(func() { satLookPath = saved })
	satLookPath = func(string) (string, error) { return "", exec.ErrNotFound }

	_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
	switch {
	case err == nil:
		t.Fatal("expected error")
	case !strings.Contains(err.Error(), "stress-ng not found in PATH"):
		t.Fatalf("error=%q", err)
	}
}
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
tmp := t.TempDir()
execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")

View File

@@ -2,11 +2,12 @@ package webui
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
@@ -85,15 +86,16 @@ func streamJob(w http.ResponseWriter, r *http.Request, j *jobState) {
}
}
// runCmdJob runs an exec.Cmd as a background job, streaming stdout+stderr lines.
func runCmdJob(j *jobState, cmd *exec.Cmd) {
// streamCmdJob runs an exec.Cmd and streams stdout+stderr lines into j.
func streamCmdJob(j *jobState, cmd *exec.Cmd) error {
pr, pw := io.Pipe()
cmd.Stdout = pw
cmd.Stderr = pw
if err := cmd.Start(); err != nil {
j.finish(err.Error())
return
_ = pw.Close()
_ = pr.Close()
return err
}
// Lower the CPU scheduling priority of stress/audit subprocesses to nice+10
// so the X server and kernel interrupt handling remain responsive under load
@@ -102,8 +104,10 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, 10)
}
scanDone := make(chan error, 1)
go func() {
scanner := bufio.NewScanner(pr)
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
for scanner.Scan() {
// Split on \r to handle progress-bar style output (e.g. \r overwrites)
// and strip ANSI escape codes so logs are readable in the browser.
@@ -115,15 +119,21 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
}
}
}
if err := scanner.Err(); err != nil && !errors.Is(err, io.ErrClosedPipe) {
scanDone <- err
return
}
scanDone <- nil
}()
err := cmd.Wait()
_ = pw.Close()
scanErr := <-scanDone
_ = pr.Close()
if err != nil {
j.finish(err.Error())
} else {
j.finish("")
return err
}
return scanErr
}
// ── Audit ─────────────────────────────────────────────────────────────────────
@@ -179,19 +189,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
Profile string `json:"profile"`
DisplayName string `json:"display_name"`
}
if r.ContentLength > 0 {
_ = json.NewDecoder(r.Body).Decode(&body)
}
name := taskNames[target]
if body.Profile != "" {
if n, ok := burnNames[target]; ok {
name = n
if r.Body != nil {
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
writeError(w, http.StatusBadRequest, "invalid request body")
return
}
}
if name == "" {
name = target
}
name := taskDisplayName(target, body.Profile, body.Loader)
t := &Task{
ID: newJobID("sat-" + target),
Name: name,
@@ -420,15 +425,23 @@ func (h *handler) handleAPIExportList(w http.ResponseWriter, r *http.Request) {
}
func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request) {
archive, err := app.BuildSupportBundle(h.opts.ExportDir)
if err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
if globalQueue.hasActiveTarget("support-bundle") {
writeError(w, http.StatusConflict, "support bundle task is already pending or running")
return
}
t := &Task{
ID: newJobID("support-bundle"),
Name: "Support Bundle",
Target: "support-bundle",
Status: TaskPending,
CreatedAt: time.Now(),
}
globalQueue.enqueue(t)
writeJSON(w, map[string]string{
"status": "ok",
"path": archive,
"url": "/export/support.tar.gz",
"status": "queued",
"task_id": t.ID,
"job_id": t.ID,
"url": "/export/support.tar.gz",
})
}
@@ -516,10 +529,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
h.installMu.Lock()
installRunning := h.installJob != nil && !h.installJob.isDone()
h.installMu.Unlock()
if installRunning {
if globalQueue.hasActiveTarget("install") {
writeError(w, http.StatusConflict, "install to disk is already running")
return
}
@@ -634,35 +644,23 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
return
}
h.installMu.Lock()
if h.installJob != nil && !h.installJob.isDone() {
h.installMu.Unlock()
writeError(w, http.StatusConflict, "install already running")
if globalQueue.hasActiveTarget("install") {
writeError(w, http.StatusConflict, "install task is already pending or running")
return
}
j := &jobState{}
h.installJob = j
h.installMu.Unlock()
logFile := platform.InstallLogPath(req.Device)
go runCmdJob(j, exec.CommandContext(context.Background(), "bee-install", req.Device, logFile))
w.WriteHeader(http.StatusNoContent)
}
func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request) {
h.installMu.Lock()
j := h.installJob
h.installMu.Unlock()
if j == nil {
if !sseStart(w) {
return
}
sseWrite(w, "done", "")
return
t := &Task{
ID: newJobID("install"),
Name: "Install to Disk",
Target: "install",
Priority: 20,
Status: TaskPending,
CreatedAt: time.Now(),
params: taskParams{
Device: req.Device,
},
}
streamJob(w, r, j)
globalQueue.enqueue(t)
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
}
// ── Metrics SSE ───────────────────────────────────────────────────────────────
@@ -933,8 +931,31 @@ func parseXrandrOutput(out string) []displayInfo {
return infos
}
// xrandrCommand builds an exec.Cmd that runs xrandr with the given arguments.
// The command receives a copy of the current process environment; when DISPLAY
// or XAUTHORITY is absent or empty there, the defaults "DISPLAY=:0" and
// "XAUTHORITY=/home/bee/.Xauthority" are appended to the copy.
func xrandrCommand(args ...string) *exec.Cmd {
	inherited := os.Environ()
	environ := make([]string, len(inherited), len(inherited)+2)
	copy(environ, inherited)

	// nonEmpty reports whether kv has the form "<prefix><value>" with a
	// non-empty value.
	nonEmpty := func(kv, prefix string) bool {
		return len(kv) > len(prefix) && strings.HasPrefix(kv, prefix)
	}

	var displaySet, xauthSet bool
	for _, kv := range environ {
		displaySet = displaySet || nonEmpty(kv, "DISPLAY=")
		xauthSet = xauthSet || nonEmpty(kv, "XAUTHORITY=")
	}
	if !displaySet {
		environ = append(environ, "DISPLAY=:0")
	}
	if !xauthSet {
		environ = append(environ, "XAUTHORITY=/home/bee/.Xauthority")
	}

	command := exec.Command("xrandr", args...)
	command.Env = environ
	return command
}
func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
out, err := exec.Command("xrandr").Output()
out, err := xrandrCommand().Output()
if err != nil {
writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
return
@@ -961,7 +982,7 @@ func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
writeError(w, http.StatusBadRequest, "invalid output name")
return
}
if out, err := exec.Command("xrandr", "--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
return
}

View File

@@ -0,0 +1,102 @@
package webui
import (
"encoding/json"
"net/http/httptest"
"strings"
"testing"
"bee/audit/internal/app"
)
// TestXrandrCommandAddsDefaultX11Env clears DISPLAY and XAUTHORITY and checks
// that xrandrCommand injects the default values into the command environment.
func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
	for _, key := range []string{"DISPLAY", "XAUTHORITY"} {
		t.Setenv(key, "")
	}

	cmd := xrandrCommand("--query")
	envHas := func(want string) bool {
		for _, kv := range cmd.Env {
			if kv == want {
				return true
			}
		}
		return false
	}

	if !envHas("DISPLAY=:0") {
		t.Fatalf("DISPLAY not injected: %v", cmd.Env)
	}
	if !envHas("XAUTHORITY=/home/bee/.Xauthority") {
		t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
	}
}
// TestHandleAPISATRunDecodesBodyWithoutContentLength posts a JSON body with
// ContentLength set to -1 and checks that the handler still decodes it and
// queues one task carrying the requested burn profile.
func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
	// Swap in an empty task list for the duration of the test.
	globalQueue.mu.Lock()
	savedTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		defer globalQueue.mu.Unlock()
		globalQueue.tasks = savedTasks
	})

	payload := strings.NewReader(`{"profile":"smoke"}`)
	req := httptest.NewRequest("POST", "/api/sat/cpu/run", payload)
	req.ContentLength = -1
	rec := httptest.NewRecorder()

	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	serve := h.handleAPISATRun("cpu")
	serve.ServeHTTP(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}

	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	queued := globalQueue.tasks
	if len(queued) != 1 {
		t.Fatalf("tasks=%d want 1", len(queued))
	}
	if got := queued[0].params.BurnProfile; got != "smoke" {
		t.Fatalf("burn profile=%q want smoke", got)
	}
}
// TestHandleAPIExportBundleQueuesTask posts to the export-bundle handler and
// checks that it responds with a task_id and enqueues exactly one task whose
// target is "support-bundle".
func TestHandleAPIExportBundleQueuesTask(t *testing.T) {
	// Swap in an empty task list for the duration of the test.
	globalQueue.mu.Lock()
	savedTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		defer globalQueue.mu.Unlock()
		globalQueue.tasks = savedTasks
	})

	rec := httptest.NewRecorder()
	req := httptest.NewRequest("POST", "/api/export/bundle", nil)
	h := &handler{opts: HandlerOptions{ExportDir: t.TempDir()}}
	h.handleAPIExportBundle(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}

	var body map[string]string
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &body); decodeErr != nil {
		t.Fatalf("decode response: %v", decodeErr)
	}
	if taskID := body["task_id"]; taskID == "" {
		t.Fatalf("missing task_id in response: %v", body)
	}

	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	queued := globalQueue.tasks
	if len(queued) != 1 {
		t.Fatalf("tasks=%d want 1", len(queued))
	}
	if got := queued[0].Target; got != "support-bundle" {
		t.Fatalf("target=%q want support-bundle", got)
	}
}

View File

@@ -4,6 +4,8 @@ import (
"database/sql"
"encoding/csv"
"io"
"os"
"path/filepath"
"strconv"
"time"
@@ -20,6 +22,9 @@ type MetricsDB struct {
// openMetricsDB opens (or creates) the metrics database at the given path.
func openMetricsDB(path string) (*MetricsDB, error) {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return nil, err
}
db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
if err != nil {
return nil, err
@@ -132,7 +137,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
defer rows.Close()
type sysRow struct {
ts int64
ts int64
cpu, mem, pwr float64
}
var sysRows []sysRow
@@ -156,7 +161,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
maxTS := sysRows[len(sysRows)-1].ts
// Load GPU rows in range
type gpuKey struct{ ts int64; idx int }
type gpuKey struct {
ts int64
idx int
}
gpuData := map[gpuKey]platform.GPUMetricRow{}
gRows, err := m.db.Query(
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
@@ -174,7 +182,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
}
// Load fan rows in range
type fanKey struct{ ts int64; name string }
type fanKey struct {
ts int64
name string
}
fanData := map[fanKey]float64{}
fRows, err := m.db.Query(
`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
@@ -192,7 +203,10 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
}
// Load temp rows in range
type tempKey struct{ ts int64; name string }
type tempKey struct {
ts int64
name string
}
tempData := map[tempKey]platform.TempReading{}
tRows, err := m.db.Query(
`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,

View File

@@ -289,7 +289,7 @@ func renderAudit() string {
func renderHardwareSummaryCard(opts HandlerOptions) string {
data, err := loadSnapshot(opts.AuditPath)
if err != nil {
return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><span class="badge badge-unknown">No audit data</span></div></div>`
return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><button class="btn btn-primary" onclick="auditModalRun()">&#9654; Run Audit</button></div></div>`
}
// Parse just enough fields for the summary banner
var snap struct {
@@ -926,7 +926,7 @@ func renderExport(exportDir string) string {
return `<div class="grid2">
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
<a class="btn btn-primary" href="/export/support.tar.gz">&#8595; Download Support Bundle</a>
` + renderSupportBundleInline() + `
</div></div>
<div class="card"><div class="card-head">Export Files</div><div class="card-body">
<table><tr><th>File</th></tr>` + rows.String() + `</table>
@@ -1024,6 +1024,77 @@ func listExportFiles(exportDir string) ([]string, error) {
return entries, nil
}
// renderSupportBundleInline returns the HTML fragment and inline script for the
// support bundle controls: a build button, a hidden download link, a status
// line, and a log area. The script defines window.supportBundleBuild, which
// POSTs to /api/export/bundle, opens an EventSource on
// /api/tasks/<task_id>/stream, appends each message to the log, and on the
// "done" event either shows the error carried in the event data or reveals the
// download link when the data is empty.
func renderSupportBundleInline() string {
return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleBuild()">Build Support Bundle</button>
<a id="support-bundle-download" class="btn btn-secondary" href="/export/support.tar.gz" style="display:none">&#8595; Download Support Bundle</a>
<div id="support-bundle-status" style="margin-top:12px;font-size:13px;color:var(--muted)">No support bundle built in this session.</div>
<div id="support-bundle-log" class="terminal" style="display:none;margin-top:12px;max-height:260px"></div>
<script>
(function(){
var _supportBundleES = null;
window.supportBundleBuild = function() {
var btn = document.getElementById('support-bundle-btn');
var status = document.getElementById('support-bundle-status');
var log = document.getElementById('support-bundle-log');
var download = document.getElementById('support-bundle-download');
if (_supportBundleES) {
_supportBundleES.close();
_supportBundleES = null;
}
btn.disabled = true;
btn.textContent = 'Building...';
status.textContent = 'Queueing support bundle task...';
status.style.color = 'var(--muted)';
log.style.display = '';
log.textContent = '';
download.style.display = 'none';
fetch('/api/export/bundle', {method:'POST'}).then(function(r){
return r.json().then(function(j){
if (!r.ok) throw new Error(j.error || r.statusText);
return j;
});
}).then(function(data){
if (!data.task_id) throw new Error('missing task id');
status.textContent = 'Building support bundle...';
_supportBundleES = new EventSource('/api/tasks/' + data.task_id + '/stream');
_supportBundleES.onmessage = function(e) {
log.textContent += e.data + '\n';
log.scrollTop = log.scrollHeight;
};
_supportBundleES.addEventListener('done', function(e) {
_supportBundleES.close();
_supportBundleES = null;
btn.disabled = false;
btn.textContent = 'Build Support Bundle';
if (e.data) {
status.textContent = 'Error: ' + e.data;
status.style.color = 'var(--crit-fg)';
return;
}
status.textContent = 'Support bundle ready.';
status.style.color = 'var(--ok-fg)';
download.style.display = '';
});
_supportBundleES.onerror = function() {
if (_supportBundleES) _supportBundleES.close();
_supportBundleES = null;
btn.disabled = false;
btn.textContent = 'Build Support Bundle';
status.textContent = 'Support bundle stream disconnected.';
status.style.color = 'var(--crit-fg)';
};
}).catch(function(e){
btn.disabled = false;
btn.textContent = 'Build Support Bundle';
status.textContent = 'Error: ' + e;
status.style.color = 'var(--crit-fg)';
});
};
})();
</script>`
}
// ── Display Resolution ────────────────────────────────────────────────────────
func renderDisplayInline() string {
@@ -1113,7 +1184,7 @@ function installToRAM() {
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
<a class="btn btn-primary" href="/export/support.tar.gz">&#8595; Download Support Bundle</a>
` + renderSupportBundleInline() + `
</div></div>
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
@@ -1292,21 +1363,23 @@ function installStart() {
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({device: _installSelected.device})
}).then(function(r){
if (r.status === 204) {
installStreamLog();
} else {
return r.json().then(function(j){ throw new Error(j.error || r.statusText); });
}
return r.json().then(function(j){
if (!r.ok) throw new Error(j.error || r.statusText);
return j;
});
}).then(function(j){
if (!j.task_id) throw new Error('missing task id');
installStreamLog(j.task_id);
}).catch(function(e){
status.textContent = 'Error: ' + e;
status.style.color = 'var(--crit-fg)';
});
}
function installStreamLog() {
function installStreamLog(taskId) {
var term = document.getElementById('install-terminal');
var status = document.getElementById('install-status');
var es = new EventSource('/api/install/stream');
var es = new EventSource('/api/tasks/' + taskId + '/stream');
es.onmessage = function(e) {
term.textContent += e.data + '\n';
term.scrollTop = term.scrollHeight;

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"html"
"log/slog"
"mime"
"net/http"
"os"
@@ -143,9 +144,6 @@ type handler struct {
latest *platform.LiveMetricSample
// metrics persistence (nil if DB unavailable)
metricsDB *MetricsDB
// install job (at most one at a time)
installJob *jobState
installMu sync.Mutex
// pending network change (rollback on timeout)
pendingNet *pendingNetChange
pendingNetMu sync.Mutex
@@ -180,7 +178,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
if len(samples) > 0 {
h.setLatestMetric(samples[len(samples)-1])
}
} else {
slog.Warn("metrics history unavailable", "path", metricsDBPath, "err", err)
}
} else {
slog.Warn("metrics db disabled", "path", metricsDBPath, "err", err)
}
h.startMetricsCollector()
@@ -266,7 +268,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
// Install
mux.HandleFunc("GET /api/install/disks", h.handleAPIInstallDisks)
mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
mux.HandleFunc("GET /api/install/stream", h.handleAPIInstallStream)
// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
@@ -366,9 +367,13 @@ func (h *handler) handleRuntimeHealthJSON(w http.ResponseWriter, r *http.Request
}
func (h *handler) handleSupportBundleDownload(w http.ResponseWriter, r *http.Request) {
archive, err := app.BuildSupportBundle(h.opts.ExportDir)
archive, err := app.LatestSupportBundlePath()
if err != nil {
http.Error(w, fmt.Sprintf("build support bundle: %v", err), http.StatusInternalServerError)
if errors.Is(err, os.ErrNotExist) {
http.Error(w, "support bundle not built yet", http.StatusNotFound)
return
}
http.Error(w, fmt.Sprintf("locate support bundle: %v", err), http.StatusInternalServerError)
return
}
w.Header().Set("Cache-Control", "no-store")

View File

@@ -136,6 +136,33 @@ func TestRootRendersDashboard(t *testing.T) {
}
}
func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
dir := t.TempDir()
exportDir := filepath.Join(dir, "export")
if err := os.MkdirAll(exportDir, 0755); err != nil {
t.Fatal(err)
}
handler := NewHandler(HandlerOptions{
Title: "Bee Hardware Audit",
AuditPath: filepath.Join(dir, "missing-audit.json"),
ExportDir: exportDir,
})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
if !strings.Contains(body, `Run Audit`) {
t.Fatalf("dashboard missing run audit button: %s", body)
}
if strings.Contains(body, `No audit data`) {
t.Fatalf("dashboard still shows empty audit badge: %s", body)
}
}
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "audit.json")
@@ -232,6 +259,17 @@ func TestSupportBundleEndpointReturnsArchive(t *testing.T) {
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.log"), []byte("audit log"), 0644); err != nil {
t.Fatal(err)
}
archive, err := os.CreateTemp(os.TempDir(), "bee-support-server-test-*.tar.gz")
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { _ = os.Remove(archive.Name()) })
if _, err := archive.WriteString("support-bundle"); err != nil {
t.Fatal(err)
}
if err := archive.Close(); err != nil {
t.Fatal(err)
}
handler := NewHandler(HandlerOptions{ExportDir: exportDir})
rec := httptest.NewRecorder()

View File

@@ -6,8 +6,10 @@ import (
"fmt"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"sync"
"time"
@@ -39,6 +41,7 @@ var taskNames = map[string]string{
"sat-stress": "SAT Stress (stressapptest)",
"platform-stress": "Platform Thermal Cycling",
"audit": "Audit",
"support-bundle": "Support Bundle",
"install": "Install to Disk",
"install-to-ram": "Install to RAM",
}
@@ -51,6 +54,33 @@ var burnNames = map[string]string{
"amd": "AMD GPU Burn-in",
}
// nvidiaStressTaskName returns the display name for an NVIDIA stress task.
// The loader is lower-cased and trimmed before matching; anything other than
// the John or NCCL loader is labelled as bee-gpu-burn.
func nvidiaStressTaskName(loader string) string {
	normalized := strings.TrimSpace(strings.ToLower(loader))
	if normalized == platform.NvidiaStressLoaderJohn {
		return "NVIDIA GPU Stress (John/OpenCL)"
	}
	if normalized == platform.NvidiaStressLoaderNCCL {
		return "NVIDIA GPU Stress (NCCL)"
	}
	return "NVIDIA GPU Stress (bee-gpu-burn)"
}
func taskDisplayName(target, profile, loader string) string {
name := taskNames[target]
if profile != "" {
if n, ok := burnNames[target]; ok {
name = n
}
}
if target == "nvidia-stress" {
name = nvidiaStressTaskName(loader)
}
if name == "" {
name = target
}
return name
}
// Task represents one unit of work in the queue.
type Task struct {
ID string `json:"id"`
@@ -185,6 +215,10 @@ var (
runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
}
buildSupportBundle = app.BuildSupportBundle
installCommand = func(ctx context.Context, device string, logPath string) *exec.Cmd {
return exec.CommandContext(ctx, "bee-install", device, logPath)
}
)
// enqueue adds a task to the queue and notifies the worker.
@@ -382,9 +416,9 @@ func setCPUGovernor(governor string) {
// runTask executes the work for a task, writing output to j.
func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
if q.opts == nil || q.opts.App == nil {
j.append("ERROR: app not configured")
j.finish("app not configured")
if q.opts == nil {
j.append("ERROR: handler options not configured")
j.finish("handler options not configured")
return
}
a := q.opts.App
@@ -401,6 +435,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
switch t.Target {
case "nvidia":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
diagLevel := t.params.DiagLevel
if t.params.BurnProfile != "" && diagLevel <= 0 {
diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
@@ -418,6 +456,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
archive, err = a.RunNvidiaAcceptancePack("", j.append)
}
case "nvidia-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
@@ -429,10 +471,22 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
ExcludeGPUIndices: t.params.ExcludeGPUIndices,
}, j.append)
case "memory":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
case "storage":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
case "cpu":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
@@ -440,35 +494,68 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
if dur <= 0 {
dur = 60
}
j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
case "amd":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
case "amd-mem":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
case "amd-bandwidth":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
case "amd-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
case "memory-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
case "sat-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
case "platform-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
opts := resolvePlatformStressPreset(t.params.BurnProfile)
archive, err = a.RunPlatformStress(ctx, "", opts, j.append)
case "audit":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
result, e := a.RunAuditNow(q.opts.RuntimeMode)
if e != nil {
err = e
@@ -477,7 +564,22 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
j.append(line)
}
}
case "support-bundle":
j.append("Building support bundle...")
archive, err = buildSupportBundle(q.opts.ExportDir)
case "install":
if strings.TrimSpace(t.params.Device) == "" {
err = fmt.Errorf("device is required")
break
}
installLogPath := platform.InstallLogPath(t.params.Device)
j.append("Install log: " + installLogPath)
err = streamCmdJob(j, installCommand(ctx, t.params.Device, installLogPath))
case "install-to-ram":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
err = a.RunInstallToRAM(ctx, j.append)
default:
j.append("ERROR: unknown target: " + t.Target)

View File

@@ -3,7 +3,9 @@ package webui
import (
"context"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
@@ -95,9 +97,24 @@ func TestResolveBurnPreset(t *testing.T) {
}
}
func TestRunTaskHonorsCancel(t *testing.T) {
t.Parallel()
// TestTaskDisplayNameUsesNvidiaStressLoader checks that the display name of an
// "nvidia-stress" task names the selected loader, and that an empty loader is
// labelled the same way as "builtin".
func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
	type loaderCase struct {
		loader string
		want   string
	}
	cases := []loaderCase{
		{"", "NVIDIA GPU Stress (bee-gpu-burn)"},
		{"builtin", "NVIDIA GPU Stress (bee-gpu-burn)"},
		{"john", "NVIDIA GPU Stress (John/OpenCL)"},
		{"nccl", "NVIDIA GPU Stress (NCCL)"},
	}
	for _, c := range cases {
		got := taskDisplayName("nvidia-stress", "acceptance", c.loader)
		if got == c.want {
			continue
		}
		t.Fatalf("taskDisplayName(loader=%q)=%q want %q", c.loader, got, c.want)
	}
}
func TestRunTaskHonorsCancel(t *testing.T) {
blocked := make(chan struct{})
released := make(chan struct{})
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
@@ -154,3 +171,111 @@ func TestRunTaskHonorsCancel(t *testing.T) {
t.Fatal("runTask did not return after cancel")
}
}
func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
var gotDuration int
q := &taskQueue{
opts: &HandlerOptions{App: &app.App{}},
}
tk := &Task{
ID: "cpu-burn-1",
Name: "CPU Burn-in",
Target: "cpu",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{BurnProfile: "smoke"},
}
j := &jobState{}
orig := runCPUAcceptancePackCtx
runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
gotDuration = durationSec
return "/tmp/cpu-burn.tar.gz", nil
}
defer func() { runCPUAcceptancePackCtx = orig }()
q.runTask(tk, j, context.Background())
if gotDuration != 5*60 {
t.Fatalf("duration=%d want %d", gotDuration, 5*60)
}
}
func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
dir := t.TempDir()
q := &taskQueue{
opts: &HandlerOptions{ExportDir: dir},
}
tk := &Task{
ID: "support-bundle-1",
Name: "Support Bundle",
Target: "support-bundle",
Status: TaskRunning,
CreatedAt: time.Now(),
}
j := &jobState{}
var gotExportDir string
orig := buildSupportBundle
buildSupportBundle = func(exportDir string) (string, error) {
gotExportDir = exportDir
return filepath.Join(exportDir, "bundle.tar.gz"), nil
}
defer func() { buildSupportBundle = orig }()
q.runTask(tk, j, context.Background())
if gotExportDir != dir {
t.Fatalf("exportDir=%q want %q", gotExportDir, dir)
}
if j.err != "" {
t.Fatalf("unexpected error: %q", j.err)
}
if !strings.Contains(strings.Join(j.lines, "\n"), "Archive: "+filepath.Join(dir, "bundle.tar.gz")) {
t.Fatalf("lines=%v", j.lines)
}
}
func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
q := &taskQueue{
opts: &HandlerOptions{},
}
tk := &Task{
ID: "install-1",
Name: "Install to Disk",
Target: "install",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{Device: "/dev/sda"},
}
j := &jobState{}
var gotDevice string
var gotLogPath string
orig := installCommand
installCommand = func(ctx context.Context, device string, logPath string) *exec.Cmd {
gotDevice = device
gotLogPath = logPath
return exec.CommandContext(ctx, "sh", "-c", "printf 'line1\nline2\n'")
}
defer func() { installCommand = orig }()
q.runTask(tk, j, context.Background())
if gotDevice != "/dev/sda" {
t.Fatalf("device=%q want /dev/sda", gotDevice)
}
if gotLogPath == "" {
t.Fatal("expected install log path")
}
logs := strings.Join(j.lines, "\n")
if !strings.Contains(logs, "Install log: ") {
t.Fatalf("missing install log line: %v", j.lines)
}
if !strings.Contains(logs, "line1") || !strings.Contains(logs, "line2") {
t.Fatalf("missing streamed output: %v", j.lines)
}
if j.err != "" {
t.Fatalf("unexpected error: %q", j.err)
}
}

2
bible

Submodule bible updated: 456c1f022c...688b87e98d

View File

@@ -0,0 +1,117 @@
# Decision: Treat memtest as explicit ISO content, not as trusted live-build magic
**Date:** 2026-04-01
**Status:** active
## Context
We have already iterated on `memtest` multiple times and kept cycling between the same ideas.
The commit history shows several distinct attempts:
- `f91bce8` — fixed Bookworm memtest file names to `memtest86+x64.bin` / `memtest86+x64.efi`
- `5857805` — added a binary hook to copy memtest files from the build tree into the ISO root
- `f96b149` — added fallback extraction from the cached `.deb` when `chroot/boot/` stayed empty
- `d43a9ae` — removed the custom hook and switched back to live-build built-in memtest integration
- `60cb8f8` — restored explicit memtest menu entries and added ISO validation
- `3dbc218` / `3869788` — added archived build logs and better memtest diagnostics
Current evidence from the archived `easy-bee-nvidia-v3.14-amd64` logs dated 2026-04-01:
- `lb binary_memtest` does run and installs `memtest86+`
- but the final ISO still does **not** contain `boot/memtest86+x64.bin`
- the final ISO also does **not** contain memtest menu entries in `boot/grub/grub.cfg` or `isolinux/live.cfg`
So the assumption "live-build built-in memtest integration is enough on this stack" is currently false for this project until proven otherwise by a real built ISO.
Additional evidence from the archived `easy-bee-nvidia-v3.17-dirty-amd64` logs dated 2026-04-01:
- the build now completes successfully because memtest is non-blocking by default
- `lb binary_memtest` still runs and installs `memtest86+`
- the project-owned hook `config/hooks/normal/9100-memtest.hook.binary` does execute
- but it executes too early for its current target paths:
- `binary/boot/grub/grub.cfg` is still missing at hook time
- `binary/isolinux/live.cfg` is still missing at hook time
- memtest binaries are also still absent in `binary/boot/`
- later in the build, live-build does create intermediate bootloader configs with memtest lines in the workdir
- but the final ISO still lacks memtest binaries and still lacks memtest lines in extracted ISO `boot/grub/grub.cfg` and `isolinux/live.cfg`
So the assumption "the current normal binary hook path is late enough to patch final memtest artifacts" is also false.
## Known Failed Attempts
These approaches were already tried and should not be repeated blindly:
1. Built-in live-build memtest only.
Reason it failed:
- `lb binary_memtest` runs, but the final ISO still misses memtest binaries and menu entries.
2. Fixing only the memtest file names for Debian Bookworm.
Reason it failed:
- correct file names alone do not make the files appear in the final ISO.
3. Copying memtest from `chroot/boot/` into `binary/boot/` via a binary hook.
Reason it failed:
- in this stack `chroot/boot/` is often empty for memtest payloads at the relevant time.
4. Fallback extraction from cached `memtest86+` `.deb`.
Reason it failed:
- this was explored already and was not enough to stabilize the final ISO path end-to-end.
5. Restoring explicit memtest menu entries in source bootloader templates only.
Reason it failed:
- memtest lines in source templates or intermediate workdir configs do not guarantee the final ISO contains them.
6. Patching `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` from the current `config/hooks/normal/9100-memtest.hook.binary`.
Reason it failed:
- the hook runs before those files exist, so the hook cannot patch them there.
## What This Means
When revisiting memtest later, start from the constraints above rather than retrying the same patterns:
- do not assume the built-in memtest stage is sufficient
- do not assume `chroot/boot/` will contain memtest payloads
- do not assume source bootloader templates are the last writer of final ISO configs
- do not assume the current normal binary hook timing is late enough for final patching
Any future memtest fix must explicitly identify:
- where the memtest binaries are reliably available at build time
- which exact build stage writes the final bootloader configs that land in the ISO
- how the fix will be proven post-build against a real ISO, not only against intermediate workdir files
## Decision
For `bee`, memtest must be treated as an explicit ISO artifact with explicit post-build validation.
Project rules from now on:
- Do **not** trust `--memtest memtest86+` by itself.
- A memtest implementation is considered valid only if the produced ISO actually contains:
- `boot/memtest86+x64.bin`
- `boot/memtest86+x64.efi`
- a GRUB menu entry
- an isolinux menu entry
- If live-build built-in integration does not produce those artifacts, use an explicit project-owned mechanism such as:
- a binary hook copying files into `binary/boot/`
- extraction from the cached `memtest86+` `.deb`
- another deterministic build-time copy step
- Do **not** remove such explicit logic later unless a fresh real ISO build proves that built-in integration alone produces all required files and menu entries.
Current implementation direction:
- keep the live-build memtest stage enabled if it helps package acquisition
- do not rely on the current early `binary_hooks` timing for final patching
- prefer a post-`lb build` recovery step in `build.sh` that:
- patches the fully materialized `LB_DIR/binary` tree
- injects memtest binaries there
- ensures final bootloader entries there
- reruns late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) after the patch
## Consequences
- Future memtest changes must begin by reading this ADR in full, including the commits listed under Context and the Known Failed Attempts list.
- We should stop re-introducing "prefer built-in live-build memtest" as a default assumption without new evidence.
- Memtest validation in `build.sh` is not optional; it is the acceptance gate that prevents another silent regression.
- If we change memtest strategy again, we must update this ADR with the exact build evidence that justified the change.

View File

@@ -5,3 +5,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
| Date | Decision | Status |
|---|---|---|
| 2026-03-05 | Use NVIDIA proprietary driver | active |
| 2026-04-01 | Treat memtest as explicit ISO content | active |

View File

@@ -17,6 +17,39 @@ This applies to:
## Memtest rule
Prefer live-build's built-in memtest integration over custom hooks or hardcoded
bootloader paths. If you ever need to reference memtest files manually, verify
the exact package file list first for the target Debian release.
Do not assume live-build's built-in memtest integration is sufficient for `bee`.
We already tried that path and regressed again on 2026-04-01: `lb binary_memtest`
ran, but the final ISO still lacked memtest binaries and menu entries.
For this project, memtest is accepted only when the produced ISO actually
contains all of the following:
- `boot/memtest86+x64.bin`
- `boot/memtest86+x64.efi`
- a memtest entry in `boot/grub/grub.cfg`
- a memtest entry in `isolinux/live.cfg`
Rules:
- Keep explicit post-build memtest validation in `build.sh`.
- If built-in integration does not produce the artifacts above, use a
deterministic project-owned copy/extract step instead of hoping live-build
will "start working".
- Do not switch back to built-in-only memtest without fresh build evidence from
a real ISO.
- If you reference memtest files manually, verify the exact package file list
first for the target Debian release.
Known bad loops for this repository:
- Do not retry built-in-only memtest without new evidence. We already proved
that `lb binary_memtest` can run while the final ISO still has no memtest.
- Do not assume fixing memtest file names is enough. Correct names did not fix
the final artifact path.
- Do not assume `chroot/boot/` contains memtest payloads at the time hooks run.
- Do not assume source `grub.cfg` / `live.cfg.in` are the final writers of ISO
bootloader configs.
- Do not assume the current `config/hooks/normal/9100-memtest.hook.binary`
timing is late enough to patch final `binary/boot/grub/grub.cfg` or
`binary/isolinux/live.cfg`; logs from 2026-04-01 showed those files were not
present yet when the hook executed.

View File

@@ -38,6 +38,7 @@ export BEE_GPU_VENDOR
. "${BUILDER_DIR}/VERSIONS"
export PATH="$PATH:/usr/local/go/bin"
: "${BEE_REQUIRE_MEMTEST:=0}"
# Allow git to read the bind-mounted repo (different UID inside container).
git config --global safe.directory "${REPO_ROOT}"
@@ -111,63 +112,546 @@ resolve_iso_version() {
resolve_audit_version
}
# iso_list_files ISO
# Print the paths of the files stored in ISO, one per line.
# Uses bsdtar when it is on PATH, otherwise xorriso. Returns 127 when neither
# tool is available, otherwise the status of the listing pipeline.
iso_list_files() {
    iso_path="$1"
    if command -v bsdtar >/dev/null 2>&1; then
        bsdtar -tf "$iso_path"
        return $?
    fi
    if command -v xorriso >/dev/null 2>&1; then
        # Callers match anchored relative paths such as '^boot/memtest86+x64\.bin$'.
        # NOTE(review): xorriso appears to print -find results wrapped in single
        # quotes ('/boot/...'); confirm against the installed version. The quote
        # stripping below is a no-op when the output is unquoted.
        xorriso -indev "$iso_path" -find / -type f -print 2>/dev/null |
            sed -e "s#^'##" -e "s#'\$##" -e 's#^/##'
        return $?
    fi
    return 127
}
# iso_extract_file ISO MEMBER
# Write the contents of MEMBER (a path inside ISO without a leading "/") to
# stdout. bsdtar is used when it is on PATH, xorriso otherwise; with neither
# tool present the function returns 127.
iso_extract_file() {
    iso_path="$1"
    iso_member="$2"
    if command -v bsdtar >/dev/null 2>&1; then
        bsdtar -xOf "$iso_path" "$iso_member"
    elif command -v xorriso >/dev/null 2>&1; then
        # xorriso addresses ISO members by absolute path.
        xorriso -osirrox on -indev "$iso_path" -cat "/$iso_member" 2>/dev/null
    else
        return 127
    fi
}
# require_iso_reader [ISO]
# Succeed when bsdtar or xorriso is on PATH. Otherwise report the problem via
# memtest_fail (which exits the script when BEE_REQUIRE_MEMTEST=1) and return 1
# so that callers written as `require_iso_reader ... || return 0` actually stop
# instead of continuing without any ISO reader.
require_iso_reader() {
    command -v bsdtar >/dev/null 2>&1 && return 0
    command -v xorriso >/dev/null 2>&1 && return 0
    memtest_fail "ISO reader is required for validation/debug (expected bsdtar or xorriso)" "${1:-}"
    return 1
}
# dump_memtest_debug PHASE [LB_DIR] [ISO]
# Print a memtest diagnostics report for build phase PHASE.
# Always inspects the builder sources under BUILDER_DIR (auto/config,
# bootloader templates, the memtest binary hook). When LB_DIR is an existing
# directory it also inspects the live-build workdir (package lists,
# chroot/boot, binary/boot, bootloader configs, package cache). When ISO is an
# existing file it lists memtest files and memtest menu lines found in it.
# The report goes to stdout and, when LOG_DIR is an existing directory, is
# also written to LOG_DIR/memtest-<phase>.log (spaces and slashes in PHASE
# become underscores).
dump_memtest_debug() {
    phase="$1"
    lb_dir="${2:-}"
    iso_path="${3:-}"
    phase_slug="$(printf '%s' "${phase}" | tr ' /' '__')"
    memtest_log="${LOG_DIR:-}/memtest-${phase_slug}.log"

    (
        echo "=== memtest debug: ${phase} ==="

        echo "-- auto/config --"
        if [ -f "${BUILDER_DIR}/auto/config" ]; then
            grep -n -- '--memtest' "${BUILDER_DIR}/auto/config" || echo " (no --memtest line found)"
        else
            echo " (missing ${BUILDER_DIR}/auto/config)"
        fi

        echo "-- source bootloader templates --"
        for cfg in \
            "${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
            "${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
            if [ -f "$cfg" ]; then
                echo " file: $cfg"
                grep -n 'Memory Test\|memtest' "$cfg" || echo " (no memtest lines)"
            fi
        done

        echo "-- source binary hooks --"
        for hook in \
            "${BUILDER_DIR}/config/hooks/normal/9100-memtest.hook.binary"; do
            if [ -f "$hook" ]; then
                echo " hook: $hook"
            else
                echo " (missing $hook)"
            fi
        done

        if [ -n "$lb_dir" ] && [ -d "$lb_dir" ]; then
            echo "-- live-build workdir package lists --"
            for pkg in \
                "$lb_dir/config/package-lists/bee.list.chroot" \
                "$lb_dir/config/package-lists/bee-gpu.list.chroot" \
                "$lb_dir/config/package-lists/bee-nvidia.list.chroot"; do
                if [ -f "$pkg" ]; then
                    echo " file: $pkg"
                    grep -n 'memtest' "$pkg" || echo " (no memtest lines)"
                fi
            done

            echo "-- live-build chroot/boot --"
            if [ -d "$lb_dir/chroot/boot" ]; then
                find "$lb_dir/chroot/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/ /' || true
            else
                echo " (missing $lb_dir/chroot/boot)"
            fi

            echo "-- live-build binary/boot --"
            if [ -d "$lb_dir/binary/boot" ]; then
                find "$lb_dir/binary/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/ /' || true
            else
                echo " (missing $lb_dir/binary/boot)"
            fi

            echo "-- live-build binary grub cfg --"
            if [ -f "$lb_dir/binary/boot/grub/grub.cfg" ]; then
                grep -n 'Memory Test\|memtest' "$lb_dir/binary/boot/grub/grub.cfg" || echo " (no memtest lines)"
            else
                echo " (missing $lb_dir/binary/boot/grub/grub.cfg)"
            fi

            echo "-- live-build binary isolinux cfg --"
            if [ -f "$lb_dir/binary/isolinux/live.cfg" ]; then
                grep -n 'Memory Test\|memtest' "$lb_dir/binary/isolinux/live.cfg" || echo " (no memtest lines)"
            else
                echo " (missing $lb_dir/binary/isolinux/live.cfg)"
            fi

            echo "-- live-build package cache --"
            if [ -d "$lb_dir/cache/packages.chroot" ]; then
                find "$lb_dir/cache/packages.chroot" -maxdepth 1 -name 'memtest86+*.deb' -print | sed 's/^/ /' || true
            else
                echo " (missing $lb_dir/cache/packages.chroot)"
            fi
        fi

        if [ -n "$iso_path" ] && [ -f "$iso_path" ]; then
            echo "-- ISO memtest files --"
            # Capture the matches first: a trailing `| sed ... || echo` would
            # test sed's exit status (always 0), so the "nothing found" note
            # could never be printed. This runs inside the subshell, so the
            # helper variable does not leak to the caller.
            iso_memtest_files="$(iso_list_files "$iso_path" | grep 'memtest' || true)"
            if [ -n "$iso_memtest_files" ]; then
                printf '%s\n' "$iso_memtest_files" | sed 's/^/ /'
            else
                echo " (no memtest files in ISO)"
            fi

            echo "-- ISO GRUB memtest lines --"
            iso_extract_file "$iso_path" boot/grub/grub.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in boot/grub/grub.cfg)"

            echo "-- ISO isolinux memtest lines --"
            iso_extract_file "$iso_path" isolinux/live.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in isolinux/live.cfg)"
        fi

        echo "=== end memtest debug: ${phase} ==="
    ) | {
        # Mirror the report into the per-phase log only when a log dir exists.
        if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ]; then
            tee "${memtest_log}"
        else
            cat
        fi
    }
}
# memtest_fail MESSAGE [ISO]
# Report a memtest problem on stderr together with a "failure" debug dump.
# With BEE_REQUIRE_MEMTEST=1 the message is tagged ERROR and the script exits
# with status 1; in every other case it is tagged WARNING and the function
# returns 0.
memtest_fail() {
    msg="$1"
    iso_path="${2:-}"
    case "${BEE_REQUIRE_MEMTEST:-0}" in
        1) level="ERROR" ;;
        *) level="WARNING" ;;
    esac
    echo "${level}: ${msg}" >&2
    dump_memtest_debug "failure" "${LB_DIR:-}" "$iso_path" >&2
    if [ "$level" = "ERROR" ]; then
        exit 1
    fi
    return 0
}
# iso_memtest_present ISO
# Quiet check used to decide whether memtest recovery is needed. Returns 0 only
# when ISO exists, an ISO reader (bsdtar or xorriso) is available, both memtest
# binaries are listed in the ISO, and both bootloader configs extracted from it
# mention the memtest menu entry and binary paths. Returns 1 otherwise and
# prints nothing.
iso_memtest_present() {
    iso_path="$1"
    if [ ! -f "$iso_path" ]; then
        return 1
    fi
    command -v bsdtar >/dev/null 2>&1 || command -v xorriso >/dev/null 2>&1 || return 1

    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || return 1
    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || return 1

    grub_cfg="$(mktemp)"
    isolinux_cfg="$(mktemp)"

    # All checks are chained so that the temp files are removed on one of only
    # two exits: everything matched, or the first check that failed.
    if iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" 2>/dev/null &&
        iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" 2>/dev/null &&
        grep -q 'Memory Test (memtest86+)' "$grub_cfg" &&
        grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" &&
        grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" &&
        grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" &&
        grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg"; then
        rm -f "$grub_cfg" "$isolinux_cfg"
        return 0
    fi

    rm -f "$grub_cfg" "$isolinux_cfg"
    return 1
}
validate_iso_memtest() {
iso_path="$1"
echo "=== validating memtest in ISO ==="
[ -f "$iso_path" ] || { echo "ERROR: ISO not found for validation: $iso_path" >&2; exit 1; }
command -v bsdtar >/dev/null 2>&1 || { echo "ERROR: bsdtar is required for ISO validation" >&2; exit 1; }
bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || {
echo "ERROR: memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" >&2
exit 1
[ -f "$iso_path" ] || {
memtest_fail "ISO not found for validation: $iso_path" "$iso_path"
return 0
}
bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || {
echo "ERROR: memtest EFI binary missing in ISO: boot/memtest86+x64.efi" >&2
exit 1
require_iso_reader "$iso_path" || return 0
iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || {
memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path"
return 0
}
iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || {
memtest_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi" "$iso_path"
return 0
}
grub_cfg="$(mktemp)"
isolinux_cfg="$(mktemp)"
trap 'rm -f "$grub_cfg" "$isolinux_cfg"' EXIT INT TERM
bsdtar -xOf "$iso_path" boot/grub/grub.cfg > "$grub_cfg" || {
echo "ERROR: failed to extract boot/grub/grub.cfg from ISO" >&2
exit 1
iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" || {
memtest_fail "failed to extract boot/grub/grub.cfg from ISO" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
bsdtar -xOf "$iso_path" isolinux/live.cfg > "$isolinux_cfg" || {
echo "ERROR: failed to extract isolinux/live.cfg from ISO" >&2
exit 1
iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" || {
memtest_fail "failed to extract isolinux/live.cfg from ISO" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
grep -q 'Memory Test (memtest86+)' "$grub_cfg" || {
echo "ERROR: GRUB menu entry for memtest is missing" >&2
exit 1
memtest_fail "GRUB menu entry for memtest is missing" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" || {
echo "ERROR: GRUB memtest EFI path is missing" >&2
exit 1
memtest_fail "GRUB memtest EFI path is missing" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" || {
echo "ERROR: GRUB memtest BIOS path is missing" >&2
exit 1
memtest_fail "GRUB memtest BIOS path is missing" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" || {
echo "ERROR: isolinux menu entry for memtest is missing" >&2
exit 1
memtest_fail "isolinux menu entry for memtest is missing" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg" || {
echo "ERROR: isolinux memtest path is missing" >&2
exit 1
memtest_fail "isolinux memtest path is missing" "$iso_path"
rm -f "$grub_cfg" "$isolinux_cfg"
return 0
}
rm -f "$grub_cfg" "$isolinux_cfg"
trap - EXIT INT TERM
echo "=== memtest validation OK ==="
}
# append_memtest_grub_entry GRUB_CFG
# Append the marker-delimited memtest menu block to GRUB_CFG unless the file
# already contains a memtest menu title or the BEE MEMTEST marker.
# Returns 1 when GRUB_CFG is not a regular file.
append_memtest_grub_entry() {
    grub_cfg="$1"
    if [ ! -f "$grub_cfg" ]; then
        return 1
    fi
    # Either pattern means the entry is already there; nothing to do.
    if grep -q -e 'Memory Test (memtest86+)' -e '### BEE MEMTEST ###' "$grub_cfg"; then
        return 0
    fi
    cat >> "$grub_cfg" <<'EOF'
### BEE MEMTEST ###
if [ "${grub_platform}" = "efi" ]; then
menuentry "Memory Test (memtest86+)" {
chainloader /boot/memtest86+x64.efi
}
else
menuentry "Memory Test (memtest86+)" {
linux16 /boot/memtest86+x64.bin
}
fi
### /BEE MEMTEST ###
EOF
}
# append_memtest_isolinux_entry ISOLINUX_CFG
# Append the marker-delimited memtest label to ISOLINUX_CFG unless the file
# already contains a memtest menu title or the BEE MEMTEST marker.
# Returns 1 when ISOLINUX_CFG is not a regular file.
append_memtest_isolinux_entry() {
    isolinux_cfg="$1"
    if [ ! -f "$isolinux_cfg" ]; then
        return 1
    fi
    # Either pattern means the entry is already there; nothing to do.
    if grep -q -e 'Memory Test (memtest86+)' -e '### BEE MEMTEST ###' "$isolinux_cfg"; then
        return 0
    fi
    cat >> "$isolinux_cfg" <<'EOF'
# ### BEE MEMTEST ###
label memtest
menu label ^Memory Test (memtest86+)
linux /boot/memtest86+x64.bin
# ### /BEE MEMTEST ###
EOF
}
# copy_memtest_from_deb DEB DST_BOOT
# Unpack DEB into a temporary directory and copy whichever of
# memtest86+x64.bin / memtest86+x64.efi it ships under boot/ into DST_BOOT.
# Best effort: a .deb that cannot be unpacked, or a copy that fails, is
# reported on stderr and the function still returns 0, so a bad package cannot
# abort the build under `set -e`. Whether memtest ended up in the ISO is decided
# later by the validation step. The temporary directory is always removed.
copy_memtest_from_deb() {
    deb="$1"
    dst_boot="$2"
    tmpdir="$(mktemp -d)" || {
        echo "memtest recovery: WARNING: could not create a temporary directory" >&2
        return 0
    }
    if dpkg-deb -x "$deb" "$tmpdir"; then
        for f in memtest86+x64.bin memtest86+x64.efi; do
            if [ -f "$tmpdir/boot/$f" ]; then
                cp "$tmpdir/boot/$f" "$dst_boot/$f" ||
                    echo "memtest recovery: WARNING: failed to copy $f into $dst_boot" >&2
            fi
        done
    else
        echo "memtest recovery: WARNING: dpkg-deb could not unpack $deb" >&2
    fi
    rm -rf "$tmpdir"
    return 0
}
# recover_iso_memtest LB_DIR ISO
# Try to repair a built live-build tree whose ISO lacks memtest:
#   1. copy the memtest binaries into LB_DIR/binary/boot from chroot/boot or
#      the host /boot,
#   2. otherwise extract them from a cached memtest86+ .deb,
#   3. otherwise download the package with apt-get and extract it,
#   4. make sure the GRUB and isolinux configs in the binary tree carry a
#      memtest entry,
#   5. rerun the late live-build stages (checksums, ISO, zsync).
# Every copy/extract step is best effort. The existing ISO is moved aside
# before the rebuild and put back if the rebuild fails or leaves no ISO
# behind, so an unsuccessful recovery cannot leave the build without an ISO.
recover_iso_memtest() {
    lb_dir="$1"
    iso_path="$2"
    binary_boot="$lb_dir/binary/boot"
    grub_cfg="$lb_dir/binary/boot/grub/grub.cfg"
    isolinux_cfg="$lb_dir/binary/isolinux/live.cfg"
    iso_backup="${iso_path}.pre-memtest-recovery"

    echo "=== attempting memtest recovery in binary tree ==="
    mkdir -p "$binary_boot"

    for root in \
        "$lb_dir/chroot/boot" \
        "/boot"; do
        for f in memtest86+x64.bin memtest86+x64.efi; do
            if [ ! -f "$binary_boot/$f" ] && [ -f "$root/$f" ]; then
                # Tested explicitly so a failed copy is only a warning
                # instead of tripping `set -e`.
                if cp "$root/$f" "$binary_boot/$f"; then
                    echo "memtest recovery: copied $f from $root"
                else
                    echo "memtest recovery: WARNING: failed to copy $f from $root"
                fi
            fi
        done
    done

    if [ ! -f "$binary_boot/memtest86+x64.bin" ] || [ ! -f "$binary_boot/memtest86+x64.efi" ]; then
        for dir in \
            "$lb_dir/cache/packages.binary" \
            "$lb_dir/cache/packages.chroot" \
            "$lb_dir/chroot/var/cache/apt/archives" \
            "${BEE_CACHE_DIR:-${DIST_DIR}/cache}/lb-packages" \
            "/var/cache/apt/archives"; do
            [ -d "$dir" ] || continue
            deb="$(find "$dir" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
            [ -n "$deb" ] || continue
            echo "memtest recovery: extracting payload from $deb"
            copy_memtest_from_deb "$deb" "$binary_boot" ||
                echo "memtest recovery: WARNING: could not extract $deb"
            # Keep searching the remaining caches if this package did not
            # provide both binaries.
            if [ -f "$binary_boot/memtest86+x64.bin" ] && [ -f "$binary_boot/memtest86+x64.efi" ]; then
                break
            fi
        done
    fi

    if [ ! -f "$binary_boot/memtest86+x64.bin" ] || [ ! -f "$binary_boot/memtest86+x64.efi" ]; then
        tmpdl="$(mktemp -d)"
        if (
            cd "$tmpdl" && apt-get download memtest86+ >/dev/null 2>&1
        ); then
            deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
            if [ -n "$deb" ]; then
                echo "memtest recovery: downloaded $deb"
                copy_memtest_from_deb "$deb" "$binary_boot" ||
                    echo "memtest recovery: WARNING: could not extract $deb"
            fi
        fi
        rm -rf "$tmpdl"
    fi

    if [ -f "$grub_cfg" ]; then
        append_memtest_grub_entry "$grub_cfg" && echo "memtest recovery: ensured GRUB entry"
    else
        echo "memtest recovery: WARNING: missing $grub_cfg"
    fi

    if [ -f "$isolinux_cfg" ]; then
        append_memtest_isolinux_entry "$isolinux_cfg" && echo "memtest recovery: ensured isolinux entry"
    else
        echo "memtest recovery: WARNING: missing $isolinux_cfg"
    fi

    run_optional_step_sh "rebuild live-build checksums after memtest recovery" "91-lb-checksums" "lb binary_checksums 2>&1"
    # The old ISO is moved aside rather than deleted. The rebuild counts as
    # successful only if lb exits 0 AND an ISO exists afterwards.
    # NOTE(review): lb may exit 0 without writing an ISO if it considers the
    # stage already done; confirm against the live-build version in use.
    run_optional_step_sh "rebuild ISO after memtest recovery" "92-lb-binary-iso" \
        "if [ -f '$iso_path' ]; then mv -f '$iso_path' '$iso_backup' || exit 1; fi; \
if lb binary_iso 2>&1 && [ -f '$iso_path' ]; then rm -f '$iso_backup'; exit 0; fi; \
echo 'memtest recovery: ISO rebuild failed; restoring previous ISO' >&2; \
if [ -f '$iso_backup' ]; then mv -f '$iso_backup' '$iso_path'; fi; \
exit 1"
    run_optional_step_sh "rebuild zsync after memtest recovery" "93-lb-zsync" "lb binary_zsync 2>&1"
}
# Effective versions and the artifact paths derived from them. Everything the
# build writes for this variant shares the ISO_BASENAME prefix under DIST_DIR.
AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
# Directory holding build.log, the per-step logs and the memtest debug dumps.
LOG_DIR="${DIST_DIR}/${ISO_BASENAME}.logs"
# Tarball of LOG_DIR, written by cleanup_build_log when the script exits.
LOG_ARCHIVE="${DIST_DIR}/${ISO_BASENAME}.logs.tar.gz"
ISO_OUT="${DIST_DIR}/${ISO_BASENAME}.iso"
# Full transcript of the build, fed by the tee started in start_build_log.
LOG_OUT="${LOG_DIR}/build.log"
# cleanup_build_log [STATUS]
# Exit handler installed by start_build_log. Tears down logging in reverse
# order of setup, archives the log directory and exits with STATUS (defaults
# to the status of the last command).
cleanup_build_log() {
status="${1:-$?}"
# Drop the traps first so this handler cannot run a second time.
trap - EXIT INT TERM HUP
# A step may have been interrupted midway; close its log before the build log.
if [ "${STEP_LOG_ACTIVE:-0}" = "1" ]; then
cleanup_step_log "${status}" || true
fi
if [ "${BUILD_LOG_ACTIVE:-0}" = "1" ]; then
BUILD_LOG_ACTIVE=0
# Restore the stdout/stderr saved on fds 3/4, then close the saved copies.
exec 1>&3 2>&4
exec 3>&- 4>&-
# Wait for the background tee before removing the pipe it reads from.
if [ -n "${BUILD_TEE_PID:-}" ]; then
wait "${BUILD_TEE_PID}" 2>/dev/null || true
fi
rm -f "${BUILD_LOG_PIPE}"
fi
# Best effort: pack LOG_DIR into LOG_ARCHIVE; failures are ignored.
if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
rm -f "${LOG_ARCHIVE}"
tar -czf "${LOG_ARCHIVE}" -C "${DIST_DIR}" "$(basename "${LOG_DIR}")" 2>/dev/null || true
fi
exit "${status}"
}
# start_build_log
# Start capturing the whole build into LOG_OUT while still showing it on the
# original stdout. Requires tee. Recreates LOG_DIR from scratch, removes a stale
# LOG_ARCHIVE, and installs cleanup_build_log as the EXIT/INT/TERM/HUP handler.
start_build_log() {
command -v tee >/dev/null 2>&1 || {
echo "ERROR: tee is required for build logging" >&2
exit 1
}
rm -rf "${LOG_DIR}"
rm -f "${LOG_ARCHIVE}"
mkdir -p "${LOG_DIR}"
# mktemp -u only generates a name; the FIFO itself is created by mkfifo.
BUILD_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-build-log.XXXXXX")"
mkfifo "${BUILD_LOG_PIPE}"
# Save the original stdout/stderr on fds 3/4 so cleanup can restore them.
exec 3>&1 4>&2
# tee is started before stdout is redirected, so it inherits the original
# stdout and copies everything read from the pipe into LOG_OUT as well.
tee "${LOG_OUT}" < "${BUILD_LOG_PIPE}" &
BUILD_TEE_PID=$!
# From here on, all script output (stdout and stderr) goes through the pipe.
exec > "${BUILD_LOG_PIPE}" 2>&1
BUILD_LOG_ACTIVE=1
trap 'cleanup_build_log "$?"' EXIT INT TERM HUP
echo "=== build log dir: ${LOG_DIR} ==="
echo "=== build log: ${LOG_OUT} ==="
echo "=== build log archive: ${LOG_ARCHIVE} ==="
}
# cleanup_step_log [STATUS]
# Stop the per-step log capture started by run_step, if one is active, and
# return STATUS (defaults to the status of the last command). Because the
# return value mirrors STATUS, callers running under `set -e` must guard the
# call when STATUS may be non-zero.
cleanup_step_log() {
status="${1:-$?}"
if [ "${STEP_LOG_ACTIVE:-0}" = "1" ]; then
STEP_LOG_ACTIVE=0
# Restore the stdout/stderr saved on fds 5/6, then close the saved copies.
exec 1>&5 2>&6
exec 5>&- 6>&-
# Wait for the step's tee before removing the pipe it reads from.
if [ -n "${STEP_TEE_PID:-}" ]; then
wait "${STEP_TEE_PID}" 2>/dev/null || true
fi
rm -f "${STEP_LOG_PIPE}"
fi
return "${status}"
}
# run_step NAME SLUG COMMAND [ARG...]
# Run COMMAND as a named build step. Its stdout and stderr are captured in
# LOG_DIR/SLUG.log and also forwarded to the stdout that was current when the
# step started. On a non-zero exit status an error naming the step and its log
# is printed to stderr and the script exits with that status.
run_step() {
    step_name="$1"
    step_slug="$2"
    shift 2

    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== step: ${step_name} ==="
    echo "=== step log: ${step_log} ==="

    # mktemp -u only generates a name; the FIFO itself is created by mkfifo.
    STEP_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-step-log.XXXXXX")"
    mkfifo "${STEP_LOG_PIPE}"
    # Save the current stdout/stderr on fds 5/6 so they can be restored.
    exec 5>&1 6>&2
    tee "${step_log}" < "${STEP_LOG_PIPE}" >&5 &
    STEP_TEE_PID=$!
    exec > "${STEP_LOG_PIPE}" 2>&1
    STEP_LOG_ACTIVE=1

    set +e
    "$@"
    step_status=$?
    set -e

    # cleanup_step_log returns the status it is given. Unguarded, a failed step
    # would trip `set -e` right here and the script would exit before the
    # error message below is printed.
    cleanup_step_log "${step_status}" || true

    if [ "${step_status}" -ne 0 ]; then
        echo "ERROR: step failed: ${step_name} (see ${step_log})" >&2
        exit "${step_status}"
    fi
    echo "=== step OK: ${step_name} ==="
}
# run_step_sh NAME SLUG SCRIPT
# Same as run_step, but the step is a shell snippet: SCRIPT is executed with
# `sh -c`, so it may contain pipes, redirections and `&&` chains.
run_step_sh() {
step_name="$1"
step_slug="$2"
step_script="$3"
run_step "${step_name}" "${step_slug}" sh -c "${step_script}"
}
# run_optional_step_sh NAME SLUG SCRIPT
# Run SCRIPT (via `sh -c`) as a build step that is fatal only in strict mode.
# With BEE_REQUIRE_MEMTEST=1 it delegates to run_step_sh, which exits the
# build on failure. Otherwise the output is written to LOG_DIR/SLUG.log,
# echoed afterwards, and a failure only produces a warning on stderr.
run_optional_step_sh() {
    step_name="$1"
    step_slug="$2"
    step_script="$3"

    case "${BEE_REQUIRE_MEMTEST:-0}" in
        1)
            run_step_sh "${step_name}" "${step_slug}" "${step_script}"
            return 0
            ;;
    esac

    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== optional step: ${step_name} ==="
    echo "=== optional step log: ${step_log} ==="

    # errexit is switched off only around the command so its status can be
    # inspected instead of aborting the build.
    set +e
    sh -c "${step_script}" > "${step_log}" 2>&1
    step_status=$?
    set -e

    cat "${step_log}"
    if [ "${step_status}" -eq 0 ]; then
        echo "=== optional step OK: ${step_name} ==="
    else
        echo "WARNING: optional step failed: ${step_name} (see ${step_log})" >&2
    fi
}
start_build_log
# Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency.
# If headers for the detected ABI are not yet installed (kernel updated since image build),
@@ -202,8 +686,8 @@ echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERS
echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}, ISO version: ${ISO_VERSION_EFFECTIVE}"
echo ""
echo "=== syncing git submodules ==="
git -C "${REPO_ROOT}" submodule update --init --recursive
run_step "sync git submodules" "05-git-submodules" \
git -C "${REPO_ROOT}" submodule update --init --recursive
# --- compile bee binary (static, Linux amd64) ---
# Shared between variants — built once, reused on second pass.
@@ -215,13 +699,13 @@ if [ -f "$BEE_BIN" ]; then
fi
if [ "$NEED_BUILD" = "1" ]; then
echo "=== building bee binary ==="
cd "${REPO_ROOT}/audit"
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
go build \
-ldflags "-s -w -X main.Version=${AUDIT_VERSION_EFFECTIVE}" \
-o "$BEE_BIN" \
./cmd/bee
run_step_sh "build bee binary" "10-build-bee" \
"cd '${REPO_ROOT}/audit' && \
env GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
go build \
-ldflags '-s -w -X main.Version=${AUDIT_VERSION_EFFECTIVE}' \
-o '${BEE_BIN}' \
./cmd/bee"
echo "binary: $BEE_BIN"
if command -v stat >/dev/null 2>&1; then
BEE_SIZE_BYTES="$(stat -c '%s' "$BEE_BIN" 2>/dev/null || stat -f '%z' "$BEE_BIN")"
@@ -240,9 +724,8 @@ fi
# --- NVIDIA-only build steps ---
GPU_BURN_WORKER_BIN="${DIST_DIR}/bee-gpu-burn-worker-linux-amd64"
if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
echo ""
echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
sh "${BUILDER_DIR}/build-cublas.sh" \
run_step "download cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace" "20-cublas" \
sh "${BUILDER_DIR}/build-cublas.sh" \
"${CUBLAS_VERSION}" \
"${CUDA_USERSPACE_VERSION}" \
"${NCCL_CUDA_VERSION}" \
@@ -256,8 +739,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
fi
if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
echo "=== building bee-gpu-burn worker ==="
gcc -O2 -s -Wall -Wextra \
run_step "build bee-gpu-burn worker" "21-gpu-burn-worker" \
gcc -O2 -s -Wall -Wextra \
-I"${CUBLAS_CACHE}/include" \
-o "$GPU_BURN_WORKER_BIN" \
"${BUILDER_DIR}/bee-gpu-stress.c" \
@@ -378,9 +861,8 @@ done
# --- NVIDIA kernel modules and userspace libs ---
if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
echo ""
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"
run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"
KVER="${DEBIAN_KERNEL_ABI}-amd64"
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
@@ -408,9 +890,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
fi
# --- build / download NCCL ---
echo ""
echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
run_step "download NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}" "50-nccl" \
sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
@@ -423,9 +904,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
# --- build nccl-tests ---
echo ""
echo "=== building nccl-tests ${NCCL_TESTS_VERSION} ==="
sh "${BUILDER_DIR}/build-nccl-tests.sh" \
run_step "build nccl-tests ${NCCL_TESTS_VERSION}" "60-nccl-tests" \
sh "${BUILDER_DIR}/build-nccl-tests.sh" \
"${NCCL_TESTS_VERSION}" \
"${NCCL_VERSION}" \
"${NCCL_CUDA_VERSION}" \
@@ -439,9 +919,8 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
cp "${NCCL_TESTS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
echo "=== all_reduce_perf injected ==="
echo ""
echo "=== building john jumbo ${JOHN_JUMBO_COMMIT} ==="
sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
run_step "build john jumbo ${JOHN_JUMBO_COMMIT}" "70-john" \
sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
JOHN_CACHE="${DIST_DIR}/john-${JOHN_JUMBO_COMMIT}"
mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
rsync -a --delete "${JOHN_CACHE}/run/" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/"
@@ -562,9 +1041,10 @@ BEE_GPU_VENDOR_UPPER="$(echo "${BEE_GPU_VENDOR}" | tr 'a-z' 'A-Z')"
export BEE_GPU_VENDOR_UPPER
cd "${LB_DIR}"
lb clean 2>&1 | tail -3
lb config 2>&1 | tail -5
lb build 2>&1
run_step_sh "live-build clean" "80-lb-clean" "lb clean 2>&1 | tail -3"
run_step_sh "live-build config" "81-lb-config" "lb config 2>&1 | tail -5"
dump_memtest_debug "pre-build" "${LB_DIR}"
run_step_sh "live-build build" "90-lb-build" "lb build 2>&1"
# --- persist deb package cache back to shared location ---
# This allows the second variant to reuse all downloaded packages.
@@ -575,8 +1055,12 @@ fi
# live-build outputs live-image-amd64.hybrid.iso in LB_DIR
ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
ISO_OUT="${DIST_DIR}/easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
if [ -f "$ISO_RAW" ]; then
dump_memtest_debug "post-build" "${LB_DIR}" "$ISO_RAW"
if ! iso_memtest_present "$ISO_RAW"; then
recover_iso_memtest "${LB_DIR}" "$ISO_RAW"
dump_memtest_debug "post-recovery" "${LB_DIR}" "$ISO_RAW"
fi
validate_iso_memtest "$ISO_RAW"
cp "$ISO_RAW" "$ISO_OUT"
echo ""

View File

@@ -0,0 +1,139 @@
#!/bin/sh
# Ensure memtest is present in the final ISO even if live-build's built-in
# memtest stage does not copy the binaries or expose menu entries.
# Abort on the first unhandled command failure.
set -e
# Policy switch: when set to 1, missing memtest assets are a hard error;
# the default (0) only logs a warning and lets the hook continue.
: "${BEE_REQUIRE_MEMTEST:=0}"
# Space-separated list of memtest binaries expected in BINARY_BOOT_DIR.
MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
# All paths below are relative to the directory the hook is run from.
BINARY_BOOT_DIR="binary/boot"
# Boot menu configuration files that receive a memtest entry.
GRUB_CFG="binary/boot/grub/grub.cfg"
ISOLINUX_CFG="binary/isolinux/live.cfg"
# Print a message to stdout, prefixed with "memtest hook: ".
# All arguments are joined into a single line.
log() {
echo "memtest hook: $*"
}
# Report a memtest problem according to the BEE_REQUIRE_MEMTEST policy.
#   $1 - human-readable description of the problem
# With BEE_REQUIRE_MEMTEST=1 the message is logged as an ERROR and the whole
# script exits with status 1. Otherwise it is logged as a WARNING and the
# function returns 0 so callers keep running under `set -e`.
fail_or_warn() {
    msg="$1"
    case "${BEE_REQUIRE_MEMTEST}" in
        1)
            log "ERROR: ${msg}"
            exit 1
            ;;
    esac
    log "WARNING: ${msg}"
    return 0
}
# Copy one memtest binary into BINARY_BOOT_DIR, keeping its file name.
#   $1 - path of the source file
# Returns 1 without touching anything when the source is not a regular file;
# otherwise creates BINARY_BOOT_DIR if needed, copies the file and logs it.
copy_memtest_file() {
    src="$1"
    base="$(basename "$src")"
    dst="${BINARY_BOOT_DIR}/${base}"
    if [ ! -f "$src" ]; then
        return 1
    fi
    mkdir -p "${BINARY_BOOT_DIR}"
    cp "$src" "$dst"
    log "copied ${base} from ${src}"
}
# Unpack a memtest86+ .deb into a scratch directory and copy every file
# listed in MEMTEST_FILES that it ships under boot/ into BINARY_BOOT_DIR.
#   $1 - path to the .deb archive
# Extraction problems are logged and the function still returns 0: whether
# missing binaries are fatal is decided by the caller via fail_or_warn, not
# by `set -e` aborting the hook in the middle of a best-effort fallback.
extract_memtest_from_deb() {
    deb="$1"
    tmpdir="$(mktemp -d)"
    log "extracting memtest payload from ${deb}"
    # Test the command inside `if` so a corrupt archive or a missing dpkg-deb
    # neither kills the script under `set -e` nor leaks the scratch directory.
    if ! dpkg-deb -x "$deb" "$tmpdir"; then
        rm -rf "$tmpdir"
        log "WARNING: failed to extract ${deb}"
        return 0
    fi
    for f in ${MEMTEST_FILES}; do
        if [ -f "${tmpdir}/boot/${f}" ]; then
            copy_memtest_file "${tmpdir}/boot/${f}"
        fi
    done
    rm -rf "$tmpdir"
}
# Succeed (status 0) when at least one file from MEMTEST_FILES is absent from
# BINARY_BOOT_DIR; return 1 when all of them are present.
memtest_binaries_missing() {
    for f in ${MEMTEST_FILES}; do
        [ -f "${BINARY_BOOT_DIR}/${f}" ] || return 0
    done
    return 1
}

# Make sure every file in MEMTEST_FILES exists in BINARY_BOOT_DIR.
# Sources are tried in order, stopping as soon as nothing is missing:
#   1. files already present in chroot/boot or /boot;
#   2. a memtest86+ .deb found in one of the package cache directories.
# Anything still missing afterwards is reported through fail_or_warn, which
# exits the script when BEE_REQUIRE_MEMTEST=1 and only warns otherwise.
# Always returns 0 when it returns at all.
ensure_memtest_binaries() {
    memtest_binaries_missing || return 0

    for root in chroot/boot /boot; do
        for f in ${MEMTEST_FILES}; do
            # A source that does not exist is expected here, hence `|| true`.
            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
        done
    done
    memtest_binaries_missing || return 0

    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
        [ -d "$root" ] || continue
        deb="$(find "$root" -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
        [ -n "$deb" ] || continue
        extract_memtest_from_deb "$deb"
        # Keep looking in the remaining caches if this package did not
        # provide everything (for example a truncated or older .deb).
        memtest_binaries_missing || break
    done

    for f in ${MEMTEST_FILES}; do
        [ -f "${BINARY_BOOT_DIR}/${f}" ] || fail_or_warn "missing ${BINARY_BOOT_DIR}/${f}"
    done
    return 0
}
# Append a memtest menu entry to GRUB_CFG unless one was already added.
# A missing config file is reported through fail_or_warn and is otherwise
# not an error. The "### BEE MEMTEST ###" marker makes the append idempotent:
# when it is already present the file is left untouched.
ensure_grub_entry() {
    if [ ! -f "$GRUB_CFG" ]; then
        fail_or_warn "missing ${GRUB_CFG}"
        return 0
    fi
    if grep -q '### BEE MEMTEST ###' "$GRUB_CFG"; then
        return 0
    fi
    # Quoted delimiter: ${grub_platform} must reach grub.cfg unexpanded.
    cat >> "$GRUB_CFG" <<'EOF'
### BEE MEMTEST ###
if [ "${grub_platform}" = "efi" ]; then
menuentry "Memory Test (memtest86+)" {
chainloader /boot/memtest86+x64.efi
}
else
menuentry "Memory Test (memtest86+)" {
linux16 /boot/memtest86+x64.bin
}
fi
### /BEE MEMTEST ###
EOF
    log "appended memtest entry to ${GRUB_CFG}"
}
# Append a memtest label to ISOLINUX_CFG unless one was already added.
# A missing config file is reported through fail_or_warn and is otherwise
# not an error. The "### BEE MEMTEST ###" marker (written as a comment line)
# makes the append idempotent.
ensure_isolinux_entry() {
    if [ ! -f "$ISOLINUX_CFG" ]; then
        fail_or_warn "missing ${ISOLINUX_CFG}"
        return 0
    fi
    if grep -q '### BEE MEMTEST ###' "$ISOLINUX_CFG"; then
        return 0
    fi
    cat >> "$ISOLINUX_CFG" <<'EOF'
# ### BEE MEMTEST ###
label memtest
menu label ^Memory Test (memtest86+)
linux /boot/memtest86+x64.bin
# ### /BEE MEMTEST ###
EOF
    log "appended memtest entry to ${ISOLINUX_CFG}"
}
# Main sequence: place the binaries first, then add the boot menu entries
# for both GRUB and ISOLINUX. Each step warns or aborts via fail_or_warn
# depending on BEE_REQUIRE_MEMTEST.
log "ensuring memtest binaries and menu entries in binary image"
ensure_memtest_binaries
ensure_grub_entry
ensure_isolinux_entry
log "memtest assets ready"

View File

@@ -7,6 +7,8 @@ EXCLUDE=""
FORMAT=""
JOHN_DIR="/usr/local/lib/bee/john/run"
JOHN_BIN="${JOHN_DIR}/john"
export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
usage() {
echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
@@ -24,6 +26,21 @@ contains_csv() {
}
show_opencl_diagnostics() {
echo "-- OpenCL ICD vendors --" >&2
if [ -d /etc/OpenCL/vendors ]; then
ls -l /etc/OpenCL/vendors >&2 || true
for icd in /etc/OpenCL/vendors/*.icd; do
[ -f "${icd}" ] || continue
echo " file: ${icd}" >&2
sed 's/^/ /' "${icd}" >&2 || true
done
else
echo " /etc/OpenCL/vendors is missing" >&2
fi
echo "-- NVIDIA device nodes --" >&2
ls -l /dev/nvidia* >&2 || true
echo "-- ldconfig OpenCL/NVIDIA --" >&2
ldconfig -p 2>/dev/null | grep 'libOpenCL\|libcuda\|libnvidia-opencl' >&2 || true
if command -v clinfo >/dev/null 2>&1; then
echo "-- clinfo -l --" >&2
clinfo -l >&2 || true
@@ -32,6 +49,17 @@ show_opencl_diagnostics() {
./john --list=opencl-devices >&2 || true
}
# Best-effort refresh of the NVIDIA runtime before retrying OpenCL detection.
# Returns 1 immediately when not running as root (nothing is attempted);
# otherwise runs bee-nvidia-load if that command is available, re-runs
# ldconfig, and returns 0 regardless of whether either step succeeded.
refresh_nvidia_runtime() {
    [ "$(id -u)" = "0" ] || return 1
    # NOTE(review): bee-nvidia-load presumably (re)loads the NVIDIA driver
    # stack; its behaviour is not visible here, so failures are ignored.
    if command -v bee-nvidia-load >/dev/null 2>&1; then
        bee-nvidia-load >/dev/null 2>&1 || true
    fi
    ldconfig >/dev/null 2>&1 || true
    return 0
}
ensure_nvidia_uvm() {
if lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
return 0
@@ -61,6 +89,13 @@ ensure_opencl_ready() {
return 0
fi
if refresh_nvidia_runtime; then
out=$(./john --list=opencl-devices 2>&1 || true)
if echo "${out}" | grep -q "Device #"; then
return 0
fi
fi
if ensure_nvidia_uvm; then
out=$(./john --list=opencl-devices 2>&1 || true)
if echo "${out}" | grep -q "Device #"; then