feat: task queue, UI overhaul, burn tests, install-to-RAM

- Task queue: all SAT/audit jobs enqueue and run one-at-a-time;
  tasks persist past page navigation; new Tasks page with cancel/priority/log stream
- UI: consolidate nav (Validate, Burn, Tasks, Tools); Audit becomes modal;
  Dashboard hardware summary badges + split metrics charts (load/temp/power);
  Tools page consolidates network, services, install, support bundle
- AMD GPU: acceptance test and stress burn cards; GPU presence API greys
  out irrelevant SAT cards automatically
- Burn tests: Memory Stress (stress-ng --vm), SAT Stress (stressapptest)
- Install to RAM: copies squashfs to /dev/shm, re-associates loop devices
  via LOOP_CHANGE_FD ioctl so live media can be ejected
- Charts: relative time axis (0 = now, negative left)
- memtester: LimitMEMLOCK=infinity in bee-web.service; empty output → UNSUPPORTED
- SAT overlay applied dynamically on every /audit.json serve
- MIME panic guard for LiveCD ramdisk I/O errors
- ISO: add memtest86+, stressapptest packages; memtest86+ GRUB entry;
  disable screensaver/DPMS in bee-openbox-session
- Unknown SAT status severity = 1 (does not override OK)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 21:15:11 +03:00
parent 911745e4da
commit 0a98ed8ae9
22 changed files with 1964 additions and 326 deletions

View File

@@ -9,7 +9,6 @@ import (
"net/http"
"os/exec"
"path/filepath"
"strings"
"sync/atomic"
"time"
@@ -110,39 +109,37 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
// ── Audit ─────────────────────────────────────────────────────────────────────
func (h *handler) handleAPIAuditRun(w http.ResponseWriter, r *http.Request) {
func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
id := newJobID("audit")
j := globalJobs.create(id)
go func() {
j.append("Running audit...")
result, err := h.opts.App.RunAuditNow(h.opts.RuntimeMode)
if err != nil {
j.append("ERROR: " + err.Error())
j.finish(err.Error())
return
}
for _, line := range strings.Split(result.Body, "\n") {
if line != "" {
j.append(line)
}
}
j.finish("")
}()
writeJSON(w, map[string]string{"job_id": id})
t := &Task{
ID: newJobID("audit"),
Name: "Audit",
Target: "audit",
Status: TaskPending,
CreatedAt: time.Now(),
}
globalQueue.enqueue(t)
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
}
func (h *handler) handleAPIAuditStream(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("job_id")
j, ok := globalJobs.get(id)
if !ok {
http.Error(w, "job not found", http.StatusNotFound)
if id == "" {
id = r.URL.Query().Get("task_id")
}
// Try task queue first, then legacy job manager
if j, ok := globalQueue.findJob(id); ok {
streamJob(w, r, j)
return
}
streamJob(w, r, j)
if j, ok := globalJobs.get(id); ok {
streamJob(w, r, j)
return
}
http.Error(w, "job not found", http.StatusNotFound)
}
// ── SAT ───────────────────────────────────────────────────────────────────────
@@ -153,96 +150,87 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
id := newJobID("sat-" + target)
j := globalJobs.create(id)
ctx, cancel := context.WithCancel(context.Background())
j.cancel = cancel
go func() {
defer cancel()
j.append(fmt.Sprintf("Starting %s acceptance test...", target))
var (
archive string
err error
)
var body struct {
Duration int `json:"duration"`
DiagLevel int `json:"diag_level"`
GPUIndices []int `json:"gpu_indices"`
}
body.DiagLevel = 1
if r.ContentLength > 0 {
_ = json.NewDecoder(r.Body).Decode(&body)
}
// Parse optional parameters
var body struct {
Duration int `json:"duration"`
DiagLevel int `json:"diag_level"`
GPUIndices []int `json:"gpu_indices"`
}
body.DiagLevel = 1
if r.ContentLength > 0 {
_ = json.NewDecoder(r.Body).Decode(&body)
}
switch target {
case "nvidia":
if len(body.GPUIndices) > 0 || body.DiagLevel > 0 {
result, e := h.opts.App.RunNvidiaAcceptancePackWithOptions(
ctx, "", body.DiagLevel, body.GPUIndices,
)
if e != nil {
err = e
} else {
archive = result.Body
}
} else {
archive, err = h.opts.App.RunNvidiaAcceptancePack("")
}
case "memory":
archive, err = h.opts.App.RunMemoryAcceptancePack("")
case "storage":
archive, err = h.opts.App.RunStorageAcceptancePack("")
case "cpu":
dur := body.Duration
if dur <= 0 {
dur = 60
}
archive, err = h.opts.App.RunCPUAcceptancePack("", dur)
}
if err != nil {
if ctx.Err() != nil {
j.append("Aborted.")
j.finish("aborted")
} else {
j.append("ERROR: " + err.Error())
j.finish(err.Error())
}
return
}
j.append(fmt.Sprintf("Archive written: %s", archive))
j.finish("")
}()
writeJSON(w, map[string]string{"job_id": id})
name := taskNames[target]
if name == "" {
name = target
}
t := &Task{
ID: newJobID("sat-" + target),
Name: name,
Target: target,
Status: TaskPending,
CreatedAt: time.Now(),
params: taskParams{
Duration: body.Duration,
DiagLevel: body.DiagLevel,
GPUIndices: body.GPUIndices,
},
}
globalQueue.enqueue(t)
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
}
}
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("job_id")
j, ok := globalJobs.get(id)
if !ok {
http.Error(w, "job not found", http.StatusNotFound)
if id == "" {
id = r.URL.Query().Get("task_id")
}
if j, ok := globalQueue.findJob(id); ok {
streamJob(w, r, j)
return
}
streamJob(w, r, j)
if j, ok := globalJobs.get(id); ok {
streamJob(w, r, j)
return
}
http.Error(w, "job not found", http.StatusNotFound)
}
func (h *handler) handleAPISATAbort(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("job_id")
j, ok := globalJobs.get(id)
if !ok {
http.Error(w, "job not found", http.StatusNotFound)
if id == "" {
id = r.URL.Query().Get("task_id")
}
if t, ok := globalQueue.findByID(id); ok {
globalQueue.mu.Lock()
switch t.Status {
case TaskPending:
t.Status = TaskCancelled
now := time.Now()
t.DoneAt = &now
case TaskRunning:
if t.job != nil {
t.job.abort()
}
t.Status = TaskCancelled
now := time.Now()
t.DoneAt = &now
}
globalQueue.mu.Unlock()
writeJSON(w, map[string]string{"status": "aborted"})
return
}
if j.abort() {
writeJSON(w, map[string]string{"status": "aborted"})
} else {
writeJSON(w, map[string]string{"status": "not_running"})
if j, ok := globalJobs.get(id); ok {
if j.abort() {
writeJSON(w, map[string]string{"status": "aborted"})
} else {
writeJSON(w, map[string]string{"status": "not_running"})
}
return
}
http.Error(w, "job not found", http.StatusNotFound)
}
// ── Services ──────────────────────────────────────────────────────────────────
@@ -401,6 +389,51 @@ func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request)
})
}
// ── GPU presence ──────────────────────────────────────────────────────────────
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
gp := h.opts.App.DetectGPUPresence()
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]bool{
"nvidia": gp.Nvidia,
"amd": gp.AMD,
})
}
// ── System ────────────────────────────────────────────────────────────────────
func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
inRAM := h.opts.App.IsLiveMediaInRAM()
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]bool{"in_ram": inRAM})
}
func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
t := &Task{
ID: newJobID("install-to-ram"),
Name: "Install to RAM",
Target: "install-to-ram",
Priority: 10,
Status: TaskPending,
CreatedAt: time.Now(),
}
globalQueue.enqueue(t)
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(map[string]string{"task_id": t.ID})
}
// ── Tools ─────────────────────────────────────────────────────────────────────
var standardTools = []string{
@@ -507,7 +540,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
h.installMu.Unlock()
logFile := platform.InstallLogPath(req.Device)
go runCmdJob(j, exec.CommandContext(r.Context(), "bee-install", req.Device, logFile))
go runCmdJob(j, exec.CommandContext(context.Background(), "bee-install", req.Device, logFile))
w.WriteHeader(http.StatusNoContent)
}
@@ -589,3 +622,95 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
}
}
}
// ── Network toggle ────────────────────────────────────────────────────────────
const netRollbackTimeout = 60 * time.Second
func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured")
return
}
var req struct {
Iface string `json:"iface"`
}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Iface == "" {
writeError(w, http.StatusBadRequest, "iface is required")
return
}
wasUp, err := h.opts.App.GetInterfaceState(req.Iface)
if err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
return
}
if err := h.opts.App.SetInterfaceState(req.Iface, !wasUp); err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
return
}
// Cancel any existing pending change (rollback it first).
h.pendingNetMu.Lock()
if h.pendingNet != nil {
prev := h.pendingNet
prev.mu.Lock()
prev.timer.Stop()
_ = h.opts.App.SetInterfaceState(prev.iface, prev.wasUp)
prev.mu.Unlock()
}
pnc := &pendingNetChange{iface: req.Iface, wasUp: wasUp}
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
_ = h.opts.App.SetInterfaceState(req.Iface, wasUp)
h.pendingNetMu.Lock()
if h.pendingNet == pnc {
h.pendingNet = nil
}
h.pendingNetMu.Unlock()
})
h.pendingNet = pnc
h.pendingNetMu.Unlock()
newState := "up"
if wasUp {
newState = "down"
}
writeJSON(w, map[string]any{
"iface": req.Iface,
"new_state": newState,
"rollback_in": int(netRollbackTimeout.Seconds()),
})
}
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
h.pendingNetMu.Lock()
pnc := h.pendingNet
h.pendingNet = nil
h.pendingNetMu.Unlock()
if pnc != nil {
pnc.mu.Lock()
pnc.timer.Stop()
pnc.mu.Unlock()
}
writeJSON(w, map[string]string{"status": "confirmed"})
}
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
h.pendingNetMu.Lock()
pnc := h.pendingNet
h.pendingNet = nil
h.pendingNetMu.Unlock()
if pnc == nil {
writeError(w, http.StatusConflict, "no pending network change")
return
}
pnc.mu.Lock()
pnc.timer.Stop()
pnc.mu.Unlock()
if h.opts.App != nil {
_ = h.opts.App.SetInterfaceState(pnc.iface, pnc.wasUp)
}
writeJSON(w, map[string]string{"status": "rolled back"})
}