feat: task queue, UI overhaul, burn tests, install-to-RAM

- Task queue: all SAT/audit jobs enqueue and run one-at-a-time;
  tasks persist past page navigation; new Tasks page with cancel/priority/log stream
- UI: consolidate nav (Validate, Burn, Tasks, Tools); Audit becomes modal;
  Dashboard hardware summary badges + split metrics charts (load/temp/power);
  Tools page consolidates network, services, install, support bundle
- AMD GPU: acceptance test and stress burn cards; GPU presence API greys
  out irrelevant SAT cards automatically
- Burn tests: Memory Stress (stress-ng --vm), SAT Stress (stressapptest)
- Install to RAM: copies squashfs to /dev/shm, re-associates loop devices
  via LOOP_CHANGE_FD ioctl so live media can be ejected
- Charts: relative time axis (0 = now, negative left)
- memtester: LimitMEMLOCK=infinity in bee-web.service; empty output → UNSUPPORTED
- SAT overlay applied dynamically on every /audit.json serve
- MIME panic guard for LiveCD ramdisk I/O errors
- ISO: add memtest86+, stressapptest packages; memtest86+ GRUB entry;
  disable screensaver/DPMS in bee-openbox-session
- Unknown SAT status severity = 1 (does not override OK)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 21:15:11 +03:00
parent 911745e4da
commit 0a98ed8ae9
22 changed files with 1964 additions and 326 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"mime"
"net/http"
"os"
"path/filepath"
@@ -20,6 +21,17 @@ import (
const defaultTitle = "Bee Hardware Audit"
// init warms up the standard library's MIME table once at package load.
//
// On some LiveCD ramdisk environments /usr/share/mime/globs2 exists but fails
// with an I/O error part-way through being read, and Go's mime package panics
// (rather than returning an error) when that happens. Left alone, that panic
// would crash the first HTTP goroutine that serves a static file. Triggering
// the lookup here, behind a recover, absorbs the panic up front so later
// lookups are safe.
func init() {
	warmUp := func() {
		// Best effort: a panic from the mime package is deliberately discarded.
		defer func() { _ = recover() }()
		_ = mime.TypeByExtension(".gz")
	}
	warmUp()
}
// HandlerOptions configures the web UI handler.
type HandlerOptions struct {
Title string
@@ -31,14 +43,14 @@ type HandlerOptions struct {
// metricsRing holds a rolling window of live metric samples.
type metricsRing struct {
mu sync.Mutex
vals []float64
labels []string
size int
mu sync.Mutex
vals []float64
times []time.Time
size int
}
func newMetricsRing(size int) *metricsRing {
return &metricsRing{size: size, vals: make([]float64, 0, size), labels: make([]string, 0, size)}
return &metricsRing{size: size, vals: make([]float64, 0, size), times: make([]time.Time, 0, size)}
}
func (r *metricsRing) push(v float64) {
@@ -46,20 +58,40 @@ func (r *metricsRing) push(v float64) {
defer r.mu.Unlock()
if len(r.vals) >= r.size {
r.vals = r.vals[1:]
r.labels = r.labels[1:]
r.times = r.times[1:]
}
r.vals = append(r.vals, v)
r.labels = append(r.labels, time.Now().Format("15:04"))
r.times = append(r.times, time.Now())
}
func (r *metricsRing) snapshot() ([]float64, []string) {
r.mu.Lock()
defer r.mu.Unlock()
v := make([]float64, len(r.vals))
l := make([]string, len(r.labels))
copy(v, r.vals)
copy(l, r.labels)
return v, l
now := time.Now()
labels := make([]string, len(r.times))
for i, t := range r.times {
labels[i] = relAgeLabel(now.Sub(t))
}
return v, labels
}
// relAgeLabel formats how long ago a sample was taken as a compact,
// negative-offset axis label:
//
//	age <= 0          -> "0"
//	under one minute  -> "-<1m"
//	under one hour    -> "-<minutes>m"
//	under one day     -> "-<hours>h"
//	otherwise         -> "-<days>d"
//
// Partial units are truncated, never rounded up.
func relAgeLabel(age time.Duration) string {
	switch {
	case age <= 0:
		return "0"
	case age >= 24*time.Hour:
		return fmt.Sprintf("-%dd", int(age.Hours()/24))
	case age >= time.Hour:
		return fmt.Sprintf("-%dh", int(age.Hours()))
	}
	// Here 0 < age < 1h, so the label is in whole minutes.
	if wholeMinutes := int(age.Minutes()); wholeMinutes > 0 {
		return fmt.Sprintf("-%dm", wholeMinutes)
	}
	return "-<1m"
}
// gpuRings holds per-GPU ring buffers.
@@ -70,6 +102,14 @@ type gpuRings struct {
Power *metricsRing
}
// pendingNetChange tracks a network state change awaiting confirmation.
//
// NOTE(review): none of the code that reads or writes these fields is visible
// alongside this declaration; the per-field notes below are inferred from the
// field names and types only — confirm against the network handlers.
type pendingNetChange struct {
// iface is presumably the name of the network interface that was changed.
iface string
// wasUp presumably records whether the interface was up before the change,
// i.e. the state to restore if the change is not confirmed — TODO confirm.
wasUp bool
// timer is presumably the countdown after which an unconfirmed change is
// rolled back — TODO confirm.
timer *time.Timer
// mu presumably guards the fields above — TODO confirm.
mu sync.Mutex
}
// handler is the HTTP handler for the web UI.
type handler struct {
opts HandlerOptions
@@ -87,6 +127,9 @@ type handler struct {
// install job (at most one at a time)
installJob *jobState
installMu sync.Mutex
// pending network change (rollback on timeout)
pendingNet *pendingNetChange
pendingNetMu sync.Mutex
}
// NewHandler creates the HTTP mux with all routes.
@@ -108,6 +151,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
ringMemLoad: newMetricsRing(120),
ringPower: newMetricsRing(120),
}
globalQueue.startWorker(&opts)
mux := http.NewServeMux()
// ── Infrastructure ──────────────────────────────────────────────────────
@@ -131,9 +175,20 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory"))
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
// Tasks
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
mux.HandleFunc("POST /api/tasks/cancel-all", h.handleAPITasksCancelAll)
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
// Services
mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
mux.HandleFunc("POST /api/services/action", h.handleAPIServicesAction)
@@ -142,6 +197,9 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("GET /api/network", h.handleAPINetworkStatus)
mux.HandleFunc("POST /api/network/dhcp", h.handleAPINetworkDHCP)
mux.HandleFunc("POST /api/network/static", h.handleAPINetworkStatic)
mux.HandleFunc("POST /api/network/toggle", h.handleAPINetworkToggle)
mux.HandleFunc("POST /api/network/confirm", h.handleAPINetworkConfirm)
mux.HandleFunc("POST /api/network/rollback", h.handleAPINetworkRollback)
// Export
mux.HandleFunc("GET /api/export/list", h.handleAPIExportList)
@@ -150,6 +208,13 @@ func NewHandler(opts HandlerOptions) http.Handler {
// Tools
mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)
// GPU presence
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
// System
mux.HandleFunc("GET /api/system/ram-status", h.handleAPIRAMStatus)
mux.HandleFunc("POST /api/system/install-to-ram", h.handleAPIInstallToRAM)
// Preflight
mux.HandleFunc("GET /api/preflight", h.handleAPIPreflight)
@@ -197,6 +262,11 @@ func (h *handler) handleAuditJSON(w http.ResponseWriter, r *http.Request) {
http.Error(w, fmt.Sprintf("read audit snapshot: %v", err), http.StatusInternalServerError)
return
}
// Re-apply SAT overlay on every request so that SAT results run after the
// last audit always appear in the downloaded JSON without needing a re-audit.
if overlaid, err := app.ApplySATOverlay(data); err == nil {
data = overlaid
}
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json; charset=utf-8")
_, _ = w.Write(data)
@@ -240,9 +310,33 @@ func (h *handler) handleExportFile(w http.ResponseWriter, r *http.Request) {
http.Error(w, "invalid path", http.StatusBadRequest)
return
}
// Set Content-Type explicitly to avoid mime.TypeByExtension which panics on
// LiveCD environments where /usr/share/mime/globs2 has an I/O read error.
w.Header().Set("Content-Type", mimeByExt(filepath.Ext(clean)))
http.ServeFile(w, r, filepath.Join(h.opts.ExportDir, clean))
}
// mimeByExt maps a file extension (including the leading dot, matched
// case-insensitively) to the Content-Type used when serving that file.
// Extensions not listed here are served as application/octet-stream.
// The table is hard-coded so that callers do not have to go through
// mime.TypeByExtension.
func mimeByExt(ext string) string {
	contentType := "application/octet-stream"
	switch lowered := strings.ToLower(ext); lowered {
	case ".html":
		contentType = "text/html; charset=utf-8"
	case ".txt", ".log":
		contentType = "text/plain; charset=utf-8"
	case ".json":
		contentType = "application/json"
	case ".svg":
		contentType = "image/svg+xml"
	case ".tar":
		contentType = "application/x-tar"
	case ".gz":
		contentType = "application/gzip"
	}
	return contentType
}
func (h *handler) handleExportIndex(w http.ResponseWriter, r *http.Request) {
body, err := renderExportIndex(h.opts.ExportDir)
if err != nil {
@@ -274,18 +368,35 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
var names []string
var labels []string
var title string
var yMin, yMax *float64 // nil = auto; for load charts fixed 0-100
switch {
case path == "server":
title = "Server"
vCPUTemp, l := h.ringCPUTemp.snapshot()
vCPULoad, _ := h.ringCPULoad.snapshot()
// ── Server sub-charts ─────────────────────────────────────────────────
case path == "server-load":
title = "CPU / Memory Load"
vCPULoad, l := h.ringCPULoad.snapshot()
vMemLoad, _ := h.ringMemLoad.snapshot()
vPower, _ := h.ringPower.snapshot()
labels = l
datasets = [][]float64{vCPUTemp, vCPULoad, vMemLoad, vPower}
names = []string{"CPU Temp °C", "CPU Load %", "Mem Load %", "Power W"}
datasets = [][]float64{vCPULoad, vMemLoad}
names = []string{"CPU Load %", "Mem Load %"}
yMin = floatPtr(0)
yMax = floatPtr(100)
case path == "server-temp":
title = "CPU Temperature"
vCPUTemp, l := h.ringCPUTemp.snapshot()
labels = l
datasets = [][]float64{vCPUTemp}
names = []string{"CPU Temp °C"}
yMin = floatPtr(0)
yMax = autoMax120(vCPUTemp)
case path == "server-power":
title = "Power & Fans"
vPower, l := h.ringPower.snapshot()
labels = l
datasets = [][]float64{vPower}
names = []string{"Power W"}
h.ringsMu.Lock()
for i, fr := range h.ringFans {
fv, _ := fr.snapshot()
@@ -297,11 +408,20 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
names = append(names, name+" RPM")
}
h.ringsMu.Unlock()
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
// ── GPU sub-charts ────────────────────────────────────────────────────
case strings.HasPrefix(path, "gpu/"):
idxStr := strings.TrimPrefix(path, "gpu/")
rest := strings.TrimPrefix(path, "gpu/")
// rest is either "{idx}-load", "{idx}-temp", "{idx}-power", or legacy "{idx}"
sub := ""
if i := strings.LastIndex(rest, "-"); i > 0 {
sub = rest[i+1:]
rest = rest[:i]
}
idx := 0
fmt.Sscanf(idxStr, "%d", &idx)
fmt.Sscanf(rest, "%d", &idx)
h.ringsMu.Lock()
var gr *gpuRings
if idx < len(h.gpuRings) {
@@ -312,21 +432,71 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
http.NotFound(w, r)
return
}
vTemp, l := gr.Temp.snapshot()
vUtil, _ := gr.Util.snapshot()
vMemUtil, _ := gr.MemUtil.snapshot()
vPower, _ := gr.Power.snapshot()
labels = l
title = fmt.Sprintf("GPU %d", idx)
datasets = [][]float64{vTemp, vUtil, vMemUtil, vPower}
names = []string{"Temp °C", "Load %", "Mem %", "Power W"}
switch sub {
case "load":
vUtil, l := gr.Util.snapshot()
vMemUtil, _ := gr.MemUtil.snapshot()
labels = l
title = fmt.Sprintf("GPU %d Load", idx)
datasets = [][]float64{vUtil, vMemUtil}
names = []string{"Load %", "Mem %"}
yMin = floatPtr(0)
yMax = floatPtr(100)
case "temp":
vTemp, l := gr.Temp.snapshot()
labels = l
title = fmt.Sprintf("GPU %d Temperature", idx)
datasets = [][]float64{vTemp}
names = []string{"Temp °C"}
yMin = floatPtr(0)
yMax = autoMax120(vTemp)
default: // "power" or legacy (no sub)
vPower, l := gr.Power.snapshot()
labels = l
title = fmt.Sprintf("GPU %d Power", idx)
datasets = [][]float64{vPower}
names = []string{"Power W"}
yMin = floatPtr(0)
yMax = autoMax120(vPower)
}
default:
http.NotFound(w, r)
return
}
// Ensure all datasets same length as labels
buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "image/svg+xml")
w.Header().Set("Cache-Control", "no-store")
_, _ = w.Write(buf)
}
// floatPtr stores v in a freshly allocated float64 and returns its address.
// It is a convenience for filling optional *float64 settings from a literal.
func floatPtr(v float64) *float64 {
	boxed := new(float64)
	*boxed = v
	return boxed
}
// autoMax120 picks a Y-axis upper bound with 20% headroom: it returns a
// pointer to 1.2 times the largest sample found across all datasets.
// It returns nil when no sample is greater than zero (including when there
// are no samples at all), leaving the chart library to auto-scale.
func autoMax120(datasets ...[]float64) *float64 {
	var peak float64
	for i := range datasets {
		for j := range datasets[i] {
			if sample := datasets[i][j]; sample > peak {
				peak = sample
			}
		}
	}
	if peak != 0 {
		upper := peak * 1.2
		return &upper
	}
	return nil // let library auto-scale
}
// renderChartSVG renders a line chart as SVG. A non-nil yMin/yMax pins that Y-axis bound; nil means auto.
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
n := len(labels)
if n == 0 {
n = 1
@@ -337,31 +507,25 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
datasets[i] = make([]float64, n)
}
}
sparse := sparseLabels(labels, 6)
opt := gocharts.NewLineChartOptionWithData(datasets)
opt.Title = gocharts.TitleOption{Text: title}
opt.XAxis.Labels = sparse
opt.Legend = gocharts.LegendOption{SeriesNames: names}
if yMin != nil || yMax != nil {
opt.YAxis = []gocharts.YAxisOption{{Min: yMin, Max: yMax}}
}
p := gocharts.NewPainter(gocharts.PainterOptions{
OutputFormat: gocharts.ChartOutputSVG,
Width: 1400,
Height: 280,
Height: 240,
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
if err := p.LineChart(opt); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
return nil, err
}
buf, err := p.Bytes()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "image/svg+xml")
w.Header().Set("Cache-Control", "no-store")
_, _ = w.Write(buf)
return p.Bytes()
}
func safeIdx(s []float64, i int) float64 {
@@ -392,6 +556,15 @@ func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
if page == "" {
page = "dashboard"
}
// Redirect old routes to new names
switch page {
case "tests":
http.Redirect(w, r, "/validate", http.StatusMovedPermanently)
return
case "burn-in":
http.Redirect(w, r, "/burn", http.StatusMovedPermanently)
return
}
body := renderPage(page, h.opts)
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "text/html; charset=utf-8")