Files
bee/audit/internal/webui/server.go
Mikhail Chusavitin 8575cf06f8 webui: show all RAID drives per controller and add drive-prepare action
RAID Controller Management previously hid any LSI drive that wasn't
already Frgn/UGood/JBOD, and scoped VROC "free drives" from all system
disks instead of the ones actually wired to the VROC controller's
ports - drives attached directly to the CPU or another HBA could leak
in. Now every drive is listed per its own controller, and LSI drives
not already ready for array creation get a "Prepare" button that
forces them to Unconfigured Good via storcli.

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
2026-07-01 13:32:03 +03:00

1475 lines
43 KiB
Go

package webui
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"html"
"io"
"log/slog"
"mime"
"net"
"net/http"
"os"
"path/filepath"
"runtime/debug"
"sort"
"strings"
"sync"
"time"
"bee/audit/internal/app"
"bee/audit/internal/platform"
"bee/audit/internal/runtimeenv"
"reanimator/chart/viewer"
"reanimator/chart/web"
)
// defaultTitle is the title NewHandler substitutes when HandlerOptions.Title
// is empty or whitespace-only.
const defaultTitle = "Bee Hardware Audit"
// init warms up the mime package's lazy type table under a recover guard.
//
// On some LiveCD ramdisk environments /usr/share/mime/globs2 exists but fails
// with an I/O error part-way through the read. Go's mime package panics (it
// does not return an error) in that case, which would crash the first HTTP
// goroutine that serves a static file. Triggering the initialisation here,
// with the panic swallowed, keeps later calls safe.
func init() {
	warmMIMETable := func() {
		// recover must be called directly by the deferred function to take effect.
		defer func() { _ = recover() }() //nolint:errcheck
		_ = mime.TypeByExtension(".gz")
	}
	warmMIMETable()
}
// HandlerOptions configures the web UI handler.
type HandlerOptions struct {
// Title is the UI title; NewHandler replaces a blank value with defaultTitle.
Title string
// BuildLabel is not read in the visible part of this file; presumably a
// build/version string shown in the UI — confirm against the page renderer.
BuildLabel string
// AuditPath is the audit snapshot file served by /audit.json and /viewer.
AuditPath string
// ExportDir is the directory export files are served from; NewHandler
// replaces a blank value with app.DefaultExportDir.
ExportDir string
// App is optional. When non-nil NewHandler starts the kmsg watcher (and the
// health poller if App.StatusDB is non-nil).
App *app.App
// RuntimeMode defaults to runtimeenv.ModeAuto when left empty.
RuntimeMode runtimeenv.Mode
}
// metricsRing holds a rolling window of live metric samples.
// vals and times are parallel slices: times[i] is when vals[i] was pushed.
type metricsRing struct {
// mu guards vals and times; every method in this file takes it.
mu sync.Mutex
vals []float64
times []time.Time
// size is the maximum number of samples retained; push evicts the oldest
// sample once len(vals) reaches it.
size int
}
// newMetricsRing returns an empty ring that retains at most size samples.
// Both backing slices are pre-allocated with capacity size.
func newMetricsRing(size int) *metricsRing {
	ring := &metricsRing{size: size}
	ring.vals = make([]float64, 0, size)
	ring.times = make([]time.Time, 0, size)
	return ring
}
// push appends v, stamped with the current time, dropping the oldest sample
// first when the ring already holds size entries.
func (r *metricsRing) push(v float64) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if full := len(r.vals) >= r.size; full {
		r.vals, r.times = r.vals[1:], r.times[1:]
	}
	r.vals, r.times = append(r.vals, v), append(r.times, time.Now())
}
// snapshot returns a copy of the stored values together with one formatted
// time label per value. Labels carry only the clock time when every sample
// falls on the same local day, and a month-day prefix otherwise.
func (r *metricsRing) snapshot() ([]float64, []string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	values := append(make([]float64, 0, len(r.vals)), r.vals...)
	labels := make([]string, len(r.times))
	if len(r.times) > 0 {
		sameDay := timestampsSameLocalDay(r.times)
		for i := range r.times {
			labels[i] = formatTimelineLabel(r.times[i].Local(), sameDay)
		}
	}
	return values, labels
}
// latest returns the most recently pushed value; the bool is false when the
// ring is empty.
func (r *metricsRing) latest() (float64, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if n := len(r.vals); n > 0 {
		return r.vals[n-1], true
	}
	return 0, false
}
// timestampsSameLocalDay reports whether every timestamp falls on the same
// calendar day as the first one, evaluated in the local time zone. An empty
// slice counts as "same day".
func timestampsSameLocalDay(times []time.Time) bool {
	if len(times) == 0 {
		return true
	}
	ref := times[0].Local()
	wantYear, wantDay := ref.Year(), ref.YearDay()
	for i := 1; i < len(times); i++ {
		cur := times[i].Local()
		if cur.Year() == wantYear && cur.YearDay() == wantDay {
			continue
		}
		return false
	}
	return true
}
// formatTimelineLabel renders ts as "HH:MM" when sameDay is set and as
// "MM-DD HH:MM" otherwise. ts is formatted in whatever zone it carries.
func formatTimelineLabel(ts time.Time, sameDay bool) string {
	layout := "01-02 15:04"
	if sameDay {
		layout = "15:04"
	}
	return ts.Format(layout)
}
// gpuRings holds per-GPU ring buffers, one ring per tracked quantity.
// NOTE(review): units are not visible here; the chart code in this file labels
// the matching sample fields as °C, %, % and W respectively — confirm.
type gpuRings struct {
Temp *metricsRing
Util *metricsRing
MemUtil *metricsRing
Power *metricsRing
}
// namedMetricsRing pairs a ring buffer with a display name. The handler keeps
// slices of these for CPU and ambient temperature sensors.
type namedMetricsRing struct {
Name string
Ring *metricsRing
}
// metricsChartWindow is the number of samples kept in the live ring buffer.
// At metricsCollectInterval = 5 s this covers 30 minutes of live history.
// NewHandler also passes it to LoadRecent to bound the history replayed into
// the rings at start-up.
// NOTE(review): NewHandler creates the CPU/memory/power rings with a capacity
// of 120, not metricsChartWindow — confirm which size is intended.
const metricsChartWindow = 360
// metricsDownsampleAge is the age after which old metrics rows are downsampled
// to 1 sample per minute. Data fresher than this is kept at full resolution.
const metricsDownsampleAge = 2 * time.Hour
// metricsRetainWindow is the total retention period for metrics rows.
// Rows older than this are deleted entirely by the background compactor.
const metricsRetainWindow = 48 * time.Hour
// metricsCollectInterval is the period between live metric samples. It is a
// variable rather than a constant — presumably so tests can shorten it; confirm.
var metricsCollectInterval = 5 * time.Second
// pendingNetChange tracks a network state change awaiting confirmation.
// NOTE(review): the fields are not used in the visible part of this file; the
// descriptions below are inferred from names and the handler's "rollback on
// timeout" comment — confirm against the network handlers.
type pendingNetChange struct {
// snapshot is presumably the network state to restore on rollback.
snapshot platform.NetworkSnapshot
// deadline is presumably when the unconfirmed change is rolled back.
deadline time.Time
// timer presumably fires the rollback at deadline.
timer *time.Timer
mu sync.Mutex
}
// handler is the HTTP handler for the web UI. It owns the route mux, the
// in-memory metric rings, the most recent live sample and the optional
// metrics database.
type handler struct {
opts HandlerOptions
mux *http.ServeMux
// server rings
ringCPULoad *metricsRing
ringMemLoad *metricsRing
ringPower *metricsRing
ringFans []*metricsRing
fanNames []string
cpuTempRings []*namedMetricsRing
ambientTempRings []*namedMetricsRing
// per-GPU rings (index = GPU index)
gpuRings []*gpuRings
// ringsMu presumably guards the ring fields above; the code that locks it
// (feedRings) is outside this view — confirm.
ringsMu sync.Mutex
// latestMu guards latest (see setLatestMetric / latestMetric).
latestMu sync.RWMutex
// latest is the most recent live sample, or nil before the first one arrives.
latest *platform.LiveMetricSample
// metrics persistence (nil if DB unavailable)
metricsDB *MetricsDB
// pending network change (rollback on timeout)
pendingNet *pendingNetChange
pendingNetMu sync.Mutex
// kmsg hardware error watcher
kmsg *kmsgWatcher
}
// NewHandler creates the HTTP mux with all routes.
//
// Besides building the route table it has start-up side effects:
//   - blank Title / ExportDir / RuntimeMode options are replaced by defaults;
//   - the metrics database at metricsDBPath is opened (failure is logged and
//     leaves persistence disabled) and its recent history is replayed into the
//     ring buffers;
//   - the metrics collector is started;
//   - when opts.App is set, the kmsg watcher is started and published on
//     globalQueue, and a health poller is started if App.StatusDB is non-nil;
//   - the global task queue worker is started.
//
// The returned handler is the mux wrapped in recoverMiddleware.
func NewHandler(opts HandlerOptions) http.Handler {
if strings.TrimSpace(opts.Title) == "" {
opts.Title = defaultTitle
}
if strings.TrimSpace(opts.ExportDir) == "" {
opts.ExportDir = app.DefaultExportDir
}
if opts.RuntimeMode == "" {
opts.RuntimeMode = runtimeenv.ModeAuto
}
// NOTE(review): these rings hold 120 samples while metricsChartWindow (360)
// samples are replayed into them below — confirm the intended capacity.
h := &handler{
opts: opts,
ringCPULoad: newMetricsRing(120),
ringMemLoad: newMetricsRing(120),
ringPower: newMetricsRing(120),
}
// Open metrics DB and pre-fill ring buffers from history.
if db, err := openMetricsDB(metricsDBPath); err == nil {
h.metricsDB = db
if samples, err := db.LoadRecent(metricsChartWindow); err == nil {
for _, s := range samples {
h.feedRings(s)
}
// The last replayed sample doubles as the "latest" value until the
// collector produces a fresh one.
if len(samples) > 0 {
h.setLatestMetric(samples[len(samples)-1])
}
} else {
slog.Warn("metrics history unavailable", "path", metricsDBPath, "err", err)
}
} else {
slog.Warn("metrics db disabled", "path", metricsDBPath, "err", err)
}
h.startMetricsCollector()
// Start kmsg hardware error watcher if the app (and its status DB) is available.
if opts.App != nil {
h.kmsg = newKmsgWatcher(opts.App.StatusDB)
h.kmsg.start()
globalQueue.kmsgWatcher = h.kmsg
// Start periodic health poller for components that don't emit kernel log events (e.g. PSU).
if opts.App.StatusDB != nil {
newHealthPoller(opts.App.StatusDB).start()
}
}
// opts here is the local copy with defaults already applied.
globalQueue.startWorker(&opts)
mux := http.NewServeMux()
// ── Infrastructure ──────────────────────────────────────────────────────
mux.HandleFunc("GET /healthz", h.handleHealthz)
mux.HandleFunc("GET /api/ready", h.handleReady)
mux.HandleFunc("GET /loading", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, _ = w.Write([]byte(loadingPageHTML))
})
// ── Existing read-only endpoints (preserved for compatibility) ──────────
mux.HandleFunc("GET /audit.json", h.handleAuditJSON)
mux.HandleFunc("GET /runtime-health.json", h.handleRuntimeHealthJSON)
mux.HandleFunc("GET /export/support.tar.gz", h.handleSupportBundleDownload)
mux.HandleFunc("GET /export/file", h.handleExportFile)
mux.HandleFunc("GET /export/", h.handleExportIndex)
mux.HandleFunc("GET /viewer", h.handleViewer)
// ── API ──────────────────────────────────────────────────────────────────
// Audit
mux.HandleFunc("POST /api/audit/run", h.handleAPIAuditRun)
mux.HandleFunc("GET /api/audit/stream", h.handleAPIAuditStream)
// SAT
mux.HandleFunc("POST /api/sat/nvidia/run", h.handleAPISATRun("nvidia"))
mux.HandleFunc("POST /api/sat/nvidia-targeted-stress/run", h.handleAPISATRun("nvidia-targeted-stress"))
mux.HandleFunc("POST /api/sat/nvidia-compute/run", h.handleAPISATRun("nvidia-compute"))
mux.HandleFunc("POST /api/sat/nvidia-targeted-power/run", h.handleAPISATRun("nvidia-targeted-power"))
mux.HandleFunc("POST /api/sat/nvidia-pulse/run", h.handleAPISATRun("nvidia-pulse"))
mux.HandleFunc("POST /api/sat/nvidia-interconnect/run", h.handleAPISATRun("nvidia-interconnect"))
mux.HandleFunc("POST /api/sat/nvidia-bandwidth/run", h.handleAPISATRun("nvidia-bandwidth"))
mux.HandleFunc("POST /api/sat/nvidia-stress/run", h.handleAPISATRun("nvidia-stress"))
mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory"))
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
mux.HandleFunc("POST /api/sat/amd-mem/run", h.handleAPISATRun("amd-mem"))
mux.HandleFunc("POST /api/sat/amd-bandwidth/run", h.handleAPISATRun("amd-bandwidth"))
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
mux.HandleFunc("POST /api/bee-bench/nvidia/autotune/run", h.handleAPIBenchmarkAutotuneRun())
mux.HandleFunc("GET /api/bee-bench/nvidia/autotune/status", h.handleAPIBenchmarkAutotuneStatus)
mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)
// Tasks
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
mux.HandleFunc("POST /api/tasks/cancel-all", h.handleAPITasksCancelAll)
mux.HandleFunc("POST /api/tasks/kill-workers", h.handleAPITasksKillWorkers)
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
mux.HandleFunc("GET /api/tasks/{id}/charts", h.handleAPITaskChartsIndex)
mux.HandleFunc("GET /api/tasks/{id}/chart/", h.handleAPITaskChartSVG)
mux.HandleFunc("GET /tasks/{id}", h.handleTaskPage)
// Services
mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
mux.HandleFunc("POST /api/services/action", h.handleAPIServicesAction)
// Network
mux.HandleFunc("GET /api/network", h.handleAPINetworkStatus)
mux.HandleFunc("POST /api/network/dhcp", h.handleAPINetworkDHCP)
mux.HandleFunc("POST /api/network/static", h.handleAPINetworkStatic)
mux.HandleFunc("POST /api/network/toggle", h.handleAPINetworkToggle)
mux.HandleFunc("POST /api/network/confirm", h.handleAPINetworkConfirm)
mux.HandleFunc("POST /api/network/rollback", h.handleAPINetworkRollback)
// Export
mux.HandleFunc("GET /api/export/list", h.handleAPIExportList)
mux.HandleFunc("GET /api/export/usb", h.handleAPIExportUSBTargets)
mux.HandleFunc("GET /api/blackbox/status", h.handleAPIBlackboxStatus)
mux.HandleFunc("POST /api/blackbox/enable", h.handleAPIBlackboxEnable)
mux.HandleFunc("POST /api/blackbox/disable", h.handleAPIBlackboxDisable)
// Tools
mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)
mux.HandleFunc("GET /api/tools/nvme-formats", h.handleAPINVMeFormats)
mux.HandleFunc("POST /api/tools/nvme-format/run", h.handleAPINVMeFormatRun)
mux.HandleFunc("GET /api/tools/saa-dmi", h.handleAPISAADMIRead)
mux.HandleFunc("POST /api/tools/saa-dmi/write", h.handleAPISAADMIWrite)
mux.HandleFunc("GET /api/tools/ipmi-fru", h.handleAPIIPMIFRURead)
mux.HandleFunc("POST /api/tools/ipmi-fru/write", h.handleAPIIPMIFRUWrite)
mux.HandleFunc("GET /api/tools/huawei-elabel", h.handleAPIHuaweiElabelRead)
mux.HandleFunc("POST /api/tools/huawei-elabel/write", h.handleAPIHuaweiElabelWrite)
mux.HandleFunc("GET /api/tools/raid/status", h.handleAPIRAIDStatus)
mux.HandleFunc("POST /api/tools/raid/foreign", h.handleAPIRAIDForeignAction)
mux.HandleFunc("POST /api/tools/raid/create-mirror", h.handleAPIRAIDCreateMirror)
mux.HandleFunc("POST /api/tools/raid/prepare-drive", h.handleAPIRAIDPrepareDrive)
// GPU presence / tools
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
mux.HandleFunc("GET /api/gpu/nvidia-status", h.handleAPIGNVIDIAGPUStatuses)
mux.HandleFunc("POST /api/gpu/nvidia-reset", h.handleAPIGNVIDIAReset)
mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
// System
mux.HandleFunc("GET /api/system/ram-status", h.handleAPIRAMStatus)
mux.HandleFunc("POST /api/system/install-to-ram", h.handleAPIInstallToRAM)
mux.HandleFunc("POST /api/system/reboot", h.handleAPISystemReboot)
mux.HandleFunc("POST /api/system/shutdown", h.handleAPISystemShutdown)
// Preflight
mux.HandleFunc("GET /api/preflight", h.handleAPIPreflight)
// Install
mux.HandleFunc("GET /api/install/disks", h.handleAPIInstallDisks)
mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
// Hardware component detail (fragment for modal in Hardware Summary card)
mux.HandleFunc("GET /api/hardware-summary", h.handleAPIHardwareSummary)
mux.HandleFunc("GET /api/components/{type}", h.handleAPIComponentDetail)
// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
mux.HandleFunc("GET /api/metrics/latest", h.handleAPIMetricsLatest)
mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV)
// Reanimator chart static assets (viewer template expects /static/*)
mux.Handle("GET /static/", http.StripPrefix("/static/", web.Static()))
// ── Pages ────────────────────────────────────────────────────────────────
mux.HandleFunc("GET /", h.handlePage)
h.mux = mux
// Every route is served through the panic-recovery wrapper.
return recoverMiddleware(mux)
}
// startMetricsCollector runs the live-metrics loop under goRecoverLoop.
//
// Every metricsCollectInterval it takes one sample, writes it to the metrics
// database when one is attached, feeds the ring buffers and publishes the
// sample as the latest value. Once an hour it asks the database to downsample
// rows older than metricsDownsampleAge and to prune rows older than
// metricsRetainWindow. Results of the database calls are deliberately ignored.
func (h *handler) startMetricsCollector() {
	// collect handles one sampling tick.
	collect := func() {
		sample := platform.SampleLiveMetrics()
		if h.metricsDB != nil {
			_ = h.metricsDB.Write(sample)
		}
		h.feedRings(sample)
		h.setLatestMetric(sample)
	}

	// compact handles one hourly maintenance tick.
	compact := func() {
		if h.metricsDB == nil {
			return
		}
		now := time.Now().UTC()
		_ = h.metricsDB.Downsample(now.Add(-metricsDownsampleAge), now.Add(-metricsRetainWindow))
		_ = h.metricsDB.Prune(now.Add(-metricsRetainWindow))
	}

	goRecoverLoop("metrics collector", 2*time.Second, func() {
		sampleTick := time.NewTicker(metricsCollectInterval)
		defer sampleTick.Stop()
		compactTick := time.NewTicker(time.Hour)
		defer compactTick.Stop()

		for {
			select {
			case <-sampleTick.C:
				collect()
			case <-compactTick.C:
				compact()
			}
		}
	})
}
// setLatestMetric stores a private copy of sample as the most recent live
// sample, under the write lock.
func (h *handler) setLatestMetric(sample platform.LiveMetricSample) {
	stored := new(platform.LiveMetricSample)
	*stored = sample

	h.latestMu.Lock()
	h.latest = stored
	h.latestMu.Unlock()
}
// latestMetric returns a copy of the most recent live sample. The bool is
// false (and the sample is the zero value) when none has been stored yet.
func (h *handler) latestMetric() (platform.LiveMetricSample, bool) {
	h.latestMu.RLock()
	defer h.latestMu.RUnlock()

	if cur := h.latest; cur != nil {
		return *cur, true
	}
	return platform.LiveMetricSample{}, false
}
// ListenAndServe builds the web UI handler from opts and serves it on addr,
// blocking until the server stops and returning the server's error.
//
// NOTE(review): no WriteTimeout is configured — presumably because the
// streaming (SSE) endpoints must stay open; confirm before adding one.
func ListenAndServe(addr string, opts HandlerOptions) error {
	srv := new(http.Server)
	srv.Addr = addr
	srv.Handler = NewHandler(opts)
	srv.ReadHeaderTimeout = 5 * time.Second
	srv.ReadTimeout = 30 * time.Second
	srv.IdleTimeout = 2 * time.Minute
	return srv.ListenAndServe()
}
// trackingResponseWriter wraps an http.ResponseWriter and records whether the
// response has been started, so recoverMiddleware knows whether it can still
// send a 500 after a handler panic. It forwards the optional Flusher,
// Hijacker, Pusher and ReaderFrom interfaces to the wrapped writer.
type trackingResponseWriter struct {
	http.ResponseWriter
	// wroteHeader is true once anything may have been sent to the client, or
	// the connection has been taken over by the handler.
	wroteHeader bool
}

// WriteHeader marks the response as started and forwards the status code.
func (w *trackingResponseWriter) WriteHeader(statusCode int) {
	w.wroteHeader = true
	w.ResponseWriter.WriteHeader(statusCode)
}

// Write marks the response as started and forwards the body bytes.
func (w *trackingResponseWriter) Write(p []byte) (int, error) {
	w.wroteHeader = true
	return w.ResponseWriter.Write(p)
}

// Flush forwards to the wrapped writer when it supports flushing. The
// response is only marked as started when a flush actually happens; a no-op
// Flush on a non-flushing writer sends nothing to the client.
func (w *trackingResponseWriter) Flush() {
	f, ok := w.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	w.wroteHeader = true
	f.Flush()
}

// Hijack forwards to the wrapped writer, or returns an error when it does not
// support hijacking. After a successful hijack the connection belongs to the
// caller, so the response is marked as started to keep the recovery path from
// writing to it.
func (w *trackingResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
	h, ok := w.ResponseWriter.(http.Hijacker)
	if !ok {
		return nil, nil, fmt.Errorf("hijacking not supported")
	}
	conn, rw, err := h.Hijack()
	if err == nil {
		w.wroteHeader = true
	}
	return conn, rw, err
}

// Push forwards an HTTP/2 server push, or returns http.ErrNotSupported when
// the wrapped writer cannot push.
func (w *trackingResponseWriter) Push(target string, opts *http.PushOptions) error {
	p, ok := w.ResponseWriter.(http.Pusher)
	if !ok {
		return http.ErrNotSupported
	}
	return p.Push(target, opts)
}

// ReadFrom streams r into the response, using the wrapped writer's own
// ReadFrom when available and io.Copy otherwise. Both paths write body bytes
// straight to the wrapped writer, so the response is marked as started up
// front in either case.
func (w *trackingResponseWriter) ReadFrom(r io.Reader) (int64, error) {
	w.wroteHeader = true
	if rf, ok := w.ResponseWriter.(io.ReaderFrom); ok {
		return rf.ReadFrom(r)
	}
	return io.Copy(w.ResponseWriter, r)
}
// recoverMiddleware wraps next so that a panic in a handler is logged with its
// stack trace and, when the response has not been started yet, answered with a
// 500 instead of tearing down the connection.
//
// http.ErrAbortHandler is the documented sentinel for aborting a response on
// purpose, so it is re-panicked untouched: net/http then aborts the connection
// without logging, rather than this middleware reporting it as a failure.
func recoverMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		tw := &trackingResponseWriter{ResponseWriter: w}
		defer func() {
			rec := recover()
			if rec == nil {
				return
			}
			if rec == http.ErrAbortHandler {
				panic(rec)
			}
			slog.Error("http handler panic",
				"method", r.Method,
				"path", r.URL.Path,
				"panic", fmt.Sprint(rec),
				"stack", string(debug.Stack()),
			)
			// Once the response has started, a second status line cannot be sent.
			if !tw.wroteHeader {
				http.Error(tw, "internal server error", http.StatusInternalServerError)
			}
		}()
		next.ServeHTTP(tw, r)
	})
}
// ── Infrastructure handlers ──────────────────────────────────────────────────

// handleHealthz is the liveness probe: it always answers 200 with the body
// "ok" and forbids caching.
func (h *handler) handleHealthz(w http.ResponseWriter, r *http.Request) {
	const body = "ok"
	w.Header().Set("Cache-Control", "no-store")
	w.WriteHeader(http.StatusOK)
	_, _ = w.Write([]byte(body))
}
// ── Compatibility endpoints ──────────────────────────────────────────────────

// handleAuditJSON serves the audit snapshot stored at opts.AuditPath as JSON.
// A missing snapshot yields 404, any other read failure 500.
func (h *handler) handleAuditJSON(w http.ResponseWriter, r *http.Request) {
	payload, err := loadSnapshot(h.opts.AuditPath)
	switch {
	case err == nil:
		// snapshot loaded; fall through to serving it
	case errors.Is(err, os.ErrNotExist):
		http.Error(w, "audit snapshot not found", http.StatusNotFound)
		return
	default:
		http.Error(w, fmt.Sprintf("read audit snapshot: %v", err), http.StatusInternalServerError)
		return
	}

	// The SAT overlay is re-applied on every request so that SAT results
	// produced after the last audit show up in the download without a
	// re-audit. If the overlay fails the raw snapshot is served unchanged.
	if merged, overlayErr := app.ApplySATOverlay(payload); overlayErr == nil {
		payload = merged
	}

	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "application/json; charset=utf-8")
	_, _ = w.Write(payload)
}
// handleRuntimeHealthJSON serves runtime-health.json from the export
// directory. A missing file yields 404, any other read failure 500.
func (h *handler) handleRuntimeHealthJSON(w http.ResponseWriter, r *http.Request) {
	healthPath := filepath.Join(h.opts.ExportDir, "runtime-health.json")
	payload, err := loadSnapshot(healthPath)
	if err != nil {
		status, msg := http.StatusInternalServerError, fmt.Sprintf("read runtime health: %v", err)
		if errors.Is(err, os.ErrNotExist) {
			status, msg = http.StatusNotFound, "runtime health not found"
		}
		http.Error(w, msg, status)
		return
	}

	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "application/json; charset=utf-8")
	_, _ = w.Write(payload)
}
// handleSupportBundleDownload builds a support bundle from the export
// directory, sends it as a gzip attachment and removes the archive file once
// the response has been written.
func (h *handler) handleSupportBundleDownload(w http.ResponseWriter, r *http.Request) {
	archive, err := app.BuildSupportBundle(h.opts.ExportDir)
	if err != nil {
		http.Error(w, fmt.Sprintf("build support bundle: %v", err), http.StatusInternalServerError)
		return
	}
	// The archive is a one-shot artefact; delete it after serving.
	defer os.Remove(archive)

	disposition := fmt.Sprintf("attachment; filename=%q", filepath.Base(archive))
	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "application/gzip")
	hdr.Set("Content-Disposition", disposition)
	http.ServeFile(w, r, archive)
}
// handleExportFile serves one file from the export directory, named by the
// "path" query parameter. The parameter is required; after cleaning it must
// not be "." and must not start with "..", otherwise the request gets a 400.
func (h *handler) handleExportFile(w http.ResponseWriter, r *http.Request) {
	requested := strings.TrimSpace(r.URL.Query().Get("path"))
	if requested == "" {
		http.Error(w, "path is required", http.StatusBadRequest)
		return
	}

	cleaned := filepath.Clean(requested)
	switch {
	case cleaned == ".", strings.HasPrefix(cleaned, ".."):
		http.Error(w, "invalid path", http.StatusBadRequest)
		return
	}

	// The Content-Type is set explicitly so http.ServeFile does not consult
	// mime.TypeByExtension, which panics on LiveCD environments where
	// /usr/share/mime/globs2 has an I/O read error.
	w.Header().Set("Content-Type", mimeByExt(filepath.Ext(cleaned)))
	http.ServeFile(w, r, filepath.Join(h.opts.ExportDir, cleaned))
}
// mimeByExt maps a file extension (including the leading dot, matched
// case-insensitively) to a Content-Type. Unknown extensions map to
// application/octet-stream. It exists so callers can avoid
// mime.TypeByExtension.
func mimeByExt(ext string) string {
	contentType := "application/octet-stream"
	switch lower := strings.ToLower(ext); lower {
	case ".json":
		contentType = "application/json"
	case ".gz":
		contentType = "application/gzip"
	case ".tar":
		contentType = "application/x-tar"
	case ".log", ".txt":
		contentType = "text/plain; charset=utf-8"
	case ".html":
		contentType = "text/html; charset=utf-8"
	case ".svg":
		contentType = "image/svg+xml"
	}
	return contentType
}
// handleExportIndex renders the HTML index of the export directory; a render
// failure is reported as 500.
func (h *handler) handleExportIndex(w http.ResponseWriter, r *http.Request) {
	page, err := renderExportIndex(h.opts.ExportDir)
	if err != nil {
		http.Error(w, fmt.Sprintf("render export index: %v", err), http.StatusInternalServerError)
		return
	}

	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "text/html; charset=utf-8")
	_, _ = w.Write([]byte(page))
}
// handleViewer renders the audit snapshot through the chart viewer template.
// A snapshot load error is deliberately ignored: whatever loadSnapshot
// returned is still enriched and passed to the renderer. A render failure is
// reported as 500.
func (h *handler) handleViewer(w http.ResponseWriter, r *http.Request) {
	raw, _ := loadSnapshot(h.opts.AuditPath)
	page, err := viewer.RenderHTML(enrichSnapshotForViewer(raw), h.opts.Title)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "text/html; charset=utf-8")
	_, _ = w.Write(page)
}
// handleMetricsChartSVG renders one metrics chart as SVG. The chart is named
// by the URL path after /api/metrics/chart/, with an optional ".svg" suffix.
//
// It answers 503 when the metrics database is missing, when no history could
// be loaded, or when the requested chart has no data. A renderer failure is
// reported as 500. "gpu/<n>-overview" paths use the dedicated overview
// renderer; every other path goes through chartDataFromSamples.
func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request) {
	chartPath := strings.TrimSuffix(strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/"), ".svg")

	// sendSVG writes a finished chart; unavailable reports missing history.
	sendSVG := func(svg []byte) {
		w.Header().Set("Content-Type", "image/svg+xml")
		w.Header().Set("Cache-Control", "no-store")
		_, _ = w.Write(svg)
	}
	unavailable := func() {
		http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
	}

	if h.metricsDB == nil {
		http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
		return
	}
	samples, err := h.metricsDB.LoadAll()
	if err != nil || len(samples) == 0 {
		unavailable()
		return
	}
	timeline := metricsTimelineSegments(samples, time.Now())

	if gpuIdx, kind, parsed := parseGPUChartPath(chartPath); parsed && kind == "overview" {
		svg, rendered, renderErr := renderGPUOverviewChartSVG(gpuIdx, samples, timeline)
		switch {
		case renderErr != nil:
			http.Error(w, renderErr.Error(), http.StatusInternalServerError)
		case !rendered:
			unavailable()
		default:
			sendSVG(svg)
		}
		return
	}

	datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(chartPath, samples)
	if !ok {
		unavailable()
		return
	}

	var svg []byte
	switch {
	case stacked:
		svg, err = renderStackedMetricChartSVG(
			title,
			labels,
			sampleTimes(samples),
			datasets,
			names,
			yMax,
			chartCanvasHeightForPath(chartPath, len(names)),
			timeline,
		)
	default:
		svg, err = renderMetricChartSVG(
			title,
			labels,
			sampleTimes(samples),
			datasets,
			names,
			yMin,
			yMax,
			chartCanvasHeightForPath(chartPath, len(names)),
			timeline,
		)
	}
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	sendSVG(svg)
}
// chartDataFromSamples builds the series for the chart identified by path
// (the chart name without the URL prefix or ".svg" suffix).
//
// Results:
//   - datasets / names: one value slice and one legend name per series;
//   - labels: one time label per sample;
//   - title: the chart title;
//   - yMin / yMax: Y-axis bounds; nil means the bound is left unset;
//   - stacked: never assigned in this function, so always false here;
//   - ok: false for an unknown path, an unparsable gpu/ path, a GPU without
//     data, or when no series could be built.
func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (datasets [][]float64, names []string, labels []string, title string, yMin, yMax *float64, stacked bool, ok bool) {
labels = sampleTimeLabels(samples)
switch {
case path == "server-load":
title = "CPU / Memory Load"
cpu := make([]float64, len(samples))
mem := make([]float64, len(samples))
for i, s := range samples {
cpu[i] = s.CPULoadPct
mem[i] = s.MemLoadPct
}
datasets = [][]float64{cpu, mem}
names = []string{"CPU Load %", "Mem Load %"}
yMin = floatPtr(0)
yMax = floatPtr(100)
case path == "server-temp", path == "server-temp-cpu":
title = "CPU Temperature"
datasets, names = namedTempDatasets(samples, "cpu")
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "server-temp-gpu":
title = "GPU Temperature"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "server-temp-ambient":
title = "Ambient / Other Sensors"
datasets, names = namedTempDatasets(samples, "ambient")
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "server-power":
title = "System Power"
power := make([]float64, len(samples))
label := "Power W"
// The legend label is rebuilt for every sample that names a power source,
// so the last such sample determines it.
for i, s := range samples {
power[i] = s.PowerW
if strings.TrimSpace(s.PowerSource) != "" {
label = fmt.Sprintf("Power W · %s", s.PowerSource)
if strings.TrimSpace(s.PowerMode) != "" {
label += fmt.Sprintf(" (%s)", s.PowerMode)
}
}
}
// Non-positive readings are replaced by the last positive one.
power = normalizePowerSeries(power)
datasets = [][]float64{power}
names = []string{label}
yMin = floatPtr(0)
yMax = autoMax120(power)
case path == "server-fans":
title = "Fan RPM"
datasets, names = namedFanDatasets(samples)
yMin, yMax = autoBounds120(datasets...)
case path == "gpu-all-load":
title = "GPU Compute Load"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
yMin = floatPtr(0)
yMax = floatPtr(100)
case path == "gpu-all-memload":
title = "GPU Memory Load"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
yMin = floatPtr(0)
yMax = floatPtr(100)
case path == "gpu-all-power":
title = "GPU Power"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.PowerW })
yMin, yMax = autoBounds120(datasets...)
case path == "gpu-all-temp":
title = "GPU Temperature"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "gpu-all-clock":
title = "GPU Core Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
yMin, yMax = autoBounds120(datasets...)
case path == "gpu-all-memclock":
title = "GPU Memory Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
yMin, yMax = autoBounds120(datasets...)
case strings.HasPrefix(path, "gpu/"):
// Per-GPU chart: "gpu/<index>-<sub>". This ok shadows the named result;
// every return in this function is explicit, so the shadowing is harmless.
idx, sub, ok := parseGPUChartPath(path)
if !ok {
return nil, nil, nil, "", nil, nil, false, false
}
switch sub {
case "load":
title = gpuDisplayLabel(idx) + " Load"
util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
if util == nil && mem == nil {
return nil, nil, nil, "", nil, nil, false, false
}
// A missing series is replaced by zeros so both lines are always drawn.
datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
names = []string{"Load %", "Mem %"}
yMin = floatPtr(0)
yMax = floatPtr(100)
case "temp":
title = gpuDisplayLabel(idx) + " Temperature"
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
if temp == nil {
return nil, nil, nil, "", nil, nil, false, false
}
datasets = [][]float64{temp}
names = []string{"Temp °C"}
yMin = floatPtr(0)
yMax = autoMax120(temp)
case "clock":
title = gpuDisplayLabel(idx) + " Core Clock"
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false, false
}
datasets = [][]float64{clock}
names = []string{"Core Clock MHz"}
yMin, yMax = autoBounds120(clock)
case "memclock":
title = gpuDisplayLabel(idx) + " Memory Clock"
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false, false
}
datasets = [][]float64{clock}
names = []string{"Memory Clock MHz"}
yMin, yMax = autoBounds120(clock)
default:
// Any other sub-chart name (including an empty one) is treated as the
// power chart.
title = gpuDisplayLabel(idx) + " Power"
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
if power == nil {
return nil, nil, nil, "", nil, nil, false, false
}
datasets = [][]float64{power}
names = []string{"Power W"}
yMin, yMax = autoBounds120(power)
}
default:
return nil, nil, nil, "", nil, nil, false, false
}
// A recognised chart with no series at all still reports ok == false.
return datasets, names, labels, title, yMin, yMax, stacked, len(datasets) > 0
}
// parseGPUChartPath splits a per-GPU chart path of the form "gpu/<index>" or
// "gpu/<index>-<sub>" into the GPU index and the sub-chart name.
//
// sub is the text after the last "-" and is empty when there is none. ok is
// false when the path lacks the "gpu/" prefix or when <index> is not a plain
// non-negative decimal number: inputs such as "gpu/3x-load", "gpu/0-1-load"
// or "gpu/-1" are rejected instead of being parsed as a leading number.
func parseGPUChartPath(path string) (idx int, sub string, ok bool) {
	if !strings.HasPrefix(path, "gpu/") {
		return 0, "", false
	}
	rest := strings.TrimPrefix(path, "gpu/")
	if i := strings.LastIndex(rest, "-"); i > 0 {
		sub = rest[i+1:]
		rest = rest[:i]
	}
	if rest == "" {
		return 0, "", false
	}
	// Sscanf stops at the first non-digit and ignores what follows, so the
	// whole token is validated before it is converted.
	for i := 0; i < len(rest); i++ {
		if rest[i] < '0' || rest[i] > '9' {
			return 0, "", false
		}
	}
	// Sscanf still guards against values that overflow int.
	if n, err := fmt.Sscanf(rest, "%d", &idx); err != nil || n != 1 {
		return 0, "", false
	}
	return idx, sub, true
}
// sampleTimeLabels returns one formatted time label per sample. Labels carry
// only the clock time when all samples fall on the same local day, and a
// month-day prefix otherwise.
func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
	n := len(samples)
	labels := make([]string, n)
	if n == 0 {
		return labels
	}

	stamps := make([]time.Time, 0, n)
	for i := range samples {
		stamps = append(stamps, samples[i].Timestamp)
	}
	sameDay := timestampsSameLocalDay(stamps)

	for i := range samples {
		labels[i] = formatTimelineLabel(samples[i].Timestamp.Local(), sameDay)
	}
	return labels
}
// namedTempDatasets builds one temperature series per sensor name found in
// the given group. Names are sorted alphabetically; a sample that has no
// reading for a sensor contributes 0 at that position.
func namedTempDatasets(samples []platform.LiveMetricSample, group string) ([][]float64, []string) {
	// Pass 1: collect the distinct sensor names of the group.
	known := make(map[string]bool)
	var names []string
	for i := range samples {
		for _, reading := range samples[i].Temps {
			if reading.Group != group || known[reading.Name] {
				continue
			}
			known[reading.Name] = true
			names = append(names, reading.Name)
		}
	}
	sort.Strings(names)

	// Pass 2: one series per name, taking the first matching reading of each sample.
	datasets := make([][]float64, len(names))
	for n, name := range names {
		series := make([]float64, len(samples))
		for i := range samples {
			for _, reading := range samples[i].Temps {
				if reading.Group == group && reading.Name == name {
					series[i] = reading.Celsius
					break
				}
			}
		}
		datasets[n] = series
	}
	return datasets, names
}
// namedFanDatasets builds one RPM series per fan name, sorted alphabetically.
// Each series is passed through normalizeFanSeries, so non-positive or missing
// readings are replaced by the last positive one.
func namedFanDatasets(samples []platform.LiveMetricSample) ([][]float64, []string) {
	// Pass 1: collect the distinct fan names.
	known := make(map[string]bool)
	var names []string
	for i := range samples {
		for _, fan := range samples[i].Fans {
			if known[fan.Name] {
				continue
			}
			known[fan.Name] = true
			names = append(names, fan.Name)
		}
	}
	sort.Strings(names)

	// Pass 2: one series per fan, taking the first matching reading of each sample.
	datasets := make([][]float64, len(names))
	for n, name := range names {
		raw := make([]float64, len(samples))
		for i := range samples {
			for _, fan := range samples[i].Fans {
				if fan.Name == name {
					raw[i] = fan.RPM
					break
				}
			}
		}
		datasets[n] = normalizeFanSeries(raw)
	}
	return datasets, names
}
// gpuDatasets builds one series per GPU index seen in samples, ordered by
// index, using pick to extract the plotted value from each GPU row. The
// returned names are the display labels of the GPUs.
func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetricRow) float64) ([][]float64, []string) {
	known := make(map[int]bool)
	var order []int
	for i := range samples {
		for _, row := range samples[i].GPUs {
			if known[row.GPUIndex] {
				continue
			}
			known[row.GPUIndex] = true
			order = append(order, row.GPUIndex)
		}
	}
	sort.Ints(order)

	datasets := make([][]float64, 0, len(order))
	names := make([]string, 0, len(order))
	for _, gpu := range order {
		if series := gpuDatasetByIndex(samples, gpu, pick); series != nil {
			datasets = append(datasets, series)
			names = append(names, gpuDisplayLabel(gpu))
		}
	}
	return datasets, names
}
// gpuDatasetByIndex extracts the series for the GPU with index idx, using
// pick to select the value from each matching row. Samples without that GPU
// contribute 0. It returns nil when the GPU appears in no sample at all.
func gpuDatasetByIndex(samples []platform.LiveMetricSample, idx int, pick func(platform.GPUMetricRow) float64) []float64 {
	series := make([]float64, len(samples))
	matched := false
	for i := range samples {
		for _, row := range samples[i].GPUs {
			if row.GPUIndex != idx {
				continue
			}
			series[i] = pick(row)
			matched = true
			break
		}
	}
	if matched {
		return series
	}
	return nil
}
// coalesceDataset returns ds unchanged when it is non-nil (even if empty) and
// a fresh all-zero slice of length n when ds is nil.
func coalesceDataset(ds []float64, n int) []float64 {
	if ds == nil {
		return make([]float64, n)
	}
	return ds
}
// normalizePowerSeries returns a copy of ds in which every non-positive
// reading that follows a positive one is replaced by the most recent positive
// reading. Readings before the first positive value are kept unchanged. An
// empty input yields nil.
func normalizePowerSeries(ds []float64) []float64 {
	if len(ds) == 0 {
		return nil
	}
	filled := make([]float64, 0, len(ds))
	var carry float64
	seen := false
	for _, watts := range ds {
		switch {
		case watts > 0:
			carry, seen = watts, true
			filled = append(filled, watts)
		case seen:
			filled = append(filled, carry)
		default:
			filled = append(filled, watts)
		}
	}
	return filled
}
// psuSlotsFromSamples collects every PSU slot number that occurs in any
// sample and returns them in ascending order without duplicates.
func psuSlotsFromSamples(samples []platform.LiveMetricSample) []int {
	present := make(map[int]struct{})
	for i := range samples {
		for _, psu := range samples[i].PSUs {
			present[psu.Slot] = struct{}{}
		}
	}

	slots := make([]int, 0, len(present))
	for slot := range present {
		slots = append(slots, slot)
	}
	sort.Ints(slots)
	return slots
}
// psuStackedTotal returns the point-by-point sum of all PSU datasets (for
// scale calculation). The result is as long as the longest dataset, so
// datasets of unequal length are summed safely; shorter ones simply
// contribute nothing past their end. No datasets yields nil.
func psuStackedTotal(datasets [][]float64) []float64 {
	if len(datasets) == 0 {
		return nil
	}
	longest := 0
	for _, ds := range datasets {
		if len(ds) > longest {
			longest = len(ds)
		}
	}
	total := make([]float64, longest)
	for _, ds := range datasets {
		for i, v := range ds {
			total[i] += v
		}
	}
	return total
}
// normalizeFanSeries returns a copy of ds in which every non-positive reading
// is replaced by the most recent positive reading, or by 0 when no positive
// reading has been seen yet. An empty input yields nil.
func normalizeFanSeries(ds []float64) []float64 {
	if len(ds) == 0 {
		return nil
	}
	filled := make([]float64, len(ds))
	// held is the last positive reading; it stays 0 until one is seen.
	held := 0.0
	for i := range ds {
		if rpm := ds[i]; rpm > 0 {
			held = rpm
		}
		filled[i] = held
	}
	return filled
}
// floatPtr returns a pointer to a freshly allocated float64 holding v.
func floatPtr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
// autoMax120 returns the Y-axis maximum for a chart that starts at 0: the
// largest value across all datasets plus 20%. It returns nil when no value is
// greater than zero, which leaves scaling to the chart library.
func autoMax120(datasets ...[]float64) *float64 {
	var peak float64
	for _, series := range datasets {
		for _, sample := range series {
			if sample > peak {
				peak = sample
			}
		}
	}
	if peak == 0 {
		return nil // let library auto-scale
	}
	ceiling := peak * 1.2
	return &ceiling
}
// autoBounds120 derives padded Y-axis bounds from the smallest and largest
// value across all datasets.
//
//   - No values at all: (nil, nil).
//   - Largest value <= 0: lower bound 0 and no upper bound.
//   - Otherwise: the value range is padded by 20% on both sides, with the
//     lower bound clamped at 0. When all values are equal, 10% of the value
//     (or 1 if that is not positive) is used as the range before padding.
func autoBounds120(datasets ...[]float64) (*float64, *float64) {
	var lo, hi float64
	seen := false
	for _, series := range datasets {
		for _, cur := range series {
			switch {
			case !seen:
				lo, hi, seen = cur, cur, true
			case cur < lo:
				lo = cur
			case cur > hi:
				hi = cur
			}
		}
	}
	if !seen {
		return nil, nil
	}
	if hi <= 0 {
		return floatPtr(0), nil
	}

	spread := hi - lo
	if spread <= 0 {
		// Flat series: fall back to a tenth of the value so the line is not
		// drawn on the axis edge.
		if spread = hi * 0.1; spread <= 0 {
			spread = 1
		}
	}
	margin := spread * 0.2

	floor := lo - margin
	if floor < 0 {
		floor = 0
	}
	return floatPtr(floor), floatPtr(hi + margin)
}
// gpuChartLabelIndices picks which of total data points should carry an axis
// label, aiming for roughly target labels.
//
// Indices are taken every total/target points starting at 0, and the last
// index (total-1) is always included. A target that is zero, negative, or
// larger than total selects every index. It returns nil when total <= 0.
func gpuChartLabelIndices(total, target int) []int {
	if total <= 0 {
		return nil
	}
	if total == 1 {
		return []int{0}
	}
	// Guard the division: a zero target would otherwise panic with an integer
	// divide by zero. Non-positive targets fall back to step 1.
	step := 1
	if target > 0 && total/target > 1 {
		step = total / target
	}
	indices := make([]int, 0, total/step+2)
	for i := 0; i < total; i += step {
		indices = append(indices, i)
	}
	if indices[len(indices)-1] != total-1 {
		indices = append(indices, total-1)
	}
	return indices
}
// chartCanvasHeightForPath returns the canvas height for a chart with
// seriesCount series, doubled when path is a GPU chart path.
func chartCanvasHeightForPath(path string, seriesCount int) int {
	base := chartCanvasHeight(seriesCount)
	if !isGPUChartPath(path) {
		return base
	}
	return 2 * base
}
// isGPUChartPath reports whether path starts with one of the GPU chart
// prefixes, "gpu-all-" or "gpu/".
func isGPUChartPath(path string) bool {
	for _, prefix := range [...]string{"gpu-all-", "gpu/"} {
		if strings.HasPrefix(path, prefix) {
			return true
		}
	}
	return false
}
// chartLegendVisible reports whether a chart with seriesCount series shows
// its legend; the legend is dropped once there are more than eight series.
func chartLegendVisible(seriesCount int) bool {
	const maxSeriesWithLegend = 8
	return seriesCount <= maxSeriesWithLegend
}
// chartCanvasHeight returns the chart canvas height: 360 when the legend is
// shown for seriesCount series, 288 when it is not.
func chartCanvasHeight(seriesCount int) int {
	const (
		withLegend    = 360
		withoutLegend = 288
	)
	if !chartLegendVisible(seriesCount) {
		return withoutLegend
	}
	return withLegend
}
// globalStats scans every value of every dataset and reports the smallest
// value, the arithmetic mean, and the largest value. All three results are 0
// when there are no values.
func globalStats(datasets [][]float64) (mn, avg, mx float64) {
	total, n := 0.0, 0
	for _, series := range datasets {
		for _, cur := range series {
			switch {
			case n == 0:
				mn, mx = cur, cur
			case cur < mn:
				mn = cur
			case cur > mx:
				mx = cur
			}
			total += cur
			n++
		}
	}
	if n == 0 {
		return 0, 0, 0
	}
	return mn, total / float64(n), mx
}
// sanitizeChartText prepares s for embedding in chart markup: control
// characters below U+0020 are removed, except tab, line feed and carriage
// return, and the remainder is HTML-escaped.
func sanitizeChartText(s string) string {
	if len(s) == 0 {
		return ""
	}
	keep := func(r rune) rune {
		switch {
		case r >= 0x20, r == '\t', r == '\n', r == '\r':
			return r
		}
		return -1 // a negative rune makes strings.Map drop the character
	}
	return html.EscapeString(strings.Map(keep, s))
}
// snapshotNamedRings snapshots every usable ring and returns, in order, the
// value series, the matching ring names, and a shared label slice. Entries
// that are nil or have a nil Ring are skipped. The labels come from the first
// snapshot that yields a non-empty label slice.
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
	var (
		series [][]float64
		titles []string
		axis   []string
	)
	for _, entry := range rings {
		if entry != nil && entry.Ring != nil {
			values, stamps := entry.Ring.snapshot()
			series = append(series, values)
			titles = append(titles, entry.Name)
			if len(axis) == 0 {
				axis = stamps
			}
		}
	}
	return series, titles, axis
}
// snapshotFanRings snapshots every non-nil fan ring and returns, in order,
// the normalized value series, the series names, and a shared label slice.
// A series is named after fanNames at the ring's position ("Fan" when
// fanNames is too short) with " RPM" appended. The labels come from the first
// snapshot that yields a non-empty label slice.
func snapshotFanRings(rings []*metricsRing, fanNames []string) ([][]float64, []string, []string) {
	var (
		series [][]float64
		titles []string
		axis   []string
	)
	for idx := range rings {
		fan := rings[idx]
		if fan == nil {
			continue
		}
		values, stamps := fan.snapshot()
		series = append(series, normalizeFanSeries(values))

		title := "Fan"
		if idx < len(fanNames) {
			title = fanNames[idx]
		}
		titles = append(titles, title+" RPM")

		if len(axis) == 0 {
			axis = stamps
		}
	}
	return series, titles, axis
}
// chartLegendNumber formats v compactly for a chart legend:
//
//   - magnitude >= 10000: whole thousands, rounded, with a "k" suffix ("13k");
//   - magnitude >= 1000: thousands with up to two decimals, trailing zeros
//     removed and a comma as decimal separator ("1,23k");
//   - otherwise: rounded to an integer ("999").
//
// Negative values get a leading "-".
func chartLegendNumber(v float64) string {
	sign := ""
	if v < 0 {
		sign = "-"
		v = -v
	}
	if v >= 10000 {
		return sign + fmt.Sprintf("%dk", int((v+500)/1000))
	}
	if v >= 1000 {
		text := fmt.Sprintf("%.2f", v/1000)
		text = strings.TrimRight(text, "0")
		text = strings.TrimRight(text, ".")
		return sign + strings.ReplaceAll(text, ".", ",") + "k"
	}
	return sign + fmt.Sprintf("%.0f", v)
}
// chartYAxisNumber formats v compactly for a Y-axis tick:
//
//   - magnitude >= 10000: whole thousands, rounded, with a "к" suffix ("13к");
//   - magnitude >= 1000: thousands with at most one decimal, a trailing zero
//     removed and a comma as decimal separator ("1,4к");
//   - otherwise: rounded to an integer ("500").
//
// Negative values get a leading "-".
func chartYAxisNumber(v float64) string {
	prefix := ""
	if v < 0 {
		prefix, v = "-", -v
	}
	var body string
	if v >= 10000 {
		body = fmt.Sprintf("%dк", int((v+500)/1000))
	} else if v >= 1000 {
		// One decimal keeps neighbouring ticks such as 1400, 1600 and 1800
		// distinguishable ("1,4к", "1,6к", "1,8к") rather than collapsing
		// them into "1к"/"2к".
		thousands := fmt.Sprintf("%.1f", v/1000)
		thousands = strings.TrimRight(thousands, "0")
		thousands = strings.TrimRight(thousands, ".")
		body = strings.ReplaceAll(thousands, ".", ",") + "к"
	} else {
		body = fmt.Sprintf("%.0f", v)
	}
	return prefix + body
}
// handleAPIMetricsExportCSV serves the metrics database as a CSV download
// named bee-metrics.csv by handing the response writer to metricsDB.ExportCSV.
// It answers 503 Service Unavailable when no metrics database is configured.
//
// The download headers are set before the export starts, so an export failure
// can no longer be reported through the status code; it is logged instead of
// being dropped silently.
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
	if h.metricsDB == nil {
		http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
		return
	}
	hdr := w.Header()
	hdr.Set("Content-Type", "text/csv; charset=utf-8")
	hdr.Set("Content-Disposition", `attachment; filename="bee-metrics.csv"`)
	hdr.Set("Cache-Control", "no-store")
	if err := h.metricsDB.ExportCSV(w); err != nil {
		slog.Warn("metrics CSV export failed", "err", err)
	}
}
// ── Page handler ─────────────────────────────────────────────────────────────
// handleReady is a readiness probe. It answers 200 "ready" when no audit path
// is configured or when the configured audit path can be stat'ed, and
// 503 "starting" when stat on that path fails. Responses are never cached.
func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Cache-Control", "no-store")

	status, body := http.StatusOK, "ready"
	if auditPath := h.opts.AuditPath; strings.TrimSpace(auditPath) != "" {
		if _, err := os.Stat(auditPath); err != nil {
			status, body = http.StatusServiceUnavailable, "starting"
		}
	}

	w.WriteHeader(status)
	_, _ = w.Write([]byte(body))
}
// loadingPageHTML is the HTML document for the startup page. Its script
// probes /healthz every 500 ms until the server answers OK, then polls
// /api/services every 1.5 s, renders the service states in a table, and
// redirects to "/" once every service is active, failed or inactive. A
// fallback timer opens the app 15 s after the first successful probe, and the
// "Open app now" button opens it immediately.
//
// The button's click listener is attached from inside the script because go()
// is scoped to the script's IIFE; an inline onclick attribute is evaluated
// outside that scope and cannot resolve it.
const loadingPageHTML = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>EASY-BEE — Starting</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
html,body{height:100%;background:#0f1117;display:flex;align-items:center;justify-content:center;font-family:'Courier New',monospace;color:#e2e8f0}
.wrap{text-align:center;width:420px}
.brand{font-size:22px;letter-spacing:.18em;color:#f6c90e;margin-bottom:6px;text-align:left}
.subtitle{font-size:12px;color:#a0aec0;text-align:left;margin-bottom:24px}
.spinner{width:36px;height:36px;border:3px solid #2d3748;border-top-color:#f6c90e;border-radius:50%;animation:spin .8s linear infinite;margin:0 auto 14px}
.spinner.hidden{display:none}
@keyframes spin{to{transform:rotate(360deg)}}
.status{font-size:13px;color:#a0aec0;margin-bottom:20px;min-height:18px}
table{width:100%;border-collapse:collapse;font-size:12px;margin-bottom:20px;display:none}
td{padding:3px 6px;text-align:left}
td:first-child{color:#718096;width:55%}
.ok{color:#68d391}
.run{color:#f6c90e}
.fail{color:#fc8181}
.dim{color:#4a5568}
.btn{background:#1a202c;color:#a0aec0;border:1px solid #2d3748;padding:7px 18px;font-size:12px;cursor:pointer;font-family:inherit;display:none}
.btn:hover{border-color:#718096;color:#e2e8f0}
</style>
</head>
<body>
<div class="wrap">
<div class="brand">EASY BEE</div>
<div class="subtitle">Hardware Audit LiveCD</div>
<div class="spinner" id="spin"></div>
<div class="status" id="st">Connecting to bee-web...</div>
<table id="tbl"></table>
<button class="btn" id="btn">Open app now</button>
</div>
<script>
(function(){
var gone = false;
var pollStarted = false;
var fallbackOpenTimer = null;
var AUTO_OPEN_DELAY_MS = 15000;
function go(){ if(!gone){gone=true;window.location.replace('/');} }
document.getElementById('btn').addEventListener('click',go);
function scheduleFallbackOpen(){
if(fallbackOpenTimer!==null) return;
fallbackOpenTimer=setTimeout(function(){
document.getElementById('spin').className='spinner hidden';
document.getElementById('st').textContent='Startup checks are taking too long — opening app...';
go();
},AUTO_OPEN_DELAY_MS);
}
function icon(s){
if(s==='active') return '<span class="ok">&#9679; active</span>';
if(s==='failed') return '<span class="fail">&#10005; failed</span>';
if(s==='activating'||s==='reloading') return '<span class="run">&#9675; starting</span>';
if(s==='inactive') return '<span class="dim">&#9675; inactive</span>';
return '<span class="dim">'+s+'</span>';
}
function allSettled(svcs){
for(var i=0;i<svcs.length;i++){
var s=svcs[i].state;
if(s!=='active'&&s!=='failed'&&s!=='inactive') return false;
}
return true;
}
var pollTimer=null;
function pollServices(){
fetch('/api/services',{cache:'no-store'})
.then(function(r){return r.json();})
.then(function(svcs){
if(!svcs||!svcs.length) return;
var tbl=document.getElementById('tbl');
tbl.style.display='';
var html='';
for(var i=0;i<svcs.length;i++)
html+='<tr><td>'+svcs[i].name+'</td><td>'+icon(svcs[i].state)+'</td></tr>';
tbl.innerHTML=html;
if(allSettled(svcs)){
clearInterval(pollTimer);
if(fallbackOpenTimer!==null) clearTimeout(fallbackOpenTimer);
document.getElementById('spin').className='spinner hidden';
document.getElementById('st').textContent='Ready \u2014 opening...';
setTimeout(go,800);
}
})
.catch(function(){});
}
function probe(){
fetch('/healthz',{cache:'no-store'})
.then(function(r){
if(r.ok){
document.getElementById('st').textContent='bee-web running \u2014 checking services...';
document.getElementById('btn').style.display='';
scheduleFallbackOpen();
if(!pollStarted){
pollStarted=true;
pollServices();
pollTimer=setInterval(pollServices,1500);
}
} else {
document.getElementById('st').textContent='bee-web starting (status '+r.status+')...';
setTimeout(probe,500);
}
})
.catch(function(){
document.getElementById('st').textContent='Waiting for bee-web to start...';
setTimeout(probe,500);
});
}
probe();
})();
</script>
</body>
</html>`
// handlePage renders the page named by the request path (without its leading
// slash); the empty path maps to "dashboard". Legacy page names are answered
// with a permanent redirect to their current route. Rendered pages are sent
// as uncached HTML.
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
	name := strings.TrimPrefix(r.URL.Path, "/")
	if len(name) == 0 {
		name = "dashboard"
	}

	// Legacy routes and the named pages that replaced them.
	var moved string
	switch name {
	case "validate", "tests":
		moved = "/load"
	case "burn-in":
		moved = "/burn"
	case "speed", "endurance":
		moved = "/benchmark"
	}
	if moved != "" {
		http.Redirect(w, r, moved, http.StatusMovedPermanently)
		return
	}

	markup := renderPage(name, h.opts)
	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "text/html; charset=utf-8")
	_, _ = w.Write([]byte(markup))
}
// ── Helpers ──────────────────────────────────────────────────────────────────
// loadSnapshot reads the whole file at path. A blank path (empty or only
// whitespace) is reported as os.ErrNotExist without touching the filesystem.
func loadSnapshot(path string) ([]byte, error) {
	if strings.TrimSpace(path) != "" {
		return os.ReadFile(path)
	}
	return nil, os.ErrNotExist
}
// writeJSON encodes v as JSON into the response, marked as uncacheable
// application/json. No status is written explicitly and an encoding error is
// ignored.
func writeJSON(w http.ResponseWriter, v any) {
	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "application/json; charset=utf-8")
	enc := json.NewEncoder(w)
	_ = enc.Encode(v)
}
// writeError responds with the given HTTP status and a JSON object of the
// form {"error": msg}, marked as uncacheable application/json. An encoding
// error is ignored.
func writeError(w http.ResponseWriter, status int, msg string) {
	hdr := w.Header()
	hdr.Set("Cache-Control", "no-store")
	hdr.Set("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(status)

	payload := struct {
		Error string `json:"error"`
	}{Error: msg}
	_ = json.NewEncoder(w).Encode(payload)
}