feat(webui): add POST /api/sat/abort + update bible-local runtime-flows
- jobState now has an optional cancel func; abort() calls it if the job is still running
- handleAPISATRun passes a cancellable context to RunNvidiaAcceptancePackWithOptions
- POST /api/sat/abort?job_id=... cancels the running SAT job
- bible-local/runtime-flows.md: replace TUI SAT flow with Web UI flow

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -155,8 +155,11 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
id := newJobID("sat-" + target)
|
id := newJobID("sat-" + target)
|
||||||
j := globalJobs.create(id)
|
j := globalJobs.create(id)
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
j.cancel = cancel
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
defer cancel()
|
||||||
j.append(fmt.Sprintf("Starting %s acceptance test...", target))
|
j.append(fmt.Sprintf("Starting %s acceptance test...", target))
|
||||||
var (
|
var (
|
||||||
archive string
|
archive string
|
||||||
@@ -178,7 +181,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
case "nvidia":
|
case "nvidia":
|
||||||
if len(body.GPUIndices) > 0 || body.DiagLevel > 0 {
|
if len(body.GPUIndices) > 0 || body.DiagLevel > 0 {
|
||||||
result, e := h.opts.App.RunNvidiaAcceptancePackWithOptions(
|
result, e := h.opts.App.RunNvidiaAcceptancePackWithOptions(
|
||||||
context.Background(), "", body.DiagLevel, body.GPUIndices,
|
ctx, "", body.DiagLevel, body.GPUIndices,
|
||||||
)
|
)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
err = e
|
err = e
|
||||||
@@ -201,8 +204,13 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
j.append("ERROR: " + err.Error())
|
if ctx.Err() != nil {
|
||||||
j.finish(err.Error())
|
j.append("Aborted.")
|
||||||
|
j.finish("aborted")
|
||||||
|
} else {
|
||||||
|
j.append("ERROR: " + err.Error())
|
||||||
|
j.finish(err.Error())
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
j.append(fmt.Sprintf("Archive written: %s", archive))
|
j.append(fmt.Sprintf("Archive written: %s", archive))
|
||||||
@@ -223,6 +231,20 @@ func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
|||||||
streamJob(w, r, j)
|
streamJob(w, r, j)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPISATAbort(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := r.URL.Query().Get("job_id")
|
||||||
|
j, ok := globalJobs.get(id)
|
||||||
|
if !ok {
|
||||||
|
http.Error(w, "job not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if j.abort() {
|
||||||
|
writeJSON(w, map[string]string{"status": "aborted"})
|
||||||
|
} else {
|
||||||
|
writeJSON(w, map[string]string{"status": "not_running"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ── Services ──────────────────────────────────────────────────────────────────
|
// ── Services ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func (h *handler) handleAPIServicesList(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIServicesList(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|||||||
@@ -7,12 +7,23 @@ import (
|
|||||||
|
|
||||||
// jobState holds the output lines and completion status of an async job.
//
// The zero value describes a job that is not done and is not cancellable
// (cancel is nil).
type jobState struct {
	lines  []string      // output lines of the job
	done   bool          // completion flag; abort refuses to act once it is set
	err    string        // presumably the error text of a failed job — set outside this view, confirm
	mu     sync.Mutex    // held by abort while it inspects done and cancel
	subs   []chan string // channels that receive new lines as they arrive
	cancel func()        // optional cancel function; nil if job is not cancellable
}
|
||||||
|
|
||||||
|
// abort cancels the job if it has a cancel function and is not yet done.
|
||||||
|
func (j *jobState) abort() bool {
|
||||||
|
j.mu.Lock()
|
||||||
|
defer j.mu.Unlock()
|
||||||
|
if j.done || j.cancel == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
j.cancel()
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (j *jobState) append(line string) {
|
func (j *jobState) append(line string) {
|
||||||
|
|||||||
@@ -132,6 +132,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
|
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
|
||||||
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
|
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
|
||||||
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
||||||
|
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
||||||
|
|
||||||
// Services
|
// Services
|
||||||
mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
|
mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
|
||||||
|
|||||||
@@ -126,7 +126,7 @@ Key checks: NVIDIA modules loaded, `nvidia-smi` sees all GPUs, lib symlinks pres
|
|||||||
systemd services running, audit completed with NVIDIA enrichment, LAN reachability.
|
systemd services running, audit completed with NVIDIA enrichment, LAN reachability.
|
||||||
|
|
||||||
Current validation state:
|
Current validation state:
|
||||||
- local/libvirt VM boot path is validated for `systemd`, SSH, `bee audit`, `bee-network`, and TUI startup
|
- local/libvirt VM boot path is validated for `systemd`, SSH, `bee audit`, `bee-network`, and Web UI startup
|
||||||
- real hardware validation is still required before treating the ISO as release-ready
|
- real hardware validation is still required before treating the ISO as release-ready
|
||||||
|
|
||||||
## Overlay mechanism
|
## Overlay mechanism
|
||||||
@@ -168,33 +168,17 @@ Acceptance flows:
|
|||||||
- `BEE_MEMTESTER_SIZE_MB`
|
- `BEE_MEMTESTER_SIZE_MB`
|
||||||
- `BEE_MEMTESTER_PASSES`
|
- `BEE_MEMTESTER_PASSES`
|
||||||
|
|
||||||
## NVIDIA SAT TUI flow (v1.0.0+)
|
## NVIDIA SAT Web UI flow
|
||||||
|
|
||||||
```
|
```
|
||||||
TUI: Acceptance tests → NVIDIA command pack
|
Web UI: Acceptance Tests page → Run Test button
|
||||||
1. screenNvidiaSATSetup
|
1. POST /api/sat/nvidia/run → returns job_id
|
||||||
a. enumerate GPUs via `nvidia-smi --query-gpu=index,name,memory.total`
|
2. GET /api/sat/stream?job_id=... (SSE) — streams stdout/stderr lines live
|
||||||
b. user selects duration preset: 10 min / 1 h / 8 h / 24 h
|
3. After completion — archive written to /appdata/bee/export/bee-sat/
|
||||||
c. user selects GPUs via checkboxes (all selected by default)
|
summary.txt contains overall_status (OK / FAILED) and per-job status values
|
||||||
d. memory size = max(selected GPU memory) — auto-detected, not exposed to user
|
|
||||||
2. Start → screenNvidiaSATRunning
|
|
||||||
a. CUDA_VISIBLE_DEVICES set to selected GPU indices
|
|
||||||
b. tea.Batch: SAT goroutine + tea.ExecProcess(nvtop) launched concurrently
|
|
||||||
c. nvtop occupies full terminal; SAT result queues in background
|
|
||||||
d. [o] reopen nvtop at any time; [a] abort (cancels context → kills bee-gpu-stress)
|
|
||||||
3. GPU metrics collection (during bee-gpu-stress)
|
|
||||||
- background goroutine polls `nvidia-smi` every second
|
|
||||||
- per-second rows: elapsed, GPU index, temp°C, usage%, power W, clock MHz
|
|
||||||
- outputs: gpu-metrics.csv, gpu-metrics.html (offline SVG chart), gpu-metrics-term.txt
|
|
||||||
4. After SAT completes
|
|
||||||
- result shown in screenOutput with terminal line-chart (gpu-metrics-term.txt)
|
|
||||||
- chart is asciigraph-style: box-drawing chars (╭╮╰╯─│), 4 series per GPU,
|
|
||||||
Y axis with ticks, ANSI colours (red=temp, blue=usage, green=power, yellow=clock)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Critical invariants:**
|
**Critical invariants:**
|
||||||
- `nvtop` must be in `iso/builder/config/package-lists/bee.list.chroot` (baked into ISO).
|
- `bee-gpu-stress` uses `exec.CommandContext` — killed on job context cancel.
|
||||||
- `bee-gpu-stress` uses `exec.CommandContext` — aborted on cancel.
|
|
||||||
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
|
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
|
||||||
- If `nvtop` is not found on PATH, SAT still runs without it (graceful degradation).
|
|
||||||
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
|
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
|
||||||
|
|||||||
Reference in New Issue
Block a user