feat: task queue, UI overhaul, burn tests, install-to-RAM

- Task queue: all SAT/audit jobs enqueue and run one-at-a-time;
  tasks persist past page navigation; new Tasks page with cancel/priority/log stream
- UI: consolidate nav (Validate, Burn, Tasks, Tools); Audit becomes modal;
  Dashboard hardware summary badges + split metrics charts (load/temp/power);
  Tools page consolidates network, services, install, support bundle
- AMD GPU: acceptance test and stress burn cards; GPU presence API greys
  out irrelevant SAT cards automatically
- Burn tests: Memory Stress (stress-ng --vm), SAT Stress (stressapptest)
- Install to RAM: copies squashfs to /dev/shm, re-associates loop devices
  via LOOP_CHANGE_FD ioctl so live media can be ejected
- Charts: relative time axis (0 = now, negative left)
- memtester: LimitMEMLOCK=infinity in bee-web.service; empty output → UNSUPPORTED
- SAT overlay applied dynamically on every /audit.json serve
- MIME panic guard for LiveCD ramdisk I/O errors
- ISO: add memtest86+, stressapptest packages; memtest86+ GRUB entry;
  disable screensaver/DPMS in bee-openbox-session
- Unknown SAT status severity = 1 (does not override OK)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 21:15:11 +03:00
parent 911745e4da
commit 0a98ed8ae9
22 changed files with 1964 additions and 326 deletions

View File

@@ -76,6 +76,58 @@ func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
return sampleGPUMetrics(gpuIndices)
}
// sampleAMDGPUMetrics queries rocm-smi for live GPU metrics.
func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
// --showtemp --showuse --showpower --csv — one row per GPU
out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
if err != nil {
return nil, err
}
var rows []GPUMetricRow
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(strings.ToLower(line), "device") {
continue
}
// CSV format: device,temp_c,gpu_use%,mem_use%,power_w (order may vary by rocm-smi version)
// We parse by column header from the first line.
parts := strings.Split(line, ",")
if len(parts) < 2 {
continue
}
idx := len(rows)
row := GPUMetricRow{GPUIndex: idx}
// rocm-smi CSV columns vary; extract what we can
for i, p := range parts {
p = strings.TrimSpace(p)
switch {
case i == 0:
// device index like "card0" or "0"
case strings.Contains(strings.ToLower(p), "n/a"):
// skip N/A
default:
// Try to match by position heuristic: temp, use%, memuse%, power
v := parseGPUFloat(p)
switch {
case i == 1 && row.TempC == 0:
row.TempC = v
case i == 2 && row.UsagePct == 0:
row.UsagePct = v
case i == 3 && row.MemUsagePct == 0:
row.MemUsagePct = v
case i == 4 && row.PowerW == 0:
row.PowerW = v
}
}
}
rows = append(rows, row)
}
if len(rows) == 0 {
return nil, fmt.Errorf("rocm-smi: no GPU rows parsed")
}
return rows, nil
}
// WriteGPUMetricsCSV writes collected rows as a CSV file.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
var b bytes.Buffer