` + html.EscapeString(title) + `
`
}
func layoutNav(active string, buildLabel string) string {
items := []struct{ id, label, href, onclick string }{
{"dashboard", "Dashboard", "/", ""},
{"audit", "Audit", "/audit", ""},
{"validate", "Validate", "/validate", ""},
{"burn", "Burn", "/burn", ""},
{"benchmark", "Benchmark", "/benchmark", ""},
{"tasks", "Tasks", "/tasks", ""},
{"tools", "Tools", "/tools", ""},
}
var b strings.Builder
b.WriteString(``)
return b.String()
}
// renderPage dispatches to the appropriate page renderer.
func renderPage(page string, opts HandlerOptions) string {
var pageID, title, body string
switch page {
case "dashboard", "":
pageID = "dashboard"
title = "Dashboard"
body = renderDashboard(opts)
case "audit":
pageID = "audit"
title = "Audit"
body = renderAudit()
case "validate":
pageID = "validate"
title = "Validate"
body = renderValidate(opts)
case "burn":
pageID = "burn"
title = "Burn"
body = renderBurn()
case "benchmark":
pageID = "benchmark"
title = "Benchmark"
body = renderBenchmark(opts)
case "tasks":
pageID = "tasks"
title = "Tasks"
body = renderTasks()
case "tools":
pageID = "tools"
title = "Tools"
body = renderTools()
// Legacy routes kept accessible but not in nav
case "metrics":
pageID = "metrics"
title = "Live Metrics"
body = renderMetrics()
case "tests":
pageID = "validate"
title = "Acceptance Tests"
body = renderValidate(opts)
case "burn-in":
pageID = "burn"
title = "Burn-in Tests"
body = renderBurn()
case "network":
pageID = "network"
title = "Network"
body = renderNetwork()
case "services":
pageID = "services"
title = "Services"
body = renderServices()
case "export":
pageID = "export"
title = "Export"
body = renderExport(opts.ExportDir)
case "install":
pageID = "install"
title = "Install to Disk"
body = renderInstall()
default:
pageID = "dashboard"
title = "Not Found"
body = `
` +
renderAuditModal() +
`` +
``
}
// ── Dashboard ─────────────────────────────────────────────────────────────────
func renderDashboard(opts HandlerOptions) string {
var b strings.Builder
b.WriteString(renderAuditStatusBanner(opts))
b.WriteString(renderHardwareSummaryCard(opts))
b.WriteString(renderHealthCard(opts))
b.WriteString(renderMetrics())
return b.String()
}
// renderAuditStatusBanner shows a live progress banner when an audit task is
// running and auto-reloads the page when it completes.
func renderAuditStatusBanner(opts HandlerOptions) string {
// If audit data already exists, no banner needed — data is fresh.
// We still inject the polling script so a newly-triggered audit also reloads.
hasData := false
if _, err := loadSnapshot(opts.AuditPath); err == nil {
hasData = true
}
_ = hasData
return `
▶ Hardware audit is running — page will refresh automatically when complete.View in Tasks
`
}
var ingest schema.HardwareIngestRequest
if err := json.Unmarshal(data, &ingest); err != nil {
return `
Hardware Summary
Parse error
`
}
hw := ingest.Hardware
var records []app.ComponentStatusRecord
if db, err := app.OpenComponentStatusDB(filepath.Join(opts.ExportDir, "component-status.json")); err == nil {
records = db.All()
}
var b strings.Builder
b.WriteString(`
`
}
var health schema.RuntimeHealth
if err := json.Unmarshal(data, &health); err != nil {
return `
Runtime Health
Parse error
`
}
status := strings.TrimSpace(health.Status)
if status == "" {
status = "UNKNOWN"
}
badge := "badge-ok"
if status == "PARTIAL" {
badge = "badge-warn"
} else if status == "FAIL" || status == "FAILED" {
badge = "badge-err"
}
var b strings.Builder
b.WriteString(`
Non-destructive: Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.
Tasks continue in the background — view progress in Tasks.
Validate Profile
Runs validate modules sequentially with the selected cycle count and mode. Validate is quick (~5–15 min total); Stress is thorough (~30–60 min total).
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
inv.CPU,
`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
`lscpu, sensors, stress-ng`,
`60s in Validate, 30 min in Stress.`,
)) +
renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
inv.Memory,
`Runs a RAM validation pass and records memory state around the test.`,
`free, memtester`,
`256 MB / 1 pass in Validate, 1 GB / 3 passes in Stress.`,
)) +
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
inv.Storage,
`Scans all storage devices and runs the matching health or self-test path for each device type.`,
`lsblk; NVMe: nvme; SATA/SAS: smartctl`,
`Short self-test in Validate, extended self-test in Stress.`,
)) +
`
NVIDIA GPU Selection
` + inv.NVIDIA + `
All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.
Loading NVIDIA GPUs...
Select at least one NVIDIA GPU to enable NVIDIA validate tasks.
` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
inv.NVIDIA,
`Runs NVIDIA diagnostics and board inventory checks.`,
`nvidia-smi, dmidecode, dcgmi diag`,
`Level 2 in Validate, Level 3 in Stress. Runs one GPU at a time on the selected NVIDIA GPUs.`,
)) +
`
` +
renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
inv.NVIDIA,
`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
`dcgmi diag targeted_stress`,
`Skipped in Validate mode. Runs after dcgmi diag in Stress mode. Runs one GPU at a time on the selected NVIDIA GPUs.
Only runs in Stress mode. Switch mode above to enable in Run All.
`,
)) +
`
` +
`
` +
renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
inv.NVIDIA,
`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
`dcgmi diag targeted_power`,
`Skipped in Validate mode. Runs in Stress mode only. Runs one GPU at a time.
Only runs in Stress mode. Switch mode above to enable in Run All.
`,
)) +
`
` +
`
` +
renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
inv.NVIDIA,
`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
`dcgmi diag pulse_test`,
`Skipped in Validate mode. Runs in Stress mode only. Runs all selected GPUs simultaneously — synchronous pulsing is required to stress the PSU.
Only runs in Stress mode. Switch mode above to enable in Run All.
`,
)) +
`
` +
`
` +
renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
inv.NVIDIA,
`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
`all_reduce_perf (NCCL tests)`,
`Skipped in Validate mode. Runs in Stress mode only. Runs across all selected GPUs simultaneously (requires ≥2).
Only runs in Stress mode. Switch mode above to enable in Run All.
`,
)) +
`
` +
`
` +
renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
inv.NVIDIA,
`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
`nvbandwidth`,
`Skipped in Validate mode. Runs in Stress mode only. Runs across all selected GPUs simultaneously.
Only runs in Stress mode. Switch mode above to enable in Run All.
`,
)) +
`
` +
`
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
inv.AMD,
`Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`,
`GPU Validate: rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`,
``,
)) +
`
Test Output
`
}
func loadValidateInventory(opts HandlerOptions) validateInventory {
unknown := "Audit snapshot not loaded."
out := validateInventory{
CPU: unknown,
Memory: unknown,
Storage: unknown,
NVIDIA: unknown,
AMD: unknown,
}
data, err := loadSnapshot(opts.AuditPath)
if err != nil {
return out
}
var snap schema.HardwareIngestRequest
if err := json.Unmarshal(data, &snap); err != nil {
return out
}
cpuCounts := map[string]int{}
cpuTotal := 0
for _, cpu := range snap.Hardware.CPUs {
if cpu.Present != nil && !*cpu.Present {
continue
}
cpuTotal++
addValidateModel(cpuCounts, validateFirstNonEmpty(validateTrimPtr(cpu.Model), validateTrimPtr(cpu.Manufacturer), "unknown"))
}
memCounts := map[string]int{}
memTotal := 0
for _, dimm := range snap.Hardware.Memory {
if dimm.Present != nil && !*dimm.Present {
continue
}
memTotal++
addValidateModel(memCounts, validateFirstNonEmpty(validateTrimPtr(dimm.PartNumber), validateTrimPtr(dimm.Type), validateTrimPtr(dimm.Manufacturer), "unknown"))
}
storageCounts := map[string]int{}
storageTotal := 0
for _, dev := range snap.Hardware.Storage {
if dev.Present != nil && !*dev.Present {
continue
}
storageTotal++
addValidateModel(storageCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown"))
}
nvidiaCounts := map[string]int{}
nvidiaTotal := 0
amdCounts := map[string]int{}
amdTotal := 0
for _, dev := range snap.Hardware.PCIeDevices {
if dev.Present != nil && !*dev.Present {
continue
}
if validateIsVendorGPU(dev, "nvidia") {
nvidiaTotal++
addValidateModel(nvidiaCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown"))
}
if validateIsVendorGPU(dev, "amd") {
amdTotal++
addValidateModel(amdCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown"))
}
}
out.CPU = formatValidateDeviceSummary(cpuTotal, cpuCounts, "CPU")
out.Memory = formatValidateDeviceSummary(memTotal, memCounts, "module")
out.Storage = formatValidateDeviceSummary(storageTotal, storageCounts, "device")
out.NVIDIA = formatValidateDeviceSummary(nvidiaTotal, nvidiaCounts, "GPU")
out.AMD = formatValidateDeviceSummary(amdTotal, amdCounts, "GPU")
return out
}
func renderValidateCardBody(devices, description, commands, settings string) string {
return `
` + devices + `
` +
`
` + description + `
` +
`
` + commands + `
` +
`
` + settings + `
`
}
func formatValidateDeviceSummary(total int, models map[string]int, unit string) string {
if total == 0 {
return "0 " + unit + "s detected."
}
keys := make([]string, 0, len(models))
for key := range models {
keys = append(keys, key)
}
sort.Strings(keys)
parts := make([]string, 0, len(keys))
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%d x %s", models[key], html.EscapeString(key)))
}
label := unit
if total != 1 {
label += "s"
}
return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", "))
}
func addValidateModel(counts map[string]int, name string) {
name = strings.TrimSpace(name)
if name == "" {
name = "unknown"
}
counts[name]++
}
func validateTrimPtr(value *string) string {
if value == nil {
return ""
}
return strings.TrimSpace(*value)
}
func validateFirstNonEmpty(values ...string) string {
for _, value := range values {
value = strings.TrimSpace(value)
if value != "" {
return value
}
}
return ""
}
func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
model := strings.ToLower(validateTrimPtr(dev.Model))
manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer))
class := strings.ToLower(validateTrimPtr(dev.DeviceClass))
if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") {
return false
}
switch vendor {
case "nvidia":
return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia")
case "amd":
isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller"
isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") || strings.Contains(manufacturer, "amd") || strings.Contains(manufacturer, "ati")
isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd")
return isGPUClass && (isAMDVendor || isAMDModel)
default:
return false
}
}
func renderSATCard(id, label, runAction, headerActions, body string) string {
actions := ``
if strings.TrimSpace(headerActions) != "" {
actions += headerActions
}
return fmt.Sprintf(`
%s
%s
%s
`,
label, actions, body)
}
// ── Benchmark ─────────────────────────────────────────────────────────────────
type benchmarkHistoryColumn struct {
key string
label string
name string
index int
parallel bool
}
type benchmarkHistoryCell struct {
score float64
present bool
}
type benchmarkHistoryRun struct {
generatedAt time.Time
displayTime string
cells map[string]benchmarkHistoryCell
}
func renderBenchmark(opts HandlerOptions) string {
return `
Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in Tasks.
NVIDIA Benchmark
Loading NVIDIA GPUs...
Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.
Method
Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.
Profile
Purpose
Standard
Fast, repeatable performance check for server-to-server comparison.
Stability
Longer run for thermal drift, power caps, and clock instability.
Overnight
Extended verification of long-run stability and late throttling.
Tasks continue in the background — view progress in Tasks.
Burn Profile
Run checked tests one by one. Tests run without cooldown. Each test duration is determined by the Burn Profile. Total test duration is the sum of all selected tests multiplied by the Burn Profile duration.
Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.
NVIDIA GPU Selection
Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.
Loading NVIDIA GPUs...
Select at least one NVIDIA GPU to enable NVIDIA burn recipes.
Core Burn Paths
GPU Max Load
Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.
Compute Stress
Select which subsystems to stress. Each checked item runs as a separate task.
Output
`
}
// ── Network ───────────────────────────────────────────────────────────────────
// renderNetworkInline returns the network UI without a wrapping card (for embedding in Tools).
func renderNetworkInline() string {
return `
⚠ Network change applied. Reverting in 60s unless confirmed.
` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `
Loading...
Output
`
}
func renderServices() string {
return `
Bee Services
` +
renderServicesInline() +
`
`
}
// ── Export ────────────────────────────────────────────────────────────────────
func renderExport(exportDir string) string {
entries, _ := listExportFiles(exportDir)
var rows strings.Builder
for _, e := range entries {
rows.WriteString(fmt.Sprintf(`
Downloads a tar.gz archive of all audit files, SAT results, and logs.
` + renderSupportBundleInline() + `
Export to USB
` + renderUSBExportInline() + `
Tool Check
Checking...
NVIDIA Self Heal
` +
renderNvidiaSelfHealInline() + `
Network
` +
renderNetworkInline() + `
Services
` +
renderServicesInline() + `
`
}
// ── Install to Disk ──────────────────────────────────────────────────────────
func renderInstallInline() string {
return `
Warning: Installing will completely erase the selected
disk and write the live system onto it. All existing data on the target disk will be lost.
This operation cannot be undone.