package webui import ( "encoding/json" "fmt" "html" "net/url" "os" "path/filepath" "sort" "strings" "bee/audit/internal/app" "bee/audit/internal/schema" ) // ── Layout ──────────────────────────────────────────────────────────────────── func layoutHead(title string) string { return ` ` + html.EscapeString(title) + ` ` } func layoutNav(active string, buildLabel string) string { items := []struct{ id, label, href, onclick string }{ {"dashboard", "Dashboard", "/", ""}, {"audit", "Audit", "/audit", ""}, {"validate", "Validate", "/validate", ""}, {"burn", "Burn", "/burn", ""}, {"benchmark", "Benchmark", "/benchmark", ""}, {"tasks", "Tasks", "/tasks", ""}, {"tools", "Tools", "/tools", ""}, } var b strings.Builder b.WriteString(``) return b.String() } // renderPage dispatches to the appropriate page renderer. func renderPage(page string, opts HandlerOptions) string { var pageID, title, body string switch page { case "dashboard", "": pageID = "dashboard" title = "Dashboard" body = renderDashboard(opts) case "audit": pageID = "audit" title = "Audit" body = renderAudit() case "validate": pageID = "validate" title = "Validate" body = renderValidate(opts) case "burn": pageID = "burn" title = "Burn" body = renderBurn() case "benchmark": pageID = "benchmark" title = "Benchmark" body = renderBenchmark() case "tasks": pageID = "tasks" title = "Tasks" body = renderTasks() case "tools": pageID = "tools" title = "Tools" body = renderTools() // Legacy routes kept accessible but not in nav case "metrics": pageID = "metrics" title = "Live Metrics" body = renderMetrics() case "tests": pageID = "validate" title = "Acceptance Tests" body = renderValidate(opts) case "burn-in": pageID = "burn" title = "Burn-in Tests" body = renderBurn() case "network": pageID = "network" title = "Network" body = renderNetwork() case "services": pageID = "services" title = "Services" body = renderServices() case "export": pageID = "export" title = "Export" body = renderExport(opts.ExportDir) case "install": pageID = "install" title = "Install to Disk" body = renderInstall() default: pageID = "dashboard" title = "Not Found" body = `

Page not found.

` } return layoutHead(opts.Title+" — "+title) + layoutNav(pageID, opts.BuildLabel) + `

` + body + `

` + renderAuditModal() + `` + `` } // ── Dashboard ───────────────────────────────────────────────────────────────── func renderDashboard(opts HandlerOptions) string { var b strings.Builder b.WriteString(renderAuditStatusBanner(opts)) b.WriteString(renderHardwareSummaryCard(opts)) b.WriteString(renderHealthCard(opts)) b.WriteString(renderMetrics()) return b.String() } // renderAuditStatusBanner shows a live progress banner when an audit task is // running and auto-reloads the page when it completes. func renderAuditStatusBanner(opts HandlerOptions) string { // If audit data already exists, no banner needed — data is fresh. // We still inject the polling script so a newly-triggered audit also reloads. hasData := false if _, err := loadSnapshot(opts.AuditPath); err == nil { hasData = true } _ = hasData return ` ` } func renderAudit() string { return `

Audit Viewer

` } func renderHardwareSummaryCard(opts HandlerOptions) string { data, err := loadSnapshot(opts.AuditPath) if err != nil { return `

Hardware Summary

` } // Parse just enough fields for the summary banner var snap struct { Summary struct { CPU struct{ Model string } Memory struct{ TotalGB float64 } Storage []struct{ Device, Model, Size string } GPUs []struct{ Model string } PSUs []struct{ Model string } } Network struct { Interfaces []struct { Name string IPv4 []string State string } } } // Try to extract top-level fields loosely var raw map[string]json.RawMessage if err := json.Unmarshal(data, &raw); err != nil { return `

Hardware Summary

Parse error

` } _ = snap // Also load runtime-health for badges type componentHealth struct { FailCount int `json:"fail_count"` WarnCount int `json:"warn_count"` } type healthSummary struct { CPU componentHealth `json:"cpu"` Memory componentHealth `json:"memory"` Storage componentHealth `json:"storage"` GPU componentHealth `json:"gpu"` PSU componentHealth `json:"psu"` Network componentHealth `json:"network"` } var health struct { HardwareHealth healthSummary `json:"hardware_health"` } if hdata, herr := loadSnapshot(filepath.Join(opts.ExportDir, "runtime-health.json")); herr == nil { _ = json.Unmarshal(hdata, &health) } badge := func(h componentHealth) string { if h.FailCount > 0 { return `FAIL` } if h.WarnCount > 0 { return `WARN` } return `OK` } // Extract readable strings from raw JSON getString := func(key string) string { v, ok := raw[key] if !ok { return "" } var s string if err := json.Unmarshal(v, &s); err == nil { return s } return "" } cpuModel := getString("cpu_model") memStr := getString("memory_summary") gpuSummary := getString("gpu_summary") var b strings.Builder b.WriteString(`

Hardware Summary

`) b.WriteString(``) writeRow := func(label, value, badgeHTML string) { b.WriteString(fmt.Sprintf(``, html.EscapeString(label), html.EscapeString(value), badgeHTML)) } if cpuModel != "" { writeRow("CPU", cpuModel, badge(health.HardwareHealth.CPU)) } else { writeRow("CPU", "—", badge(health.HardwareHealth.CPU)) } if memStr != "" { writeRow("Memory", memStr, badge(health.HardwareHealth.Memory)) } else { writeRow("Memory", "—", badge(health.HardwareHealth.Memory)) } if gpuSummary != "" { writeRow("GPU", gpuSummary, badge(health.HardwareHealth.GPU)) } else { writeRow("GPU", "—", badge(health.HardwareHealth.GPU)) } writeRow("Storage", "—", badge(health.HardwareHealth.Storage)) writeRow("PSU", "—", badge(health.HardwareHealth.PSU)) b.WriteString(`

`) b.WriteString(`

`) return b.String() } func renderAuditModal() string { return ` ` } func renderHealthCard(opts HandlerOptions) string { data, err := loadSnapshot(filepath.Join(opts.ExportDir, "runtime-health.json")) if err != nil { return `

Runtime Health

No data

` } var health schema.RuntimeHealth if err := json.Unmarshal(data, &health); err != nil { return `

Runtime Health

Parse error

` } status := strings.TrimSpace(health.Status) if status == "" { status = "UNKNOWN" } badge := "badge-ok" if status == "PARTIAL" { badge = "badge-warn" } else if status == "FAIL" || status == "FAILED" { badge = "badge-err" } var b strings.Builder b.WriteString(`

Runtime Health

`) b.WriteString(fmt.Sprintf(`

`, badge, html.EscapeString(status))) if checkedAt := strings.TrimSpace(health.CheckedAt); checkedAt != "" { b.WriteString(`

Checked at: ` + html.EscapeString(checkedAt) + `

`) } rows := []runtimeHealthRow{ buildRuntimeExportRow(health), buildRuntimeNetworkRow(health), buildRuntimeDriverRow(health), buildRuntimeAccelerationRow(health), buildRuntimeToolsRow(health), buildRuntimeServicesRow(health), } rows = append(rows, buildHardwareComponentRows(opts.ExportDir)...) b.WriteString(``) for _, row := range rows { b.WriteString(``) } b.WriteString(`

Check	Status	Source	Issue
` + html.EscapeString(row.Title) + `	` + runtimeStatusBadge(row.Status) + `	` + html.EscapeString(row.Source) + `	` + rowIssueHTML(row.Issue) + `

`) b.WriteString(`

`) return b.String() } type runtimeHealthRow struct { Title string Status string Source string Issue string } func buildRuntimeExportRow(health schema.RuntimeHealth) runtimeHealthRow { issue := runtimeIssueDescriptions(health.Issues, "export_dir_unavailable") status := "UNKNOWN" switch { case issue != "": status = "FAILED" case strings.TrimSpace(health.ExportDir) != "": status = "OK" } source := "os.MkdirAll" if dir := strings.TrimSpace(health.ExportDir); dir != "" { source += " " + dir } return runtimeHealthRow{Title: "Export Directory", Status: status, Source: source, Issue: issue} } func buildRuntimeNetworkRow(health schema.RuntimeHealth) runtimeHealthRow { status := strings.TrimSpace(health.NetworkStatus) if status == "" { status = "UNKNOWN" } issue := runtimeIssueDescriptions(health.Issues, "dhcp_partial", "dhcp_failed") return runtimeHealthRow{Title: "Network", Status: status, Source: "ListInterfaces / DHCP", Issue: issue} } func buildRuntimeDriverRow(health schema.RuntimeHealth) runtimeHealthRow { issue := runtimeIssueDescriptions(health.Issues, "nvidia_kernel_module_missing", "nvidia_modeset_failed", "amdgpu_kernel_module_missing") status := "UNKNOWN" switch { case health.DriverReady && issue == "": status = "OK" case health.DriverReady: status = "PARTIAL" case issue != "": status = "FAILED" } return runtimeHealthRow{Title: "NVIDIA/AMD Driver", Status: status, Source: "lsmod / vendor probe", Issue: issue} } func buildRuntimeAccelerationRow(health schema.RuntimeHealth) runtimeHealthRow { issue := runtimeIssueDescriptions(health.Issues, "cuda_runtime_not_ready", "rocm_smi_unavailable") status := "UNKNOWN" switch { case health.CUDAReady && issue == "": status = "OK" case health.CUDAReady: status = "PARTIAL" case issue != "": status = "FAILED" } return runtimeHealthRow{Title: "CUDA / ROCm", Status: status, Source: "bee-gpu-burn / rocm-smi", Issue: issue} } func buildRuntimeToolsRow(health schema.RuntimeHealth) runtimeHealthRow { if len(health.Tools) == 0 { return runtimeHealthRow{Title: "Required Utilities", Status: "UNKNOWN", Source: "CheckTools", Issue: "No tool status data."} } missing := make([]string, 0) for _, tool := range health.Tools { if !tool.OK { missing = append(missing, tool.Name) } } status := "OK" issue := "" if len(missing) > 0 { status = "PARTIAL" issue = "Missing: " + strings.Join(missing, ", ") } return runtimeHealthRow{Title: "Required Utilities", Status: status, Source: "CheckTools", Issue: issue} } func buildRuntimeServicesRow(health schema.RuntimeHealth) runtimeHealthRow { if len(health.Services) == 0 { return runtimeHealthRow{Title: "Bee Services", Status: "UNKNOWN", Source: "systemctl is-active", Issue: "No service status data."} } nonActive := make([]string, 0) for _, svc := range health.Services { state := strings.TrimSpace(strings.ToLower(svc.Status)) if state != "active" { nonActive = append(nonActive, svc.Name+"="+svc.Status) } } status := "OK" issue := "" if len(nonActive) > 0 { status = "PARTIAL" issue = strings.Join(nonActive, ", ") } return runtimeHealthRow{Title: "Bee Services", Status: status, Source: "ServiceState", Issue: issue} } func buildHardwareComponentRows(exportDir string) []runtimeHealthRow { path := filepath.Join(exportDir, "component-status.json") db, err := app.OpenComponentStatusDB(path) if err != nil { return []runtimeHealthRow{ {Title: "CPU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, {Title: "Memory Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, {Title: "Storage Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, {Title: "GPU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, {Title: "PSU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "No PSU component checks recorded."}, } } records := db.All() return []runtimeHealthRow{ aggregateComponentStatus("CPU", records, []string{"cpu:all"}, nil), aggregateComponentStatus("Memory", records, []string{"memory:all"}, []string{"memory:"}), aggregateComponentStatus("Storage", records, []string{"storage:all"}, []string{"storage:"}), aggregateComponentStatus("GPU", records, nil, []string{"pcie:gpu:"}), aggregateComponentStatus("PSU", records, nil, []string{"psu:"}), } } func aggregateComponentStatus(title string, records []app.ComponentStatusRecord, exact []string, prefixes []string) runtimeHealthRow { matched := make([]app.ComponentStatusRecord, 0) for _, rec := range records { key := strings.TrimSpace(rec.ComponentKey) if key == "" { continue } if containsExactKey(key, exact) || hasAnyPrefix(key, prefixes) { matched = append(matched, rec) } } if len(matched) == 0 { return runtimeHealthRow{Title: title, Status: "UNKNOWN", Source: "component-status.json", Issue: "No component status data."} } maxSev := -1 for _, rec := range matched { if sev := runtimeComponentSeverity(rec.Status); sev > maxSev { maxSev = sev } } status := "UNKNOWN" switch maxSev { case 3: status = "CRITICAL" case 2: status = "WARNING" case 1: status = "OK" } sources := make([]string, 0) sourceSeen := map[string]struct{}{} issues := make([]string, 0) issueSeen := map[string]struct{}{} for _, rec := range matched { if runtimeComponentSeverity(rec.Status) != maxSev { continue } source := latestComponentSource(rec) if source == "" { source = "component-status.json" } if _, ok := sourceSeen[source]; !ok { sourceSeen[source] = struct{}{} sources = append(sources, source) } issue := strings.TrimSpace(rec.ErrorSummary) if issue == "" { issue = latestComponentDetail(rec) } if issue == "" { continue } if _, ok := issueSeen[issue]; ok { continue } issueSeen[issue] = struct{}{} issues = append(issues, issue) } if len(sources) == 0 { sources = append(sources, "component-status.json") } issue := strings.Join(issues, "; ") if issue == "" { issue = "—" } return runtimeHealthRow{ Title: title, Status: status, Source: strings.Join(sources, ", "), Issue: issue, } } func containsExactKey(key string, exact []string) bool { for _, candidate := range exact { if key == candidate { return true } } return false } func hasAnyPrefix(key string, prefixes []string) bool { for _, prefix := range prefixes { if strings.HasPrefix(key, prefix) { return true } } return false } func runtimeComponentSeverity(status string) int { switch strings.TrimSpace(strings.ToLower(status)) { case "critical": return 3 case "warning": return 2 case "ok": return 1 default: return 0 } } func latestComponentSource(rec app.ComponentStatusRecord) string { if len(rec.History) == 0 { return "" } return strings.TrimSpace(rec.History[len(rec.History)-1].Source) } func latestComponentDetail(rec app.ComponentStatusRecord) string { if len(rec.History) == 0 { return "" } return strings.TrimSpace(rec.History[len(rec.History)-1].Detail) } func runtimeIssueDescriptions(issues []schema.RuntimeIssue, codes ...string) string { if len(issues) == 0 || len(codes) == 0 { return "" } allowed := make(map[string]struct{}, len(codes)) for _, code := range codes { allowed[code] = struct{}{} } messages := make([]string, 0) for _, issue := range issues { if _, ok := allowed[issue.Code]; !ok { continue } desc := strings.TrimSpace(issue.Description) if desc == "" { desc = issue.Code } messages = append(messages, desc) } return strings.Join(messages, "; ") } func runtimeStatusBadge(status string) string { status = strings.ToUpper(strings.TrimSpace(status)) badge := "badge-unknown" switch status { case "OK": badge = "badge-ok" case "PARTIAL", "WARNING", "WARN": badge = "badge-warn" case "FAIL", "FAILED", "CRITICAL": badge = "badge-err" } return `` + html.EscapeString(status) + `` } func rowIssueHTML(issue string) string { issue = strings.TrimSpace(issue) if issue == "" { return `—` } return html.EscapeString(issue) } // ── Metrics ─────────────────────────────────────────────────────────────────── func renderMetrics() string { return `

Live metrics — updated every 2 seconds.

Server — Load

Temperature — CPU

Temperature — Ambient Sensors

Server — Power

` } // ── Validate (Acceptance Tests) ─────────────────────────────────────────────── type validateInventory struct { CPU string Memory string Storage string NVIDIA string AMD string } func renderValidate(opts HandlerOptions) string { inv := loadValidateInventory(opts) return `

Non-destructive: Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.

Tasks continue in the background — view progress in Tasks.

Validate Profile

Cycles

Diag level

Runs validate modules sequentially with the selected cycle count. NVIDIA dcgmi diag uses the selected diag level from this profile.

` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody( inv.CPU, `Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`, `lscpu, sensors, stress-ng`, `Duration is taken from Validate Profile diag level: Level 1 = 60s, Level 2 = 5m, Level 3 = 1h, Level 4 = 1h.`, )) + renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody( inv.Memory, `Runs a short RAM validation pass and records memory state around the test.`, `free, memtester`, `No extra settings.`, )) + renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody( inv.Storage, `Scans all storage devices and runs the matching health or self-test path for each device type.`, `lsblk; NVMe: nvme; SATA/SAS: smartctl`, `No extra settings.`, )) + `

` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody( inv.NVIDIA, `Runs NVIDIA diagnostics and board inventory checks.`, `nvidia-smi, dmidecode, dcgmi diag`, `Runs one GPU at a time. Diag level is taken from Validate Profile.`, )) + renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody( inv.NVIDIA, `Runs a controlled NVIDIA DCGM load in Validate to check stability under moderate stress.`, `dcgmi diag targeted_stress`, `Runs one GPU at a time with the fixed DCGM targeted stress recipe.`, )) + `

` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody( inv.AMD, `Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`, `GPU Validate: rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`, `

GPU ValidateMEM IntegrityMEM Bandwidth

`, )) + `

` } func loadValidateInventory(opts HandlerOptions) validateInventory { unknown := "Audit snapshot not loaded." out := validateInventory{ CPU: unknown, Memory: unknown, Storage: unknown, NVIDIA: unknown, AMD: unknown, } data, err := loadSnapshot(opts.AuditPath) if err != nil { return out } var snap schema.HardwareIngestRequest if err := json.Unmarshal(data, &snap); err != nil { return out } cpuCounts := map[string]int{} cpuTotal := 0 for _, cpu := range snap.Hardware.CPUs { if cpu.Present != nil && !*cpu.Present { continue } cpuTotal++ addValidateModel(cpuCounts, validateFirstNonEmpty(validateTrimPtr(cpu.Model), validateTrimPtr(cpu.Manufacturer), "unknown")) } memCounts := map[string]int{} memTotal := 0 for _, dimm := range snap.Hardware.Memory { if dimm.Present != nil && !*dimm.Present { continue } memTotal++ addValidateModel(memCounts, validateFirstNonEmpty(validateTrimPtr(dimm.PartNumber), validateTrimPtr(dimm.Type), validateTrimPtr(dimm.Manufacturer), "unknown")) } storageCounts := map[string]int{} storageTotal := 0 for _, dev := range snap.Hardware.Storage { if dev.Present != nil && !*dev.Present { continue } storageTotal++ addValidateModel(storageCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) } nvidiaCounts := map[string]int{} nvidiaTotal := 0 amdCounts := map[string]int{} amdTotal := 0 for _, dev := range snap.Hardware.PCIeDevices { if dev.Present != nil && !*dev.Present { continue } if validateIsVendorGPU(dev, "nvidia") { nvidiaTotal++ addValidateModel(nvidiaCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) } if validateIsVendorGPU(dev, "amd") { amdTotal++ addValidateModel(amdCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) } } out.CPU = formatValidateDeviceSummary(cpuTotal, cpuCounts, "CPU") out.Memory = formatValidateDeviceSummary(memTotal, memCounts, "module") out.Storage = formatValidateDeviceSummary(storageTotal, storageCounts, "device") out.NVIDIA = formatValidateDeviceSummary(nvidiaTotal, nvidiaCounts, "GPU") out.AMD = formatValidateDeviceSummary(amdTotal, amdCounts, "GPU") return out } func renderValidateCardBody(devices, description, commands, settings string) string { return `

` + devices + `

` + `

` + description + `

` + `

` + commands + `

` + `

` + settings + `

` } func formatValidateDeviceSummary(total int, models map[string]int, unit string) string { if total == 0 { return "0 " + unit + "s detected." } keys := make([]string, 0, len(models)) for key := range models { keys = append(keys, key) } sort.Strings(keys) parts := make([]string, 0, len(keys)) for _, key := range keys { parts = append(parts, fmt.Sprintf("%d x %s", models[key], html.EscapeString(key))) } label := unit if total != 1 { label += "s" } return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", ")) } func addValidateModel(counts map[string]int, name string) { name = strings.TrimSpace(name) if name == "" { name = "unknown" } counts[name]++ } func validateTrimPtr(value *string) string { if value == nil { return "" } return strings.TrimSpace(*value) } func validateFirstNonEmpty(values ...string) string { for _, value := range values { value = strings.TrimSpace(value) if value != "" { return value } } return "" } func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool { model := strings.ToLower(validateTrimPtr(dev.Model)) manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer)) class := strings.ToLower(validateTrimPtr(dev.DeviceClass)) if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") { return false } switch vendor { case "nvidia": return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia") case "amd": isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller" isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") || strings.Contains(manufacturer, "amd") || strings.Contains(manufacturer, "ati") isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd") return isGPUClass && (isAMDVendor || isAMDModel) default: return false } } func renderSATCard(id, label, runAction, headerActions, body string) string { actions := `` if strings.TrimSpace(headerActions) != "" { actions += headerActions } return fmt.Sprintf(`

`, label, actions, body) } // ── Benchmark ───────────────────────────────────────────────────────────────── func renderBenchmark() string { return `

Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in Tasks.

NVIDIA Benchmark

Profile

GPU Selection

Loading NVIDIA GPUs...

Run multi-GPU interconnect step (NCCL) only on the selected GPUs

Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.

Method

Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.

Profile	Purpose
Standard	Fast, repeatable performance check for server-to-server comparison.
Stability	Longer run for thermal drift, power caps, and clock instability.
Overnight	Extended verification of long-run stability and late throttling.

` } // ── Burn ────────────────────────────────────────────────────────────────────── func renderBurn() string { return `

⚠ Warning: Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.

Scope: DCGM diagnostics and ` + "targeted_stress" + ` remain in Validate. Burn exposes official NVIDIA load recipes by test goal plus separate custom stress tools.

Tasks continue in the background — view progress in Tasks.

Burn Profile

Preset

Smoke — quick check (~5 min) Acceptance — 1 hour Overnight — 8 hours

Run checked tests one by one. Tests run without cooldown. Each test duration is determined by the Burn Profile. Total test duration is the sum of all selected tests multiplied by the Burn Profile duration.

Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.

NVIDIA GPU Selection

Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.

Loading NVIDIA GPUs...

Select at least one NVIDIA GPU to enable NVIDIA burn recipes.

Core Burn Paths

GPU Max Load

Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.

NVIDIA Max Compute Load (dcgmproftester) GPU Burn (bee-gpu-burn) John GPU Stress (john/OpenCL) AMD GPU Stress (rvs gst)

Compute Stress

Select which subsystems to stress. Each checked item runs as a separate task.

CPU stress (stress-ng) Memory stress (stress-ng --vm) stressapptest (CPU + memory bus)

GPU-Specific Tests

Power Delivery / Power Budget

Official NVIDIA power-oriented recipes. ` + "targeted_power" + ` checks sustained delivery; ` + "pulse_test" + ` checks transient behavior.

NVIDIA Targeted Power (dcgmi diag targeted_power) NVIDIA Pulse Test (dcgmi diag pulse_test)

Interconnect / Bandwidth

Official NVIDIA fabric paths. NCCL is interconnect-only and is not a compute burn. NVBandwidth validates copy and bandwidth paths.

NVIDIA Interconnect Test (NCCL all_reduce_perf) NVIDIA Bandwidth Test (NVBandwidth)

` } // ── Network ─────────────────────────────────────────────────────────────────── // renderNetworkInline returns the network UI without a wrapping card (for embedding in Tools). func renderNetworkInline() string { return `

⚠ Network change applied. Reverting in 60s unless confirmed.

DHCP

Interface (leave empty for all)

Static IPv4

Interface

Address

Prefix length

Gateway

DNS (comma-separated)

` } func renderNetwork() string { return `

Network Interfaces

` + renderNetworkInline() + `

` } // ── Services ────────────────────────────────────────────────────────────────── func renderServicesInline() string { return `

` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `

` } func renderServices() string { return `

Bee Services

` + renderServicesInline() + `

` } // ── Export ──────────────────────────────────────────────────────────────────── func renderExport(exportDir string) string { entries, _ := listExportFiles(exportDir) var rows strings.Builder for _, e := range entries { rows.WriteString(fmt.Sprintf(`%s`, url.QueryEscape(e), html.EscapeString(e))) } if len(entries) == 0 { rows.WriteString(`No export files found.`) } return `

Support Bundle

Creates a tar.gz archive of all audit files, SAT results, and logs.

` + renderSupportBundleInline() + `

Export Files

` + rows.String() + `

File

` + renderUSBExportCard() } func listExportFiles(exportDir string) ([]string, error) { var entries []string err := filepath.Walk(strings.TrimSpace(exportDir), func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { return nil } rel, err := filepath.Rel(exportDir, path) if err != nil { return err } entries = append(entries, rel) return nil }) if err != nil && !os.IsNotExist(err) { return nil, err } sort.Strings(entries) return entries, nil } func renderSupportBundleInline() string { return `

` } func renderUSBExportCard() string { return `

Export to USB

` + renderUSBExportInline() + `

` } func renderUSBExportInline() string { return `

Write audit JSON or support bundle directly to a removable USB drive.

Scanning for USB devices...

` } // ── Display Resolution ──────────────────────────────────────────────────────── func renderDisplayInline() string { return `

Loading displays...

` } // ── Tools ───────────────────────────────────────────────────────────────────── func renderTools() string { return `

System Install

Install to RAM

Detecting boot source...

Checking...

Install to Disk

` + renderInstallInline() + `

Support Bundle

Downloads a tar.gz archive of all audit files, SAT results, and logs.

` + renderSupportBundleInline() + `

Export to USB

` + renderUSBExportInline() + `

Tool Check

Checking...

Network

` + renderNetworkInline() + `

Services

` + renderServicesInline() + `

Display Resolution

` + renderDisplayInline() + `

` } // ── Install to Disk ────────────────────────────────────────────────────────── func renderInstallInline() string { return `

Warning: Installing will completely erase the selected disk and write the live system onto it. All existing data on the target disk will be lost. This operation cannot be undone.

Loading disk list…

` } func renderInstall() string { return `

Install Live System to Disk

` + renderInstallInline() + `

` } // ── Tasks ───────────────────────────────────────────────────────────────────── func renderTasks() string { return `

Open a task to view its saved logs and charts.

` } func renderExportIndex(exportDir string) (string, error) { entries, err := listExportFiles(exportDir) if err != nil { return "", err } var body strings.Builder body.WriteString(`Bee Export Files`) body.WriteString(`

Bee Export Files

` + html.EscapeString(entry) + `
No export files found.

`) return body.String(), nil }