package webui

import (
	"encoding/json"
	"fmt"
	"html"
	"sort"
	"strings"

	"bee/audit/internal/platform"
	"bee/audit/internal/schema"
)

// validateInventory carries the per-subsystem inventory strings and GPU
// counts consumed by renderValidate.
type validateInventory struct {
	// One pre-built string per subsystem; renderValidate passes them into the
	// matching card body or concatenates them into its output.
	CPU, Memory, Storage, NVIDIA, AMD string

	// NvidiaGPUCount scales the per-GPU duration estimates.
	NvidiaGPUCount int
	// AMDGPUCount is not read in the visible part of this file.
	// NOTE(review): presumably the AMD counterpart of NvidiaGPUCount — confirm.
	AMDGPUCount int
}

// validateFmtDur formats an estimated duration for display.
//
// Values below 120 seconds are shown as "~N s". Anything longer is shown as
// "~N min", where N is (secs+29)/60: the nearest whole minute, with exact
// half-minutes rounding down.
func validateFmtDur(secs int) string {
	const minuteThresholdSec = 120
	if secs >= minuteThresholdSec {
		return fmt.Sprintf("~%d min", (secs+29)/60)
	}
	return fmt.Sprintf("~%d s", secs)
}

// validateTotalValidateSec returns the estimated total duration, in seconds,
// of a Validate run with n NVIDIA GPUs. A negative n is treated as zero.
func validateTotalValidateSec(n int) int {
	gpus := n
	if gpus < 0 {
		gpus = 0
	}

	// Terms that do not depend on the GPU count.
	fixed := platform.SATEstimatedCPUValidateSec +
		platform.SATEstimatedMemoryValidateSec +
		platform.SATEstimatedNvidiaInterconnectSec +
		platform.SATEstimatedNvidiaBandwidthSec

	return fixed + gpus*platform.SATEstimatedNvidiaGPUValidatePerGPUSec
}

// validateTotalStressSec returns the estimated total duration, in seconds,
// of a Stress run with n NVIDIA GPUs. A negative n is treated as zero.
func validateTotalStressSec(n int) int {
	gpus := n
	if gpus < 0 {
		gpus = 0
	}

	// Terms that do not depend on the GPU count.
	fixed := platform.SATEstimatedCPUStressSec +
		platform.SATEstimatedMemoryStressSec +
		platform.SATEstimatedNvidiaPulseTestSec +
		platform.SATEstimatedNvidiaInterconnectSec +
		platform.SATEstimatedNvidiaBandwidthSec

	// Terms that are charged once per GPU.
	perGPU := platform.SATEstimatedNvidiaGPUStressPerGPUSec +
		platform.SATEstimatedNvidiaTargetedStressPerGPUSec +
		platform.SATEstimatedNvidiaTargetedPowerPerGPUSec

	return fixed + gpus*perGPU
}

// renderValidate returns the Validate page content as a single string.
// The returned expression starts with the raw string literal opened on the
// last line below and continues past it.
func renderValidate(opts HandlerOptions) string {
	inv := loadValidateInventory(opts)
	n := inv.NvidiaGPUCount
	validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
	stressTotalStr := validateFmtDur(validateTotalStressSec(n))
	// gpuNote is appended to both totals; it stays empty when no GPU is counted.
	gpuNote := ""
	if n > 0 {
		gpuNote = fmt.Sprintf(" (%d GPU)", n)
	}
	return `
Tasks continue in the background — view progress in Tasks.
Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.
lscpu, sensors, stress-ng`,
validateFmtDur(platform.SATEstimatedCPUValidateSec)+` in Validate (stress-ng 60 s). `+validateFmtDur(platform.SATEstimatedCPUStressSec)+` in Stress (stress-ng 30 min).`,
)) +
renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
inv.Memory,
`Runs a RAM validation pass and records memory state around the test.`,
`free, memtester`,
validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` in Validate (256 MB × 1 pass). `+validateFmtDur(platform.SATEstimatedMemoryStressSec)+` in Stress (512 MB × 1 pass).`,
)) +
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
inv.Storage,
`Scans all storage devices and runs the matching health or self-test path for each device type.`,
`lsblk; NVMe: nvme; SATA/SAS: smartctl`,
`Seconds in Validate (NVMe: instant device query; SATA/SAS: short self-test). Up to ~1 h per device in Stress (extended self-test, device-dependent).`,
)) +
`` + inv.NVIDIA + `
All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.
Loading NVIDIA GPUs...
Select at least one NVIDIA GPU to enable NVIDIA validate tasks.
nvidia-smi, dmidecode, dcgmi diag`,
func() string {
perV := platform.SATEstimatedNvidiaGPUValidatePerGPUSec
perS := platform.SATEstimatedNvidiaGPUStressPerGPUSec
if n > 0 {
return fmt.Sprintf("Validate: %s/GPU × %d = %s (Level 2, sequential). Stress: %s/GPU × %d = %s (Level 3, sequential).",
validateFmtDur(perV), n, validateFmtDur(perV*n),
validateFmtDur(perS), n, validateFmtDur(perS*n))
}
return fmt.Sprintf("Validate: %s/GPU (Level 2, sequential). Stress: %s/GPU (Level 3, sequential).",
validateFmtDur(perV), validateFmtDur(perS))
}(),
)) +
`dcgmi diag targeted_stress`,
func() string {
per := platform.SATEstimatedNvidiaTargetedStressPerGPUSec
s := "Skipped in Validate. "
if n > 0 {
s += fmt.Sprintf("Stress: %s/GPU × %d = %s sequential.", validateFmtDur(per), n, validateFmtDur(per*n))
} else {
s += fmt.Sprintf("Stress: %s/GPU sequential.", validateFmtDur(per))
}
return s + `Only runs in Stress mode. Switch mode above to enable in Run All.
` }(), )) + `dcgmi diag targeted_power`,
func() string {
per := platform.SATEstimatedNvidiaTargetedPowerPerGPUSec
s := "Skipped in Validate. "
if n > 0 {
s += fmt.Sprintf("Stress: %s/GPU × %d = %s sequential.", validateFmtDur(per), n, validateFmtDur(per*n))
} else {
s += fmt.Sprintf("Stress: %s/GPU sequential.", validateFmtDur(per))
}
return s + `Only runs in Stress mode. Switch mode above to enable in Run All.
` }(), )) + `dcgmi diag pulse_test`,
`Skipped in Validate. Stress: `+validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`+`Only runs in Stress mode. Switch mode above to enable in Run All.
`, )) + `all_reduce_perf (NCCL tests)`,
`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
)) +
`nvbandwidth`,
`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
)) +
`rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`,
``,
)) +
`