storage SAT: split collect/self-test modes, add per-disk text reports

Check mode: read-only SMART/NVMe data collection, no self-test.
Load mode: same collection + short self-test (nvme device-self-test -s 1,
smartctl -t short). Card descriptions updated accordingly.

After each storage SAT run, a disk-N-devname-report.txt is written
per device into the runDir (auto-included in support bundles).
Web UI task page renders one card per disk directly below Task Report.

Also fixes pre-existing TestDashboardRendersRuntimeHealthTable failure:
test fixture used "inactive" status but code now treats inactive as OK
for completed oneshot services; updated to "failed" to match intent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-06-30 19:07:36 +03:00
parent ea68318744
commit 386c0738ee
7 changed files with 591 additions and 26 deletions
+6 -6
View File
@@ -143,9 +143,9 @@ func renderValidateMode(opts HandlerOptions, stressDefault bool) string {
)) +
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
inv.Storage,
`Scans all storage devices and runs the matching health or self-test path for each device type.`,
`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
`Seconds in Validate (NVMe: instant device query; SATA/SAS: short self-test). Up to ~1 h per device in Stress (extended self-test, device-dependent).`,
`Collects SMART data and runs a short self-test on each storage device.`,
`<code>lsblk</code>; NVMe: <code>nvme id-ctrl</code>, <code>nvme smart-log</code>, <code>nvme device-self-test -s 1</code>; SATA/SAS: <code>smartctl -H -A</code>, <code>smartctl -t short</code>`,
`~2 min per device (NVMe short self-test; SATA/SAS short self-test — duration device-dependent).`,
)) +
`</div>
<div style="height:1px;background:var(--border);margin:16px 0"></div>
@@ -672,9 +672,9 @@ func renderCheck(opts HandlerOptions) string {
)) +
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
inv.Storage,
`Scans all storage devices and runs the matching health or self-test path for each.`,
`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
`Seconds (NVMe: instant device query; SATA/SAS: short self-test).`,
`Collects SMART health and attributes for each storage device. No self-test is triggered — read-only query only.`,
`<code>lsblk</code>; NVMe: <code>nvme id-ctrl</code>, <code>nvme smart-log</code>; SATA/SAS: <code>smartctl -H -A</code>`,
`Seconds instantaneous device query, no wear counters incremented.`,
)) +
`</div>
<div style="height:1px;background:var(--border);margin:16px 0"></div>
+2 -2
View File
@@ -1227,7 +1227,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
],
"services":[
{"name":"bee-web","status":"active"},
{"name":"bee-nvidia","status":"inactive"}
{"name":"bee-nvidia","status":"failed"}
]
}`
if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
@@ -1281,7 +1281,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
`Bee Services`,
`CUDA runtime is not ready for GPU SAT.`,
`Missing: nvidia-smi`,
`bee-nvidia=inactive`,
`bee-nvidia=failed`,
// Hardware Summary card — component health badges
`Hardware Summary`,
`>CPU<`,
+60
View File
@@ -232,6 +232,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
if powerCard := renderTaskPowerResultsCard(report.Target, logText); powerCard != "" {
b.WriteString(powerCard)
}
if report.Target == "storage" {
b.WriteString(renderStorageDiskReportCards(logText))
}
if len(report.Charts) > 0 {
for _, chart := range report.Charts {
@@ -369,3 +372,60 @@ func formatTaskDuration(sec int) string {
}
return fmt.Sprintf("%dh %02dm %02ds", sec/3600, (sec%3600)/60, sec%60)
}
// renderStorageDiskReportCards builds the per-disk report section of a storage
// task page. It resolves the run directory via taskStorageRunDirFromLog, then
// turns every non-empty file named "disk-*-report.txt" in that directory into
// an HTML card whose body is the escaped file content inside a <pre> element.
//
// Cards are emitted in the order os.ReadDir lists the entries and are
// separated by a single newline. The result is "" when no run directory is
// found in logText, when the directory cannot be listed, or when no report
// file qualifies. Unreadable and empty report files are skipped silently.
func renderStorageDiskReportCards(logText string) string {
	const (
		reportPrefix = "disk-"
		reportSuffix = "-report.txt"
	)

	dir := taskStorageRunDirFromLog(logText)
	if dir == "" {
		return ""
	}
	listing, listErr := os.ReadDir(dir)
	if listErr != nil {
		return ""
	}

	var out strings.Builder
	for _, item := range listing {
		fileName := item.Name()
		isReport := strings.HasPrefix(fileName, reportPrefix) && strings.HasSuffix(fileName, reportSuffix)
		if !isReport {
			continue
		}
		content, readErr := os.ReadFile(filepath.Join(dir, fileName))
		if readErr != nil || len(content) == 0 {
			continue
		}

		// Derive the heading from the file name:
		// "disk-01-nvme0n1-report.txt" -> label "01-nvme0n1" -> "Disk 01 — nvme0n1".
		// A label without a dash is shown as-is after "Disk ".
		label := strings.TrimPrefix(strings.TrimSuffix(fileName, reportSuffix), reportPrefix)
		heading := "Disk " + label
		if dash := strings.Index(label, "-"); dash >= 0 {
			heading = "Disk " + label[:dash] + " — " + label[dash+1:]
		}

		// Newline between cards only, never before the first or after the last.
		if out.Len() > 0 {
			out.WriteString("\n")
		}
		out.WriteString(`<div class="card">`)
		out.WriteString(`<div class="card-head">`)
		out.WriteString(html.EscapeString(heading))
		out.WriteString(`</div>`)
		out.WriteString(`<div class="card-body" style="padding:0">`)
		out.WriteString(`<pre style="margin:0;padding:16px;font-size:12px;line-height:1.6;overflow-x:auto;white-space:pre">`)
		out.WriteString(html.EscapeString(string(content)))
		out.WriteString(`</pre></div></div>`)
	}
	return out.String()
}
// taskStorageRunDirFromLog extracts the storage SAT run directory from task
// log output. It looks at each line of logText (surrounding whitespace
// ignored) for the form "Archive: <path>", e.g.
// "Archive: /path/to/storage-YYYYMMDD-HHMMSS", and returns the first <path>
// whose base name contains "storage-" and which does not end in ".tar.gz".
// It returns "" when no line qualifies.
func taskStorageRunDirFromLog(logText string) string {
	const marker = "Archive:"

	lines := strings.Split(logText, "\n")
	for i := range lines {
		entry := strings.TrimSpace(lines[i])
		if strings.HasPrefix(entry, marker) {
			candidate := strings.TrimSpace(entry[len(marker):])
			// Lines pointing at a .tar.gz are rejected; only a directory path is wanted.
			isTarball := strings.HasSuffix(candidate, ".tar.gz")
			if !isTarball && strings.Contains(filepath.Base(candidate), "storage-") {
				return candidate
			}
		}
	}
	return ""
}