smartctl -t short only launches the self-test and returns immediately
("Testing has begun"); unlike nvme device-self-test --wait, it has no
blocking mode. Validate/Load runs closed the task and produced reports
before the drive actually finished the test. Now poll smartctl -a until
the test completes (or times out) and report the real result.
Also add a per-disk "Resource" section with pseudographic progress bars
for uptime (vs 5y design life), bytes written (vs 1 DWPD x 5y budget),
and bytes read (percent from SMART attribute 242), all rendered in
human-scaled units (days/years, TB/PB) instead of raw hour/byte counts.
Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
518 lines
16 KiB
Go
518 lines
16 KiB
Go
package platform
|
||
|
||
import (
|
||
"encoding/json"
|
||
"fmt"
|
||
"math"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// GenerateDiskReportText builds a human-readable text report for one storage
|
||
// device from the raw command outputs collected during storage SAT.
|
||
//
|
||
// outputs keys match satJob.name: "nvme-id-ctrl", "nvme-smart-log",
|
||
// "smartctl-health", "smartctl-self-test-short".
|
||
func GenerateDiskReportText(index int, devPath string, outputs map[string][]byte, ts time.Time) string {
|
||
var b strings.Builder
|
||
devName := filepath.Base(devPath)
|
||
line := strings.Repeat("=", 80)
|
||
b.WriteString(line + "\n")
|
||
fmt.Fprintf(&b, "Disk %-3d %s\n", index, devPath)
|
||
b.WriteString(line + "\n")
|
||
|
||
isNVMe := strings.Contains(devName, "nvme")
|
||
if isNVMe {
|
||
writeNVMeReport(&b, outputs)
|
||
} else {
|
||
writeSATAReport(&b, outputs)
|
||
}
|
||
|
||
b.WriteString("\n")
|
||
fmt.Fprintf(&b, "Collected : %s\n", ts.UTC().Format("2006-01-02 15:04:05 UTC"))
|
||
b.WriteString(line + "\n")
|
||
return b.String()
|
||
}
|
||
|
||
// ── NVMe ─────────────────────────────────────────────────────────────────────
|
||
|
||
// nvmeIdCtrl holds the fields of the "nvme-id-ctrl" JSON output that the
// report uses. Fields absent from the JSON keep their zero value.
type nvmeIdCtrl struct {
	// Identity strings; the report trims surrounding whitespace before
	// printing them.
	ModelNumber  string `json:"mn"`
	SerialNumber string `json:"sn"`
	Firmware     string `json:"fr"`

	// Capacities, treated as byte counts by the report. TotalCap is preferred
	// and NVMCap is the fallback when TotalCap is zero.
	TotalCap uint64 `json:"tnvmcap"`
	NVMCap   uint64 `json:"nvmcap"`
}
|
||
|
||
// nvmeU64 decodes a counter from nvme-cli JSON output into a uint64.
//
// Accepted encodings:
//   - a plain JSON number;
//   - a {"lo":n,"hi":n} object, which some nvme-cli versions emit for
//     128-bit counters;
//   - a quoted decimal string.
//
// A value that does not fit in 64 bits (non-zero "hi", or an integer literal
// beyond the uint64 range) saturates at math.MaxUint64 instead of being
// silently truncated or reported as 0. Empty, null, negative, fractional or
// otherwise unrecognised input yields 0.
func nvmeU64(raw json.RawMessage) uint64 {
	if len(raw) == 0 {
		return 0
	}

	var n uint64
	if json.Unmarshal(raw, &n) == nil {
		return n
	}

	var obj struct {
		Lo uint64 `json:"lo"`
		Hi uint64 `json:"hi"`
	}
	if json.Unmarshal(raw, &obj) == nil {
		if obj.Hi != 0 {
			// The counter exceeds 64 bits; returning only the low word
			// would under-report it by an arbitrary amount.
			return math.MaxUint64
		}
		return obj.Lo
	}

	// Last resort: a quoted decimal or an integer literal too large for
	// encoding/json to store in a uint64.
	text := strings.Trim(strings.TrimSpace(string(raw)), `"`)
	v, err := strconv.ParseUint(text, 10, 64)
	if err == nil {
		return v
	}
	if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
		return math.MaxUint64
	}
	return 0
}
|
||
|
||
// nvmeSmartLogRaw mirrors the "nvme-smart-log" JSON fields used by the
// report. Counters kept as json.RawMessage are decoded later with nvmeU64,
// which accepts more than one JSON encoding for them.
type nvmeSmartLogRaw struct {
	// Values decoded directly as numbers.
	CriticalWarning uint64 `json:"critical_warning"`
	AvailSpare      uint64 `json:"avail_spare"`
	SpareThresh     uint64 `json:"spare_thresh"`
	PercentUsed     uint64 `json:"percent_used"`

	// Temperature is converted by the report as Kelvin (273 is subtracted).
	Temperature json.RawMessage `json:"temperature"`

	// Traffic counters; the report multiplies them by 512,000 bytes per unit.
	DataUnitsRead    json.RawMessage `json:"data_units_read"`
	DataUnitsWritten json.RawMessage `json:"data_units_written"`

	// Lifetime and error counters.
	PowerCycles      json.RawMessage `json:"power_cycles"`
	PowerOnHours     json.RawMessage `json:"power_on_hours"`
	UnsafeShutdowns  json.RawMessage `json:"unsafe_shutdowns"`
	MediaErrors      json.RawMessage `json:"media_errors"`
	NumErrLogEntries json.RawMessage `json:"num_err_log_entries"`
}
|
||
|
||
func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) {
|
||
// id-ctrl
|
||
var ctrl nvmeIdCtrl
|
||
if data := outputs["nvme-id-ctrl"]; len(data) > 0 {
|
||
_ = json.Unmarshal(data, &ctrl)
|
||
}
|
||
|
||
model := strings.TrimSpace(ctrl.ModelNumber)
|
||
serial := strings.TrimSpace(ctrl.SerialNumber)
|
||
firmware := strings.TrimSpace(ctrl.Firmware)
|
||
|
||
capacityGB := ""
|
||
if ctrl.TotalCap > 0 {
|
||
capacityGB = formatCapacityGB(ctrl.TotalCap)
|
||
} else if ctrl.NVMCap > 0 {
|
||
capacityGB = formatCapacityGB(ctrl.NVMCap)
|
||
}
|
||
|
||
writeField(b, "Model", model)
|
||
writeField(b, "Serial", serial)
|
||
writeField(b, "Firmware", firmware)
|
||
if capacityGB != "" {
|
||
writeField(b, "Capacity", capacityGB)
|
||
}
|
||
|
||
// smart-log
|
||
data := outputs["nvme-smart-log"]
|
||
if len(data) == 0 {
|
||
b.WriteString("\n(no SMART data)\n")
|
||
return
|
||
}
|
||
var sl nvmeSmartLogRaw
|
||
if err := json.Unmarshal(data, &sl); err != nil {
|
||
fmt.Fprintf(b, "\n(SMART parse error: %v)\n", err)
|
||
return
|
||
}
|
||
|
||
tempK := nvmeU64(sl.Temperature)
|
||
tempC := int(tempK) - 273
|
||
if tempC < 0 {
|
||
tempC = 0
|
||
}
|
||
|
||
critWarn := sl.CriticalWarning
|
||
critWarnStr := "OK"
|
||
if critWarn != 0 {
|
||
critWarnStr = fmt.Sprintf("0x%02X", critWarn)
|
||
}
|
||
|
||
poh := nvmeU64(sl.PowerOnHours)
|
||
pc := nvmeU64(sl.PowerCycles)
|
||
us := nvmeU64(sl.UnsafeShutdowns)
|
||
me := nvmeU64(sl.MediaErrors)
|
||
nel := nvmeU64(sl.NumErrLogEntries)
|
||
|
||
// data_units are in 1000 × 512-byte sectors = 512,000 bytes each
|
||
dataRead := float64(nvmeU64(sl.DataUnitsRead)) * 512000 / 1e9
|
||
dataWritten := float64(nvmeU64(sl.DataUnitsWritten)) * 512000 / 1e9
|
||
|
||
writeSectionHeader(b, "Health")
|
||
writeField(b, "Temperature", fmt.Sprintf("%d °C", tempC))
|
||
writeField(b, "Critical Warning", critWarnStr)
|
||
writeField(b, "Percentage Used", fmt.Sprintf("%d %%", sl.PercentUsed))
|
||
writeField(b, "Available Spare", fmt.Sprintf("%d %% (threshold: %d %%)", sl.AvailSpare, sl.SpareThresh))
|
||
|
||
writeSectionHeader(b, "Usage")
|
||
writeField(b, "Power On Hours", fmt.Sprintf("%s h", formatUint(poh)))
|
||
writeField(b, "Power Cycles", formatUint(pc))
|
||
writeField(b, "Unsafe Shutdowns", formatUint(us))
|
||
writeField(b, "Data Written", fmt.Sprintf("%.1f GB", dataWritten))
|
||
writeField(b, "Data Read", fmt.Sprintf("%.1f GB", dataRead))
|
||
|
||
writeSectionHeader(b, "Errors")
|
||
writeField(b, "Media Errors", formatUint(me))
|
||
writeField(b, "Error Log Entries", formatUint(nel))
|
||
|
||
capacityBytes := ctrl.TotalCap
|
||
if capacityBytes == 0 {
|
||
capacityBytes = ctrl.NVMCap
|
||
}
|
||
writeResourceSection(b, resourceInfo{
|
||
powerOnHours: poh,
|
||
writtenBytes: uint64(nvmeU64(sl.DataUnitsWritten)) * 512000,
|
||
readBytes: uint64(nvmeU64(sl.DataUnitsRead)) * 512000,
|
||
capacityBytes: capacityBytes,
|
||
})
|
||
|
||
if selfTest := outputs["nvme-device-self-test"]; len(selfTest) > 0 {
|
||
writeSectionHeader(b, "Self-Test")
|
||
result := parseSelfTestResult(string(selfTest))
|
||
writeField(b, "Result", result)
|
||
}
|
||
}
|
||
|
||
// ── SATA / SAS (smartctl) ────────────────────────────────────────────────────
|
||
|
||
// Patterns for the text output of smartctl, compiled once at package scope.
var (
	// Identity lines; each captures the rest of the line after the label.
	smartModelRE    = regexp.MustCompile(`(?im)^Device Model:\s*(.+)$`)
	smartSerialRE   = regexp.MustCompile(`(?im)^Serial Number:\s*(.+)$`)
	smartFirmwareRE = regexp.MustCompile(`(?im)^Firmware Version:\s*(.+)$`)
	smartCapacityRE = regexp.MustCompile(`(?im)^User Capacity:\s*(.+)$`)

	// Overall health verdict; captures the first word after the label.
	smartHealthRE = regexp.MustCompile(`(?i)SMART overall-health self-assessment test result:\s*(\S+)`)

	// One row of the attribute table, matched against a single line.
	// Groups: 1 = ID, 2 = name, 3 = value, 4 = worst, 5 = threshold,
	// 6 = raw value (everything after the three skipped columns).
	smartAttrLineRE = regexp.MustCompile(
		`^\s*(\d{1,3})\s+(\S+)\s+0x[0-9a-fA-F]+\s+(\d{1,3})\s+(\d{1,3})\s+(\d{1,3})\s+\S+\s+\S+\s+\S+\s+(.+?)\s*$`,
	)
)
|
||
|
||
// smartAttr is one parsed row of the smartctl attribute table.
type smartAttr struct {
	// ID is the attribute number and Name its label, as printed by smartctl.
	ID   int
	Name string

	// Value, Worst and Threshold are the three numeric columns that follow
	// the flag column.
	Value, Worst, Threshold int

	// Raw is the trimmed text of the last column. It is kept as a string
	// because it is not always a bare number.
	Raw string
}
|
||
|
||
func writeSATAReport(b *strings.Builder, outputs map[string][]byte) {
|
||
data := outputs["smartctl-health"]
|
||
if len(data) == 0 {
|
||
b.WriteString("\n(no SMART data)\n")
|
||
return
|
||
}
|
||
text := string(data)
|
||
|
||
// Identity
|
||
if m := smartModelRE.FindStringSubmatch(text); m != nil {
|
||
writeField(b, "Model", strings.TrimSpace(m[1]))
|
||
}
|
||
if m := smartSerialRE.FindStringSubmatch(text); m != nil {
|
||
writeField(b, "Serial", strings.TrimSpace(m[1]))
|
||
}
|
||
if m := smartFirmwareRE.FindStringSubmatch(text); m != nil {
|
||
writeField(b, "Firmware", strings.TrimSpace(m[1]))
|
||
}
|
||
var capacityBytes uint64
|
||
if m := smartCapacityRE.FindStringSubmatch(text); m != nil {
|
||
cap := strings.TrimSpace(m[1])
|
||
capacityBytes = parseLeadingUint(cap)
|
||
// trim everything after "[" if present (e.g. "500,107,862,016 bytes [500 GB]")
|
||
if idx := strings.Index(cap, "["); idx > 0 {
|
||
cap = strings.TrimSpace(cap[idx+1:])
|
||
cap = strings.TrimSuffix(cap, "]")
|
||
}
|
||
writeField(b, "Capacity", cap)
|
||
}
|
||
|
||
writeSectionHeader(b, "Health")
|
||
health := "unknown"
|
||
if m := smartHealthRE.FindStringSubmatch(text); m != nil {
|
||
health = strings.TrimSpace(m[1])
|
||
}
|
||
writeField(b, "SMART Overall Health", health)
|
||
|
||
attrs := parseSMARTAttrs(text)
|
||
if len(attrs) > 0 {
|
||
writeSectionHeader(b, "SMART Attributes")
|
||
fmt.Fprintf(b, " %-4s %-32s %5s %5s %5s %s\n", "ID", "Attribute", "Value", "Worst", "Thresh", "Raw")
|
||
b.WriteString(" " + strings.Repeat("-", 72) + "\n")
|
||
for _, a := range attrs {
|
||
fmt.Fprintf(b, " %-4d %-32s %5d %5d %5d %s\n",
|
||
a.ID, a.Name, a.Value, a.Worst, a.Threshold, a.Raw)
|
||
}
|
||
}
|
||
|
||
var poh, writtenLBAs, readLBAs uint64
|
||
var readValue int
|
||
hasReadValue := false
|
||
for _, a := range attrs {
|
||
switch a.ID {
|
||
case 9: // Power_On_Hours
|
||
poh = parseLeadingUint(a.Raw)
|
||
case 241: // Total_LBAs_Written
|
||
writtenLBAs = parseLeadingUint(a.Raw)
|
||
case 242: // Total_LBAs_Read
|
||
readLBAs = parseLeadingUint(a.Raw)
|
||
readValue = a.Value
|
||
hasReadValue = true
|
||
}
|
||
}
|
||
const sataSectorBytes = 512
|
||
writeResourceSection(b, resourceInfo{
|
||
powerOnHours: poh,
|
||
writtenBytes: writtenLBAs * sataSectorBytes,
|
||
readBytes: readLBAs * sataSectorBytes,
|
||
capacityBytes: capacityBytes,
|
||
readPercent: 100 - readValue,
|
||
hasReadPercent: hasReadValue,
|
||
})
|
||
|
||
selfTest := outputs["smartctl-self-test-status"]
|
||
if len(selfTest) == 0 {
|
||
selfTest = outputs["smartctl-self-test-short"]
|
||
}
|
||
if len(selfTest) > 0 {
|
||
writeSectionHeader(b, "Self-Test")
|
||
result := parseSelfTestResult(string(selfTest))
|
||
writeField(b, "Result", result)
|
||
}
|
||
}
|
||
|
||
func parseSMARTAttrs(text string) []smartAttr {
|
||
var attrs []smartAttr
|
||
inTable := false
|
||
for _, line := range strings.Split(text, "\n") {
|
||
if strings.Contains(line, "ATTRIBUTE_NAME") {
|
||
inTable = true
|
||
continue
|
||
}
|
||
if !inTable {
|
||
continue
|
||
}
|
||
m := smartAttrLineRE.FindStringSubmatch(line)
|
||
if m == nil {
|
||
if strings.TrimSpace(line) == "" {
|
||
inTable = false
|
||
}
|
||
continue
|
||
}
|
||
id, _ := strconv.Atoi(m[1])
|
||
val, _ := strconv.Atoi(m[3])
|
||
worst, _ := strconv.Atoi(m[4])
|
||
thresh, _ := strconv.Atoi(m[5])
|
||
attrs = append(attrs, smartAttr{
|
||
ID: id,
|
||
Name: m[2],
|
||
Value: val,
|
||
Worst: worst,
|
||
Threshold: thresh,
|
||
Raw: strings.TrimSpace(m[6]),
|
||
})
|
||
}
|
||
return attrs
|
||
}
|
||
|
||
// parseSelfTestResult extracts a one-line summary from nvme device-self-test,
// smartctl -a (post-completion status), or smartctl -t short (launch ack)
// output.
//
// The first matching rule wins:
//  1. a "Self-test execution status" line, joined with up to three indented
//     continuation lines and with whitespace runs collapsed to single spaces;
//  2. a line containing "self-test status" or "self test status";
//  3. a line containing "testing has begun", "started" or "complete";
//  4. the last non-empty line.
//
// Matching is case-insensitive. Empty input yields "no output".
func parseSelfTestResult(text string) string {
	text = strings.TrimSpace(text)
	if text == "" {
		return "no output"
	}
	lines := strings.Split(text, "\n")

	// smartctl -a: "Self-test execution status: ( 0)\n\tThe previous
	// self-test routine completed\n\twithout error ..." — the description
	// wraps onto following indented, colon-free continuation lines.
	for i, line := range lines {
		if !strings.Contains(strings.ToLower(line), "self-test execution status") {
			continue
		}
		parts := []string{line}
		for j := i + 1; j < len(lines) && j < i+4; j++ {
			next := lines[j]
			// A continuation is indented. The next field starts at column 0
			// and its label may itself wrap before the colon ("Total time
			// to complete Offline" / "data collection: ..."), so the colon
			// test alone does not stop there.
			if !strings.HasPrefix(next, " ") && !strings.HasPrefix(next, "\t") {
				break
			}
			cont := strings.TrimSpace(next)
			if cont == "" || strings.Contains(cont, ":") {
				break
			}
			parts = append(parts, cont)
		}
		// Collapse the column padding and tabs smartctl uses for alignment.
		return strings.Join(strings.Fields(strings.Join(parts, " ")), " ")
	}

	// nvme device-self-test: look for "Short Device Self-Test Status : 0x0" or similar
	for _, line := range lines {
		l := strings.ToLower(line)
		if strings.Contains(l, "self-test status") || strings.Contains(l, "self test status") {
			return strings.TrimSpace(line)
		}
	}

	// smartctl -t short: "Testing has begun" or "Short BGST started"
	for _, line := range lines {
		l := strings.ToLower(line)
		if strings.Contains(l, "testing has begun") || strings.Contains(l, "started") || strings.Contains(l, "complete") {
			return strings.TrimSpace(line)
		}
	}

	// fallback: last non-empty line
	for i := len(lines) - 1; i >= 0; i-- {
		if s := strings.TrimSpace(lines[i]); s != "" {
			return s
		}
	}
	return "done"
}
|
||
|
||
// ── Resource (pseudographic usage bars) ────────────────────────────────────────
|
||
|
||
// Assumed endurance rating: 1 drive-write-per-day for 5 years, the baseline
// enterprise endurance spec used when the vendor's own TBW/DWPD rating isn't
// available from SMART/NVMe data. Both constants are left untyped so they can
// take part in integer and floating-point constant expressions alike.
const (
	// designLifeYears is the assumed service life in years.
	designLifeYears = 5

	// dwpd is the assumed number of full drive writes per day.
	dwpd = 1.0
)
|
||
|
||
// resourceInfo carries the per-disk figures rendered by writeResourceSection.
type resourceInfo struct {
	// powerOnHours is the accumulated power-on time in hours.
	powerOnHours uint64

	// writtenBytes and readBytes are lifetime traffic totals in bytes.
	writtenBytes, readBytes uint64

	// capacityBytes is the disk size in bytes; zero disables the write-budget
	// bar.
	capacityBytes uint64

	// readPercent is only meaningful when hasReadPercent is set, i.e. when
	// the source SMART attribute exposes a normalized read-wear value.
	readPercent    int
	hasReadPercent bool
}
|
||
|
||
func writeResourceSection(b *strings.Builder, r resourceInfo) {
|
||
writeSectionHeader(b, "Resource")
|
||
|
||
const maxLifeHours = designLifeYears * 365 * 24
|
||
upFrac := float64(r.powerOnHours) / float64(maxLifeHours)
|
||
fmt.Fprintf(b, " %-9s %s %s / %s (%s)\n",
|
||
"Uptime", progressBar(upFrac, 24), formatHoursHuman(r.powerOnHours), formatHoursHuman(maxLifeHours), formatPercent(upFrac*100))
|
||
|
||
if r.capacityBytes > 0 {
|
||
maxWritten := float64(r.capacityBytes) * dwpd * designLifeYears * 365
|
||
wFrac := float64(r.writtenBytes) / maxWritten
|
||
fmt.Fprintf(b, " %-9s %s %s / %s (%s, %g DWPD×%dy)\n",
|
||
"Written", progressBar(wFrac, 24), formatBytesHuman(float64(r.writtenBytes)), formatBytesHuman(maxWritten), formatPercent(wFrac*100), dwpd, designLifeYears)
|
||
} else {
|
||
fmt.Fprintf(b, " %-9s %s\n", "Written", formatBytesHuman(float64(r.writtenBytes)))
|
||
}
|
||
|
||
if r.hasReadPercent {
|
||
fmt.Fprintf(b, " %-9s %s %s (%d%%)\n",
|
||
"Read", progressBar(float64(r.readPercent)/100, 24), formatBytesHuman(float64(r.readBytes)), r.readPercent)
|
||
} else {
|
||
fmt.Fprintf(b, " %-9s %s\n", "Read", formatBytesHuman(float64(r.readBytes)))
|
||
}
|
||
}
|
||
|
||
// progressBar draws a pseudographic bar of width cells between brackets,
// e.g. "[######------]". frac is the filled share: NaN and negative values
// count as 0 and values above 1 count as 1. The number of filled cells is
// frac × width rounded to the nearest integer.
func progressBar(frac float64, width int) string {
	switch {
	case math.IsNaN(frac), frac < 0:
		frac = 0
	case frac > 1:
		frac = 1
	}

	done := int(math.Round(frac * float64(width)))
	fill := strings.Repeat("#", done)
	rest := strings.Repeat("-", width-done)
	return "[" + fill + rest + "]"
}
|
||
|
||
// formatBytesHuman formats a byte count with decimal (SI) prefixes, e.g.
// "1.23 TB". Plain bytes are printed without decimals, every larger unit
// with two. PB is the largest unit, so bigger values stay in PB.
func formatBytesHuman(n float64) string {
	units := [...]string{"B", "KB", "MB", "GB", "TB", "PB"}

	// Step up one unit per factor of 1000 until the value fits or the
	// largest unit is reached.
	idx := 0
	for ; idx < len(units)-1 && n >= 1000; idx++ {
		n /= 1000
	}

	if idx > 0 {
		return fmt.Sprintf("%.2f %s", n, units[idx])
	}
	return fmt.Sprintf("%.0f %s", n, units[idx])
}
|
||
|
||
// formatHoursHuman turns an hour count into a human-scaled duration so that
// uptimes don't show as raw four/five-digit hour counts: plain hours below
// 48 h, whole days below 365 days, and years (365-day) beyond that. Years
// are printed without decimals when the count is whole and with one decimal
// otherwise.
func formatHoursHuman(hours uint64) string {
	days := float64(hours) / 24
	years := days / 365

	switch {
	case hours < 48:
		return fmt.Sprintf("%d h", hours)
	case days < 365:
		return fmt.Sprintf("%.0f d", days)
	case years == math.Trunc(years):
		return fmt.Sprintf("%.0f y", years)
	default:
		return fmt.Sprintf("%.1f y", years)
	}
}
|
||
|
||
// formatPercent formats pct followed by a percent sign. Values strictly
// between 0 and 1 get two decimals (e.g. "0.03%"), where a rounded "0%"
// would hide any usage at all; everything else is rounded to a whole number.
func formatPercent(pct float64) string {
	decimals := 0
	if pct > 0 && pct < 1 {
		decimals = 2
	}
	return fmt.Sprintf("%.*f%%", decimals, pct)
}
|
||
|
||
// parseLeadingUint reads the number at the start of s, where the number is
// the leading run of ASCII digits and commas (e.g. from a SMART raw value or
// "500,107,862,016 bytes"). Surrounding whitespace is trimmed first, commas
// are dropped, and everything after the run is ignored. The result is 0 when
// s does not start with a digit run.
func parseLeadingUint(s string) uint64 {
	s = strings.TrimSpace(s)

	// Cut at the first character that is neither a digit nor a comma.
	stop := strings.IndexFunc(s, func(r rune) bool {
		return r != ',' && (r < '0' || r > '9')
	})
	if stop >= 0 {
		s = s[:stop]
	}

	// The parse error is ignored on purpose: the function returns whatever
	// ParseUint yields for the cleaned-up prefix.
	value, _ := strconv.ParseUint(strings.ReplaceAll(s, ",", ""), 10, 64)
	return value
}
|
||
|
||
// ── Formatting helpers ────────────────────────────────────────────────────────
|
||
|
||
// writeSectionHeader appends a blank line followed by a section rule of the
// form "-- <title> ----…" to b. The rule is padded with dashes up to 76
// bytes; a longer title is written as is, without padding.
func writeSectionHeader(b *strings.Builder, title string) {
	const ruleWidth = 76

	head := "-- " + title + " "
	b.WriteString("\n")
	b.WriteString(head)
	if fill := ruleWidth - len(head); fill > 0 {
		b.WriteString(strings.Repeat("-", fill))
	}
	b.WriteString("\n")
}
|
||
|
||
// writeField appends one "label : value" line to b, with the label
// left-aligned and padded to 20 characters.
func writeField(b *strings.Builder, label, value string) {
	row := fmt.Sprintf(" %-20s : %s\n", label, value)
	b.WriteString(row)
}
|
||
|
||
// formatCapacityGB formats a byte count as a decimal (SI) capacity: whole
// gigabytes below 1 TB, otherwise terabytes with up to two decimals and no
// trailing zeros ("500 GB", "2 TB", "1.92 TB", "122.88 TB").
//
// The TB branch formats a value rounded to two decimals in plain notation.
// A significant-digit format such as "%.2g" would drop real digits (15.36 TB
// would print as "15 TB") and switch to exponent notation from 100 TB
// upwards.
func formatCapacityGB(bytes uint64) string {
	gb := float64(bytes) / 1e9

	// Decide on the rounded figure so that e.g. 999.6 GB prints as "1 TB"
	// rather than "1000 GB".
	if whole := math.Round(gb); whole < 1000 {
		return fmt.Sprintf("%.0f GB", whole)
	}

	tb := math.Round(gb/1000*100) / 100
	return strconv.FormatFloat(tb, 'f', -1, 64) + " TB"
}
|
||
|
||
// formatUint formats n in decimal with a comma between every group of three
// digits, e.g. 1234567 -> "1,234,567".
func formatUint(n uint64) string {
	digits := strconv.FormatUint(n, 10)

	// The leading group takes whatever is left over after splitting the
	// rest into threes; a full leading group has three digits.
	head := len(digits) % 3
	if head == 0 {
		head = 3
	}

	out := make([]byte, 0, len(digits)+(len(digits)-1)/3)
	out = append(out, digits[:head]...)
	for i := head; i < len(digits); i += 3 {
		out = append(out, ',')
		out = append(out, digits[i:i+3]...)
	}
	return string(out)
}
|
||
|
||
// max returns the larger of a and b. Being declared at package level, it
// takes precedence over the built-in of the same name on Go 1.21 and newer.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|