From 33bc275da27024f2b7f00c70917dfa7f6cb8f439 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Thu, 2 Jul 2026 11:58:37 +0300 Subject: [PATCH] storage SAT: fix NVMe SMART counters showing 0 for power-on hours/read/write nvme-cli emits large 64-bit counters as JSON-quoted strings on some versions; the disk-report text generator only handled bare numbers and {lo,hi} objects, so power_on_hours/data_units_read/data_units_written etc. silently parsed as 0 while the structured collector path already handled this correctly. Unify both paths on a single exported JSONInt64/NVMeSmartLog/NVMeIDCtrl type in collector/storage.go instead of keeping two independent nvme-cli JSON parsers in sync. Co-Authored-By: Claude Sonnet 5 --- audit/internal/collector/raid.go | 2 +- audit/internal/collector/storage.go | 99 +++++++++++-------- .../collector/storage_discovery_test.go | 2 +- .../internal/collector/storage_health_test.go | 4 +- audit/internal/platform/storage_report.go | 92 +++++------------ 5 files changed, 85 insertions(+), 114 deletions(-) diff --git a/audit/internal/collector/raid.go b/audit/internal/collector/raid.go index f2bc71b..e605e7f 100644 --- a/audit/internal/collector/raid.go +++ b/audit/internal/collector/raid.go @@ -766,7 +766,7 @@ func parseMDAdmPlatformLicense(raw string) *string { func queryDeviceSerial(devPath string) string { if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil { - var ctrl nvmeIDCtrl + var ctrl NVMeIDCtrl if json.Unmarshal(out, &ctrl) == nil { if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" { return v diff --git a/audit/internal/collector/storage.go b/audit/internal/collector/storage.go index 008528d..0d93b39 100644 --- a/audit/internal/collector/storage.go +++ b/audit/internal/collector/storage.go @@ -84,16 +84,19 @@ func collectStorage() []schema.HardwareStorage { return result } -// jsonInt64 accepts both a bare JSON number and a JSON-quoted number string. -// lsblk -J emits LOG-SEC / PHY-SEC as integers on util-linux ≥ 2.37 (Debian 12) -// but older versions emit them as strings. This type handles both. -type jsonInt64 int64 +// JSONInt64 accepts a bare JSON number (512), a JSON-quoted number string +// ("512" — lsblk -J on util-linux < 2.37, and nvme-cli for large 64-bit +// counters that would lose precision as JS numbers), or a {"lo":n,"hi":n} +// object (128-bit NVMe counters on some nvme-cli versions; hi is ignored as +// no real counter exceeds 64 bits). Shared by lsblk and nvme-cli JSON output +// across the collector and the human-readable disk report. +type JSONInt64 int64 -func (j *jsonInt64) UnmarshalJSON(data []byte) error { +func (j *JSONInt64) UnmarshalJSON(data []byte) error { // bare number: 512 var n int64 if err := json.Unmarshal(data, &n); err == nil { - *j = jsonInt64(n) + *j = JSONInt64(n) return nil } // quoted string: "512" @@ -101,24 +104,32 @@ func (j *jsonInt64) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(data, &s); err == nil { n, err := strconv.ParseInt(strings.TrimSpace(s), 10, 64) if err == nil { - *j = jsonInt64(n) + *j = JSONInt64(n) } return nil } + // {"lo":n,"hi":n} 128-bit counter object + var obj struct { + Lo int64 `json:"lo"` + } + if err := json.Unmarshal(data, &obj); err == nil { + *j = JSONInt64(obj.Lo) + return nil + } return nil // null or unexpected type — leave zero } // lsblkDevice is a minimal lsblk JSON record. type lsblkDevice struct { - Name string `json:"name"` - Type string `json:"type"` - Size string `json:"size"` - Serial string `json:"serial"` - Model string `json:"model"` - Tran string `json:"tran"` - Hctl string `json:"hctl"` - LogSec jsonInt64 `json:"log-sec"` - PhySec jsonInt64 `json:"phy-sec"` + Name string `json:"name"` + Type string `json:"type"` + Size string `json:"size"` + Serial string `json:"serial"` + Model string `json:"model"` + Tran string `json:"tran"` + Hctl string `json:"hctl"` + LogSec JSONInt64 `json:"log-sec"` + PhySec JSONInt64 `json:"phy-sec"` } type lsblkRoot struct { @@ -423,32 +434,36 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage { return s } -// nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about. -// nvme-cli emits most counters as JSON strings (e.g. "power_on_hours":"49"), -// so all numeric fields use jsonInt64 which accepts both bare numbers and -// quoted strings. Field names match nvme-cli JSON output, not NVMe spec prose. -type nvmeSmartLog struct { - CriticalWarning jsonInt64 `json:"critical_warning"` - PercentageUsed jsonInt64 `json:"percent_used"` - AvailableSpare jsonInt64 `json:"avail_spare"` - SpareThreshold jsonInt64 `json:"spare_thresh"` - Temperature jsonInt64 `json:"temperature"` - PowerOnHours jsonInt64 `json:"power_on_hours"` - PowerCycles jsonInt64 `json:"power_cycles"` - UnsafeShutdowns jsonInt64 `json:"unsafe_shutdowns"` - DataUnitsRead jsonInt64 `json:"data_units_read"` - DataUnitsWritten jsonInt64 `json:"data_units_written"` - ControllerBusy jsonInt64 `json:"controller_busy_time"` - MediaErrors jsonInt64 `json:"media_errors"` - NumErrLogEntries jsonInt64 `json:"num_err_log_entries"` +// NVMeSmartLog is the subset of `nvme smart-log -o json` output shared by the +// structured collector and the human-readable disk report. nvme-cli emits +// most counters as JSON strings (e.g. "power_on_hours":"49") or, on some +// versions, as {"lo":n,"hi":n} objects — all numeric fields use JSONInt64, +// which accepts bare numbers, quoted strings, and lo/hi objects. Field names +// match nvme-cli JSON output, not NVMe spec prose. +type NVMeSmartLog struct { + CriticalWarning JSONInt64 `json:"critical_warning"` + PercentageUsed JSONInt64 `json:"percent_used"` + AvailableSpare JSONInt64 `json:"avail_spare"` + SpareThreshold JSONInt64 `json:"spare_thresh"` + Temperature JSONInt64 `json:"temperature"` + PowerOnHours JSONInt64 `json:"power_on_hours"` + PowerCycles JSONInt64 `json:"power_cycles"` + UnsafeShutdowns JSONInt64 `json:"unsafe_shutdowns"` + DataUnitsRead JSONInt64 `json:"data_units_read"` + DataUnitsWritten JSONInt64 `json:"data_units_written"` + ControllerBusy JSONInt64 `json:"controller_busy_time"` + MediaErrors JSONInt64 `json:"media_errors"` + NumErrLogEntries JSONInt64 `json:"num_err_log_entries"` } -// nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output. -type nvmeIDCtrl struct { - ModelNumber string `json:"mn"` - SerialNumber string `json:"sn"` - FirmwareRev string `json:"fr"` - TotalCapacity int64 `json:"tnvmcap"` +// NVMeIDCtrl is the subset of `nvme id-ctrl -o json` output shared by the +// structured collector and the human-readable disk report. +type NVMeIDCtrl struct { + ModelNumber string `json:"mn"` + SerialNumber string `json:"sn"` + FirmwareRev string `json:"fr"` + TotalCapacity JSONInt64 `json:"tnvmcap"` + NVMCapacity JSONInt64 `json:"nvmcap"` } func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage { @@ -481,7 +496,7 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage { // id-ctrl: model, serial, firmware, capacity if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil { - var ctrl nvmeIDCtrl + var ctrl NVMeIDCtrl if json.Unmarshal(out, &ctrl) == nil { if v := cleanDMIValue(strings.TrimSpace(ctrl.ModelNumber)); v != "" { s.Model = &v @@ -502,7 +517,7 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage { // smart-log: wear telemetry if out, err := exec.Command("nvme", "smart-log", devPath, "-o", "json").Output(); err == nil { - var log nvmeSmartLog + var log NVMeSmartLog if json.Unmarshal(out, &log) == nil { if log.PowerOnHours > 0 { v := int64(log.PowerOnHours) diff --git a/audit/internal/collector/storage_discovery_test.go b/audit/internal/collector/storage_discovery_test.go index bb259da..52f6c31 100644 --- a/audit/internal/collector/storage_discovery_test.go +++ b/audit/internal/collector/storage_discovery_test.go @@ -56,7 +56,7 @@ func TestJsonInt64UnmarshalBothFormats(t *testing.T) { {`null`, 0}, } for _, tc := range cases { - var v jsonInt64 + var v JSONInt64 if err := v.UnmarshalJSON([]byte(tc.json)); err != nil { t.Fatalf("UnmarshalJSON(%s): unexpected error %v", tc.json, err) } diff --git a/audit/internal/collector/storage_health_test.go b/audit/internal/collector/storage_health_test.go index c8dfc31..57f4c23 100644 --- a/audit/internal/collector/storage_health_test.go +++ b/audit/internal/collector/storage_health_test.go @@ -9,7 +9,7 @@ import ( // TestNVMeSmartLogUnmarshal verifies that nvme-cli JSON output (where most // counters are quoted strings and field names differ from NVMe spec prose) -// is correctly parsed into nvmeSmartLog. +// is correctly parsed into NVMeSmartLog. func TestNVMeSmartLogUnmarshal(t *testing.T) { t.Parallel() @@ -30,7 +30,7 @@ func TestNVMeSmartLogUnmarshal(t *testing.T) { "media_errors": "0", "num_err_log_entries": "0" }` - var log nvmeSmartLog + var log NVMeSmartLog if err := json.Unmarshal([]byte(raw), &log); err != nil { t.Fatalf("json.Unmarshal failed: %v", err) } diff --git a/audit/internal/platform/storage_report.go b/audit/internal/platform/storage_report.go index 807ef1c..94efc58 100644 --- a/audit/internal/platform/storage_report.go +++ b/audit/internal/platform/storage_report.go @@ -1,6 +1,7 @@ package platform import ( + "bee/audit/internal/collector" "encoding/json" "fmt" "math" @@ -39,65 +40,22 @@ func GenerateDiskReportText(index int, devPath string, outputs map[string][]byte // ── NVMe ───────────────────────────────────────────────────────────────────── -type nvmeIdCtrl struct { - ModelNumber string `json:"mn"` - SerialNumber string `json:"sn"` - Firmware string `json:"fr"` - TotalCap uint64 `json:"tnvmcap"` - NVMCap uint64 `json:"nvmcap"` -} - -// nvmeU64 handles both plain JSON numbers and {"lo":n,"hi":n} objects that -// some nvme-cli versions emit for 128-bit counters. -func nvmeU64(raw json.RawMessage) uint64 { - if len(raw) == 0 { - return 0 - } - var n uint64 - if json.Unmarshal(raw, &n) == nil { - return n - } - var obj struct { - Lo uint64 `json:"lo"` - Hi uint64 `json:"hi"` - } - if json.Unmarshal(raw, &obj) == nil { - return obj.Lo - } - return 0 -} - -type nvmeSmartLogRaw struct { - CriticalWarning uint64 `json:"critical_warning"` - Temperature json.RawMessage `json:"temperature"` - AvailSpare uint64 `json:"avail_spare"` - SpareThresh uint64 `json:"spare_thresh"` - PercentUsed uint64 `json:"percent_used"` - DataUnitsRead json.RawMessage `json:"data_units_read"` - DataUnitsWritten json.RawMessage `json:"data_units_written"` - PowerCycles json.RawMessage `json:"power_cycles"` - PowerOnHours json.RawMessage `json:"power_on_hours"` - UnsafeShutdowns json.RawMessage `json:"unsafe_shutdowns"` - MediaErrors json.RawMessage `json:"media_errors"` - NumErrLogEntries json.RawMessage `json:"num_err_log_entries"` -} - func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) { // id-ctrl - var ctrl nvmeIdCtrl + var ctrl collector.NVMeIDCtrl if data := outputs["nvme-id-ctrl"]; len(data) > 0 { _ = json.Unmarshal(data, &ctrl) } model := strings.TrimSpace(ctrl.ModelNumber) serial := strings.TrimSpace(ctrl.SerialNumber) - firmware := strings.TrimSpace(ctrl.Firmware) + firmware := strings.TrimSpace(ctrl.FirmwareRev) capacityGB := "" - if ctrl.TotalCap > 0 { - capacityGB = formatCapacityGB(ctrl.TotalCap) - } else if ctrl.NVMCap > 0 { - capacityGB = formatCapacityGB(ctrl.NVMCap) + if ctrl.TotalCapacity > 0 { + capacityGB = formatCapacityGB(uint64(ctrl.TotalCapacity)) + } else if ctrl.NVMCapacity > 0 { + capacityGB = formatCapacityGB(uint64(ctrl.NVMCapacity)) } writeField(b, "Model", model) @@ -113,39 +71,37 @@ func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) { b.WriteString("\n(no SMART data)\n") return } - var sl nvmeSmartLogRaw + var sl collector.NVMeSmartLog if err := json.Unmarshal(data, &sl); err != nil { fmt.Fprintf(b, "\n(SMART parse error: %v)\n", err) return } - tempK := nvmeU64(sl.Temperature) - tempC := int(tempK) - 273 + tempC := int(sl.Temperature) - 273 if tempC < 0 { tempC = 0 } - critWarn := sl.CriticalWarning critWarnStr := "OK" - if critWarn != 0 { - critWarnStr = fmt.Sprintf("0x%02X", critWarn) + if sl.CriticalWarning != 0 { + critWarnStr = fmt.Sprintf("0x%02X", sl.CriticalWarning) } - poh := nvmeU64(sl.PowerOnHours) - pc := nvmeU64(sl.PowerCycles) - us := nvmeU64(sl.UnsafeShutdowns) - me := nvmeU64(sl.MediaErrors) - nel := nvmeU64(sl.NumErrLogEntries) + poh := uint64(sl.PowerOnHours) + pc := uint64(sl.PowerCycles) + us := uint64(sl.UnsafeShutdowns) + me := uint64(sl.MediaErrors) + nel := uint64(sl.NumErrLogEntries) // data_units are in 1000 × 512-byte sectors = 512,000 bytes each - dataRead := float64(nvmeU64(sl.DataUnitsRead)) * 512000 / 1e9 - dataWritten := float64(nvmeU64(sl.DataUnitsWritten)) * 512000 / 1e9 + dataRead := float64(sl.DataUnitsRead) * 512000 / 1e9 + dataWritten := float64(sl.DataUnitsWritten) * 512000 / 1e9 writeSectionHeader(b, "Health") writeField(b, "Temperature", fmt.Sprintf("%d °C", tempC)) writeField(b, "Critical Warning", critWarnStr) - writeField(b, "Percentage Used", fmt.Sprintf("%d %%", sl.PercentUsed)) - writeField(b, "Available Spare", fmt.Sprintf("%d %% (threshold: %d %%)", sl.AvailSpare, sl.SpareThresh)) + writeField(b, "Percentage Used", fmt.Sprintf("%d %%", sl.PercentageUsed)) + writeField(b, "Available Spare", fmt.Sprintf("%d %% (threshold: %d %%)", sl.AvailableSpare, sl.SpareThreshold)) writeSectionHeader(b, "Usage") writeField(b, "Power On Hours", fmt.Sprintf("%s h", formatUint(poh))) @@ -158,14 +114,14 @@ func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) { writeField(b, "Media Errors", formatUint(me)) writeField(b, "Error Log Entries", formatUint(nel)) - capacityBytes := ctrl.TotalCap + capacityBytes := uint64(ctrl.TotalCapacity) if capacityBytes == 0 { - capacityBytes = ctrl.NVMCap + capacityBytes = uint64(ctrl.NVMCapacity) } writeResourceSection(b, resourceInfo{ powerOnHours: poh, - writtenBytes: uint64(nvmeU64(sl.DataUnitsWritten)) * 512000, - readBytes: uint64(nvmeU64(sl.DataUnitsRead)) * 512000, + writtenBytes: uint64(sl.DataUnitsWritten) * 512000, + readBytes: uint64(sl.DataUnitsRead) * 512000, capacityBytes: capacityBytes, })