add: NVMe wear telemetry via nvme smart-log (1.8b)
This commit is contained in:
@@ -12,7 +12,12 @@ func collectStorage() []schema.HardwareStorage {
|
||||
devs := lsblkDevices()
|
||||
result := make([]schema.HardwareStorage, 0, len(devs))
|
||||
for _, dev := range devs {
|
||||
s := enrichWithSmartctl(dev)
|
||||
var s schema.HardwareStorage
|
||||
if strings.HasPrefix(dev.Name, "nvme") {
|
||||
s = enrichWithNVMe(dev)
|
||||
} else {
|
||||
s = enrichWithSmartctl(dev)
|
||||
}
|
||||
result = append(result, s)
|
||||
}
|
||||
slog.Info("storage: collected", "count", len(result))
|
||||
@@ -175,3 +180,87 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
||||
s.Status = &status
|
||||
return s
|
||||
}
|
||||
|
||||
// nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about.
//
// Values are stored exactly as nvme-cli reports them; no unit conversion is
// applied here.
type nvmeSmartLog struct {
	// PercentageUsed is the drive's endurance-used estimate. nvme-cli emits
	// it under the key "percent_used"; the spelling "percentage_used" is
	// accepted as a fallback by UnmarshalJSON.
	PercentageUsed   int   `json:"percent_used"`
	PowerOnHours     int64 `json:"power_on_hours"`
	PowerCycles      int64 `json:"power_cycles"`
	UnsafeShutdowns  int64 `json:"unsafe_shutdowns"`
	DataUnitsWritten int64 `json:"data_units_written"`
	ControllerBusy   int64 `json:"controller_busy_time"`
}

// UnmarshalJSON decodes a smart-log document into l.
//
// It behaves like the default struct decoding, except that the wear
// percentage is read from "percent_used" and, when that key is absent or
// zero, from "percentage_used". Without this the field would silently stay
// zero for whichever spelling the installed tool does not use.
func (l *nvmeSmartLog) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it does not
	// recurse back into this UnmarshalJSON.
	type plain nvmeSmartLog
	if err := json.Unmarshal(data, (*plain)(l)); err != nil {
		return err
	}

	// Second pass only looks at the alternative key; a pointer distinguishes
	// "key missing" from an explicit 0.
	var alt struct {
		PercentageUsed *int `json:"percentage_used"`
	}
	if err := json.Unmarshal(data, &alt); err != nil {
		return err
	}
	if l.PercentageUsed == 0 && alt.PercentageUsed != nil {
		l.PercentageUsed = *alt.PercentageUsed
	}
	return nil
}
|
||||
|
||||
// nvmeIDCtrl holds the fields decoded from `nvme id-ctrl -o json` that are
// used to describe the controller; every other key in the document is ignored.
type nvmeIDCtrl struct {
	// ModelNumber is read from the "mn" key.
	ModelNumber string `json:"mn"`
	// SerialNumber is read from the "sn" key.
	SerialNumber string `json:"sn"`
	// FirmwareRev is read from the "fr" key.
	FirmwareRev string `json:"fr"`
	// TotalCapacity is read from the "tnvmcap" key.
	// NOTE(review): presumably a byte count — confirm against nvme-cli output.
	TotalCapacity int64 `json:"tnvmcap"`
}
|
||||
|
||||
func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
||||
present := true
|
||||
devType := "NVMe"
|
||||
iface := "NVMe"
|
||||
status := "OK"
|
||||
s := schema.HardwareStorage{
|
||||
Present: &present,
|
||||
Type: &devType,
|
||||
Interface: &iface,
|
||||
Status: &status,
|
||||
}
|
||||
|
||||
devPath := "/dev/" + dev.Name
|
||||
|
||||
// id-ctrl: model, serial, firmware, capacity
|
||||
if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
|
||||
var ctrl nvmeIDCtrl
|
||||
if json.Unmarshal(out, &ctrl) == nil {
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.ModelNumber)); v != "" {
|
||||
s.Model = &v
|
||||
}
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" {
|
||||
s.SerialNumber = &v
|
||||
}
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.FirmwareRev)); v != "" {
|
||||
s.Firmware = &v
|
||||
}
|
||||
if ctrl.TotalCapacity > 0 {
|
||||
gb := int(ctrl.TotalCapacity / 1_000_000_000)
|
||||
s.SizeGB = &gb
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// smart-log: wear telemetry
|
||||
if out, err := exec.Command("nvme", "smart-log", devPath, "-o", "json").Output(); err == nil {
|
||||
var log nvmeSmartLog
|
||||
if json.Unmarshal(out, &log) == nil {
|
||||
tel := map[string]any{}
|
||||
if log.PowerOnHours > 0 {
|
||||
tel["power_on_hours"] = log.PowerOnHours
|
||||
}
|
||||
if log.PowerCycles > 0 {
|
||||
tel["power_cycles"] = log.PowerCycles
|
||||
}
|
||||
if log.UnsafeShutdowns > 0 {
|
||||
tel["unsafe_shutdowns"] = log.UnsafeShutdowns
|
||||
}
|
||||
if log.PercentageUsed > 0 {
|
||||
tel["percentage_used"] = log.PercentageUsed
|
||||
}
|
||||
if log.DataUnitsWritten > 0 {
|
||||
tel["data_units_written"] = log.DataUnitsWritten
|
||||
}
|
||||
if log.ControllerBusy > 0 {
|
||||
tel["controller_busy_time"] = log.ControllerBusy
|
||||
}
|
||||
if len(tel) > 0 {
|
||||
s.Telemetry = tel
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user