package collector

import (
	"encoding/json"
	"log/slog"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"bee/audit/internal/schema"
)

// collectStorage discovers the disks on this host and enriches each one with
// identity and health data. Devices whose name starts with "nvme" are handled
// by enrichWithNVMe, all others by enrichWithSmartctl.
func collectStorage() []schema.HardwareStorage {
	devs := discoverStorageDevices()
	result := make([]schema.HardwareStorage, 0, len(devs))
	for _, dev := range devs {
		if strings.HasPrefix(dev.Name, "nvme") {
			result = append(result, enrichWithNVMe(dev))
			continue
		}
		result = append(result, enrichWithSmartctl(dev))
	}
	slog.Info("storage: collected", "count", len(result))
	return result
}

// lsblkDevice is a minimal lsblk JSON record.
type lsblkDevice struct {
	Name   string `json:"name"`
	Type   string `json:"type"`
	Size   string `json:"size"`
	Serial string `json:"serial"`
	Model  string `json:"model"`
	Tran   string `json:"tran"`
	Hctl   string `json:"hctl"`
}

// lsblkRoot is the top-level object of `lsblk -J` output.
type lsblkRoot struct {
	Blockdevices []lsblkDevice `json:"blockdevices"`
}

// nvmeListRoot is the top-level object of `nvme list -o json` output.
type nvmeListRoot struct {
	Devices []nvmeListDevice `json:"Devices"`
}

// nvmeListDevice is the subset of one `nvme list -o json` entry we read.
type nvmeListDevice struct {
	DevicePath   string `json:"DevicePath"`
	ModelNumber  string `json:"ModelNumber"`
	SerialNumber string `json:"SerialNumber"`
	Firmware     string `json:"Firmware"`
	PhysicalSize int64  `json:"PhysicalSize"`
}

// discoverStorageDevices merges the devices reported by lsblk and by
// `nvme list`, keyed by device name, and returns those of type "disk".
// lsblk values win; nvme values only fill fields lsblk left blank.
// The result is sorted by name so repeated runs produce the same order
// (the intermediate map would otherwise yield a random order).
func discoverStorageDevices() []lsblkDevice {
	merged := map[string]lsblkDevice{}
	for _, dev := range lsblkDevices() {
		if dev.Name == "" {
			continue
		}
		merged[dev.Name] = dev
	}
	for _, dev := range nvmeListDevices() {
		if dev.Name == "" {
			continue
		}
		merged[dev.Name] = mergeStorageDevice(merged[dev.Name], dev)
	}

	disks := make([]lsblkDevice, 0, len(merged))
	for _, dev := range merged {
		if dev.Type == "" {
			dev.Type = "disk"
		}
		if dev.Type != "disk" {
			continue
		}
		disks = append(disks, dev)
	}
	sort.Slice(disks, func(i, j int) bool { return disks[i].Name < disks[j].Name })
	return disks
}

// lsblkDevices runs lsblk and returns the entries whose type is "disk".
// It returns nil (after logging a warning) when lsblk cannot be run or its
// output cannot be parsed.
//
// NOTE(review): lsblk is invoked without -b, so SIZE is presumably the
// human-readable form (e.g. "1.8T"), which parseStorageBytes cannot parse —
// confirm before relying on lsblkDevice.Size as a byte count.
func lsblkDevices() []lsblkDevice {
	out, err := exec.Command("lsblk", "-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL").Output()
	if err != nil {
		slog.Warn("storage: lsblk failed", "err", err)
		return nil
	}
	var root lsblkRoot
	if err := json.Unmarshal(out, &root); err != nil {
		slog.Warn("storage: lsblk parse failed", "err", err)
		return nil
	}
	var disks []lsblkDevice
	for _, d := range root.Blockdevices {
		if d.Type == "disk" {
			disks = append(disks, d)
		}
	}
	return disks
}

// nvmeListDevices runs `nvme list -o json` and converts each entry into an
// lsblkDevice (type "disk", transport "nvme", size as a decimal byte count).
// It returns nil when the command fails or its output cannot be parsed.
func nvmeListDevices() []lsblkDevice {
	out, err := exec.Command("nvme", "list", "-o", "json").Output()
	if err != nil {
		// Best effort: lsblk remains the primary source, so only note this
		// at debug level.
		slog.Debug("storage: nvme list failed", "err", err)
		return nil
	}
	var root nvmeListRoot
	if err := json.Unmarshal(out, &root); err != nil {
		slog.Warn("storage: nvme list parse failed", "err", err)
		return nil
	}
	devices := make([]lsblkDevice, 0, len(root.Devices))
	for _, dev := range root.Devices {
		// Check the path before taking its base name: filepath.Base("")
		// returns ".", which would otherwise pass as a device name.
		path := strings.TrimSpace(dev.DevicePath)
		if path == "" {
			continue
		}
		name := filepath.Base(path)
		if name == "." || name == string(filepath.Separator) {
			continue
		}
		devices = append(devices, lsblkDevice{
			Name:   name,
			Type:   "disk",
			Size:   strconv.FormatInt(dev.PhysicalSize, 10),
			Serial: strings.TrimSpace(dev.SerialNumber),
			Model:  strings.TrimSpace(dev.ModelNumber),
			Tran:   "nvme",
		})
	}
	return devices
}

// mergeStorageDevice returns existing with every blank field filled in from
// incoming. When existing has no name it is treated as absent and incoming is
// returned unchanged.
func mergeStorageDevice(existing, incoming lsblkDevice) lsblkDevice {
	if existing.Name == "" {
		return incoming
	}
	if existing.Type == "" {
		existing.Type = incoming.Type
	}
	fill := func(dst *string, src string) {
		if strings.TrimSpace(*dst) == "" {
			*dst = src
		}
	}
	fill(&existing.Size, incoming.Size)
	fill(&existing.Serial, incoming.Serial)
	fill(&existing.Model, incoming.Model)
	fill(&existing.Tran, incoming.Tran)
	fill(&existing.Hctl, incoming.Hctl)
	return existing
}

// smartctlInfo is the subset of smartctl -j -a output we care about.
type smartctlInfo struct { ModelFamily string `json:"model_family"` ModelName string `json:"model_name"` SerialNumber string `json:"serial_number"` FirmwareVer string `json:"firmware_version"` RotationRate int `json:"rotation_rate"` Temperature struct { Current int `json:"current"` } `json:"temperature"` SmartStatus struct { Passed bool `json:"passed"` } `json:"smart_status"` UserCapacity struct { Bytes int64 `json:"bytes"` } `json:"user_capacity"` AtaSmartAttributes struct { Table []struct { ID int `json:"id"` Name string `json:"name"` Raw struct { Value int64 `json:"value"` } `json:"raw"` } `json:"table"` } `json:"ata_smart_attributes"` PowerOnTime struct { Hours int `json:"hours"` } `json:"power_on_time"` PowerCycleCount int `json:"power_cycle_count"` } func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage { present := true s := schema.HardwareStorage{Present: &present} s.Telemetry = map[string]any{"linux_device": "/dev/" + dev.Name} tran := strings.ToLower(dev.Tran) devPath := "/dev/" + dev.Name // determine device type (refined by smartctl rotation_rate below) var devType string switch { case strings.HasPrefix(dev.Name, "nvme"): devType = "NVMe" case tran == "usb": devType = "USB" case tran == "sata" || tran == "sas": devType = "HDD" // refined to SSD below if rotation_rate==0 default: devType = "Unknown" } iface := strings.ToUpper(tran) if iface != "" { s.Interface = &iface } // slot from HCTL (host:channel:target:lun) if dev.Hctl != "" { s.Slot = &dev.Hctl } // run smartctl out, err := exec.Command("smartctl", "-j", "-a", devPath).Output() if err != nil { // still fill what lsblk gave us if v := strings.TrimSpace(dev.Model); v != "" { s.Model = &v } if v := strings.TrimSpace(dev.Serial); v != "" { s.SerialNumber = &v } s.Type = &devType return s } var info smartctlInfo if err := json.Unmarshal(out, &info); err == nil { if v := cleanDMIValue(info.ModelName); v != "" { s.Model = &v } if v := cleanDMIValue(info.SerialNumber); v != "" { s.SerialNumber 
= &v } if v := cleanDMIValue(info.FirmwareVer); v != "" { s.Firmware = &v } if info.UserCapacity.Bytes > 0 { gb := int(info.UserCapacity.Bytes / 1_000_000_000) s.SizeGB = &gb } // refine type from rotation_rate if info.RotationRate == 0 && devType != "NVMe" && devType != "USB" { devType = "SSD" } else if info.RotationRate > 0 { devType = "HDD" } s.Type = &devType if info.Temperature.Current > 0 { t := float64(info.Temperature.Current) s.TemperatureC = &t } if info.PowerOnTime.Hours > 0 { v := int64(info.PowerOnTime.Hours) s.PowerOnHours = &v } if info.PowerCycleCount > 0 { v := int64(info.PowerCycleCount) s.PowerCycles = &v } reallocated := int64(0) pending := int64(0) uncorrectable := int64(0) lifeRemaining := int64(0) for _, attr := range info.AtaSmartAttributes.Table { switch attr.ID { case 5: reallocated = attr.Raw.Value s.ReallocatedSectors = &reallocated case 177: value := float64(attr.Raw.Value) s.LifeUsedPct = &value case 231: lifeRemaining = attr.Raw.Value value := float64(attr.Raw.Value) s.LifeRemainingPct = &value case 241: value := attr.Raw.Value s.WrittenBytes = &value case 197: pending = attr.Raw.Value s.CurrentPendingSectors = &pending case 198: uncorrectable = attr.Raw.Value s.OfflineUncorrectable = &uncorrectable } } status := storageHealthStatus{ overallPassed: info.SmartStatus.Passed, hasOverall: true, reallocatedSectors: reallocated, pendingSectors: pending, offlineUncorrectable: uncorrectable, lifeRemainingPct: lifeRemaining, } setStorageHealthStatus(&s, status) return s } s.Type = &devType status := statusUnknown s.Status = &status return s } // nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about. 
type nvmeSmartLog struct { CriticalWarning int `json:"critical_warning"` PercentageUsed int `json:"percentage_used"` AvailableSpare int `json:"available_spare"` SpareThreshold int `json:"spare_thresh"` Temperature int64 `json:"temperature"` PowerOnHours int64 `json:"power_on_hours"` PowerCycles int64 `json:"power_cycles"` UnsafeShutdowns int64 `json:"unsafe_shutdowns"` DataUnitsRead int64 `json:"data_units_read"` DataUnitsWritten int64 `json:"data_units_written"` ControllerBusy int64 `json:"controller_busy_time"` MediaErrors int64 `json:"media_errors"` NumErrLogEntries int64 `json:"num_err_log_entries"` } // nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output. type nvmeIDCtrl struct { ModelNumber string `json:"mn"` SerialNumber string `json:"sn"` FirmwareRev string `json:"fr"` TotalCapacity int64 `json:"tnvmcap"` } func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage { present := true devType := "NVMe" iface := "NVMe" status := statusOK s := schema.HardwareStorage{ HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}, Present: &present, Type: &devType, Interface: &iface, Telemetry: map[string]any{"linux_device": "/dev/" + dev.Name}, } devPath := "/dev/" + dev.Name if v := cleanDMIValue(strings.TrimSpace(dev.Model)); v != "" { s.Model = &v } if v := cleanDMIValue(strings.TrimSpace(dev.Serial)); v != "" { s.SerialNumber = &v } if size := parseStorageBytes(dev.Size); size > 0 { gb := int(size / 1_000_000_000) if gb > 0 { s.SizeGB = &gb } } // id-ctrl: model, serial, firmware, capacity if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil { var ctrl nvmeIDCtrl if json.Unmarshal(out, &ctrl) == nil { if v := cleanDMIValue(strings.TrimSpace(ctrl.ModelNumber)); v != "" { s.Model = &v } if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" { s.SerialNumber = &v } if v := cleanDMIValue(strings.TrimSpace(ctrl.FirmwareRev)); v != "" { s.Firmware = &v } if ctrl.TotalCapacity > 0 { gb := 
int(ctrl.TotalCapacity / 1_000_000_000) s.SizeGB = &gb } } } // smart-log: wear telemetry if out, err := exec.Command("nvme", "smart-log", devPath, "-o", "json").Output(); err == nil { var log nvmeSmartLog if json.Unmarshal(out, &log) == nil { if log.PowerOnHours > 0 { s.PowerOnHours = &log.PowerOnHours } if log.PowerCycles > 0 { s.PowerCycles = &log.PowerCycles } if log.UnsafeShutdowns > 0 { s.UnsafeShutdowns = &log.UnsafeShutdowns } if log.PercentageUsed > 0 { v := float64(log.PercentageUsed) s.LifeUsedPct = &v remaining := 100 - v s.LifeRemainingPct = &remaining } if log.DataUnitsWritten > 0 { v := nvmeDataUnitsToBytes(log.DataUnitsWritten) s.WrittenBytes = &v } if log.DataUnitsRead > 0 { v := nvmeDataUnitsToBytes(log.DataUnitsRead) s.ReadBytes = &v } if log.AvailableSpare > 0 { v := float64(log.AvailableSpare) s.AvailableSparePct = &v } if log.MediaErrors > 0 { s.MediaErrors = &log.MediaErrors } if log.NumErrLogEntries > 0 { s.ErrorLogEntries = &log.NumErrLogEntries } if log.Temperature > 0 { v := float64(log.Temperature - 273) s.TemperatureC = &v } setStorageHealthStatus(&s, storageHealthStatus{ criticalWarning: log.CriticalWarning, percentageUsed: int64(log.PercentageUsed), availableSpare: int64(log.AvailableSpare), spareThreshold: int64(log.SpareThreshold), unsafeShutdowns: log.UnsafeShutdowns, mediaErrors: log.MediaErrors, errorLogEntries: log.NumErrLogEntries, }) return s } } status = statusUnknown s.Status = &status return s } func parseStorageBytes(raw string) int64 { value, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64) if err == nil && value > 0 { return value } return 0 } func nvmeDataUnitsToBytes(units int64) int64 { if units <= 0 { return 0 } return units * 512000 } type storageHealthStatus struct { hasOverall bool overallPassed bool reallocatedSectors int64 pendingSectors int64 offlineUncorrectable int64 lifeRemainingPct int64 criticalWarning int percentageUsed int64 availableSpare int64 spareThreshold int64 unsafeShutdowns int64 
mediaErrors int64 errorLogEntries int64 } func setStorageHealthStatus(s *schema.HardwareStorage, health storageHealthStatus) { status := statusOK var description *string switch { case health.hasOverall && !health.overallPassed: status = statusCritical description = stringPtr("SMART overall self-assessment failed") case health.criticalWarning > 0: status = statusCritical description = stringPtr("NVMe critical warning is set") case health.pendingSectors > 0 || health.offlineUncorrectable > 0: status = statusCritical description = stringPtr("Pending or offline uncorrectable sectors detected") case health.mediaErrors > 0: status = statusWarning description = stringPtr("Media errors reported") case health.reallocatedSectors > 0: status = statusWarning description = stringPtr("Reallocated sectors detected") case health.errorLogEntries > 0: status = statusWarning description = stringPtr("Device error log contains entries") case health.lifeRemainingPct > 0 && health.lifeRemainingPct <= 10: status = statusWarning description = stringPtr("Life remaining is low") case health.percentageUsed >= 95: status = statusWarning description = stringPtr("Drive wear level is high") case health.availableSpare > 0 && health.spareThreshold > 0 && health.availableSpare <= health.spareThreshold: status = statusWarning description = stringPtr("Available spare is at or below threshold") case health.unsafeShutdowns > 100: status = statusWarning description = stringPtr("Unsafe shutdown count is high") } s.Status = &status s.ErrorDescription = description } func stringPtr(value string) *string { return &value }