From 2163017a986aaea6dabc87dfa339b9daeb5a21f8 Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 29 Apr 2026 09:40:58 +0300 Subject: [PATCH] Collect and report storage telemetry --- audit/internal/collector/storage.go | 129 +++++++++++++++++- audit/internal/collector/storage_scsi_test.go | 89 ++++++++++++ .../collector/storage_telemetry_test.go | 25 ++++ internal/chart | 2 +- 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 audit/internal/collector/storage_scsi_test.go create mode 100644 audit/internal/collector/storage_telemetry_test.go diff --git a/audit/internal/collector/storage.go b/audit/internal/collector/storage.go index 7e371ab..c280f52 100644 --- a/audit/internal/collector/storage.go +++ b/audit/internal/collector/storage.go @@ -250,6 +250,8 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage { } var info smartctlInfo + var raw map[string]any + _ = json.Unmarshal(out, &raw) if err := json.Unmarshal(out, &info); err == nil { if v := cleanDMIValue(info.ModelName); v != "" { s.Model = &v @@ -302,8 +304,11 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage { value := float64(attr.Raw.Value) s.LifeRemainingPct = &value case 241: - value := attr.Raw.Value + value := smartLBAsToBytes(attr.Raw.Value) s.WrittenBytes = &value + case 242: + value := smartLBAsToBytes(attr.Raw.Value) + s.ReadBytes = &value case 197: pending = attr.Raw.Value s.CurrentPendingSectors = &pending @@ -321,6 +326,7 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage { offlineUncorrectable: uncorrectable, lifeRemainingPct: lifeRemaining, } + applySCSISmartctlTelemetry(&s, raw, &status) setStorageHealthStatus(&s, status) return s } @@ -477,6 +483,127 @@ func nvmeDataUnitsToBytes(units int64) int64 { return units * 512000 } +func smartLBAsToBytes(lbas int64) int64 { + if lbas <= 0 { + return 0 + } + return lbas * 512 +} + +func applySCSISmartctlTelemetry(s *schema.HardwareStorage, raw map[string]any, status *storageHealthStatus) { + if s == nil || len(raw) == 0 { + return + } + if v, ok := firstInt64(raw, + "path:power_on_time.hours", + "path:accumulated_power_on_time.hours", + "path:power_on_time.hour", + "path:accumulated_power_on_time.hour", + ); ok && v > 0 && s.PowerOnHours == nil { + s.PowerOnHours = &v + } + if v, ok := firstInt64(raw, + "path:power_cycle_count", + "path:start_stop_cycle_count", + "path:accumulated_start_stop_cycles", + ); ok && v > 0 && s.PowerCycles == nil { + s.PowerCycles = &v + } + if v, ok := firstInt64(raw, + "path:scsi_grown_defect_list", + "path:grown_defect_list", + ); ok && v > 0 && s.ReallocatedSectors == nil { + s.ReallocatedSectors = &v + if status != nil && status.reallocatedSectors == 0 { + status.reallocatedSectors = v + } + } + if v, ok := firstInt64(raw, + "path:percentage_used_endurance_indicator", + "path:scsi_percentage_used_endurance_indicator", + ); ok && v > 0 { + if s.LifeUsedPct == nil { + fv := float64(v) + s.LifeUsedPct = &fv + } + if s.LifeRemainingPct == nil && v <= 100 { + remaining := float64(100 - v) + s.LifeRemainingPct = &remaining + if status != nil && status.lifeRemainingPct == 0 { + status.lifeRemainingPct = int64(remaining) + } + } + } + blockSize, hasBlockSize := firstInt64(raw, + "path:logical_block_size", + "path:block_size", + "path:user_capacity.block_size", + ) + if hasBlockSize && blockSize > 0 { + if v, ok := firstInt64(raw, + "path:logical_blocks_written", + "path:total_lbas_written", + ); ok && v > 0 && s.WrittenBytes == nil { + bytes := v * blockSize + s.WrittenBytes = &bytes + } + if v, ok := firstInt64(raw, + "path:logical_blocks_read", + "path:total_lbas_read", + ); ok && v > 0 && s.ReadBytes == nil { + bytes := v * blockSize + s.ReadBytes = &bytes + } + } +} + +func firstInt64(root map[string]any, candidates ...string) (int64, bool) { + for _, candidate := range candidates { + if !strings.HasPrefix(candidate, "path:") { + continue + } + path := strings.TrimPrefix(candidate, "path:") + if v, ok := nestedInt64(root, strings.Split(path, ".")); ok { + return v, true + } + } + return 0, false +} + +func nestedInt64(root map[string]any, path []string) (int64, bool) { + var current any = root + for _, key := range path { + obj, ok := current.(map[string]any) + if !ok { + return 0, false + } + current, ok = obj[key] + if !ok { + return 0, false + } + } + switch v := current.(type) { + case float64: + return int64(v), true + case float32: + return int64(v), true + case int: + return int64(v), true + case int64: + return v, true + case int32: + return int64(v), true + case json.Number: + n, err := v.Int64() + return n, err == nil + case string: + n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) + return n, err == nil + default: + return 0, false + } +} + type storageHealthStatus struct { hasOverall bool overallPassed bool diff --git a/audit/internal/collector/storage_scsi_test.go b/audit/internal/collector/storage_scsi_test.go new file mode 100644 index 0000000..f07f4cf --- /dev/null +++ b/audit/internal/collector/storage_scsi_test.go @@ -0,0 +1,89 @@ +package collector + +import ( + "testing" + + "bee/audit/internal/schema" +) + +func TestApplySCSISmartctlTelemetry(t *testing.T) { + t.Parallel() + + raw := map[string]any{ + "power_on_time": map[string]any{ + "hours": float64(32123), + }, + "accumulated_start_stop_cycles": float64(17), + "scsi_grown_defect_list": float64(4), + "percentage_used_endurance_indicator": float64(12), + "logical_block_size": float64(4096), + "logical_blocks_written": float64(1000), + "logical_blocks_read": float64(2000), + } + + var disk schema.HardwareStorage + status := storageHealthStatus{} + applySCSISmartctlTelemetry(&disk, raw, &status) + + if disk.PowerOnHours == nil || *disk.PowerOnHours != 32123 { + t.Fatalf("power_on_hours=%v want 32123", disk.PowerOnHours) + } + if disk.PowerCycles == nil || *disk.PowerCycles != 17 { + t.Fatalf("power_cycles=%v want 17", disk.PowerCycles) + } + if disk.ReallocatedSectors == nil || *disk.ReallocatedSectors != 4 { + t.Fatalf("reallocated=%v want 4", disk.ReallocatedSectors) + } + if disk.WrittenBytes == nil || *disk.WrittenBytes != 4096000 { + t.Fatalf("written_bytes=%v want 4096000", disk.WrittenBytes) + } + if disk.ReadBytes == nil || *disk.ReadBytes != 8192000 { + t.Fatalf("read_bytes=%v want 8192000", disk.ReadBytes) + } + if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 12 { + t.Fatalf("life_used_pct=%v want 12", disk.LifeUsedPct) + } + if disk.LifeRemainingPct == nil || *disk.LifeRemainingPct != 88 { + t.Fatalf("life_remaining_pct=%v want 88", disk.LifeRemainingPct) + } + if status.reallocatedSectors != 4 { + t.Fatalf("status.reallocated=%d want 4", status.reallocatedSectors) + } + if status.lifeRemainingPct != 88 { + t.Fatalf("status.life_remaining_pct=%d want 88", status.lifeRemainingPct) + } +} + +func TestApplySCSISmartctlTelemetryDoesNotOverwriteExistingValues(t *testing.T) { + t.Parallel() + + powerOnHours := int64(10) + writtenBytes := int64(20) + lifeRemaining := 30.0 + disk := schema.HardwareStorage{ + PowerOnHours: &powerOnHours, + WrittenBytes: &writtenBytes, + LifeRemainingPct: &lifeRemaining, + } + raw := map[string]any{ + "power_on_time": map[string]any{"hours": float64(999)}, + "logical_block_size": float64(512), + "logical_blocks_written": float64(999), + "percentage_used_endurance_indicator": float64(50), + } + + applySCSISmartctlTelemetry(&disk, raw, nil) + + if *disk.PowerOnHours != 10 { + t.Fatalf("power_on_hours overwritten: got %d want 10", *disk.PowerOnHours) + } + if *disk.WrittenBytes != 20 { + t.Fatalf("written_bytes overwritten: got %d want 20", *disk.WrittenBytes) + } + if *disk.LifeRemainingPct != 30 { + t.Fatalf("life_remaining_pct overwritten: got %v want 30", *disk.LifeRemainingPct) + } + if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 50 { + t.Fatalf("life_used_pct=%v want 50", disk.LifeUsedPct) + } +} diff --git a/audit/internal/collector/storage_telemetry_test.go b/audit/internal/collector/storage_telemetry_test.go new file mode 100644 index 0000000..53e9fb2 --- /dev/null +++ b/audit/internal/collector/storage_telemetry_test.go @@ -0,0 +1,25 @@ +package collector + +import "testing" + +func TestSmartLBAsToBytes(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + lbas int64 + want int64 + }{ + {name: "zero", lbas: 0, want: 0}, + {name: "single lba", lbas: 1, want: 512}, + {name: "multiple lbas", lbas: 2048, want: 1048576}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := smartLBAsToBytes(tt.lbas); got != tt.want { + t.Fatalf("smartLBAsToBytes(%d)=%d want %d", tt.lbas, got, tt.want) + } + }) + } +} diff --git a/internal/chart b/internal/chart index ac8120c..92efd04 160000 --- a/internal/chart +++ b/internal/chart @@ -1 +1 @@ -Subproject commit ac8120c8ab800bb3067efcada50bc4272dc8f76a +Subproject commit 92efd047b8a31ad97669dd2f84bb19b2def1ae61