Collect and report storage telemetry

This commit is contained in:
Mikhail Chusavitin
2026-04-29 09:40:58 +03:00
parent 29179917c3
commit 2163017a98
4 changed files with 243 additions and 2 deletions

View File

@@ -250,6 +250,8 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
}
var info smartctlInfo
var raw map[string]any
_ = json.Unmarshal(out, &raw)
if err := json.Unmarshal(out, &info); err == nil {
if v := cleanDMIValue(info.ModelName); v != "" {
s.Model = &v
@@ -302,8 +304,11 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
value := float64(attr.Raw.Value)
s.LifeRemainingPct = &value
case 241:
value := attr.Raw.Value
value := smartLBAsToBytes(attr.Raw.Value)
s.WrittenBytes = &value
case 242:
value := smartLBAsToBytes(attr.Raw.Value)
s.ReadBytes = &value
case 197:
pending = attr.Raw.Value
s.CurrentPendingSectors = &pending
@@ -321,6 +326,7 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
offlineUncorrectable: uncorrectable,
lifeRemainingPct: lifeRemaining,
}
applySCSISmartctlTelemetry(&s, raw, &status)
setStorageHealthStatus(&s, status)
return s
}
@@ -477,6 +483,127 @@ func nvmeDataUnitsToBytes(units int64) int64 {
return units * 512000
}
func smartLBAsToBytes(lbas int64) int64 {
if lbas <= 0 {
return 0
}
return lbas * 512
}
func applySCSISmartctlTelemetry(s *schema.HardwareStorage, raw map[string]any, status *storageHealthStatus) {
if s == nil || len(raw) == 0 {
return
}
if v, ok := firstInt64(raw,
"path:power_on_time.hours",
"path:accumulated_power_on_time.hours",
"path:power_on_time.hour",
"path:accumulated_power_on_time.hour",
); ok && v > 0 && s.PowerOnHours == nil {
s.PowerOnHours = &v
}
if v, ok := firstInt64(raw,
"path:power_cycle_count",
"path:start_stop_cycle_count",
"path:accumulated_start_stop_cycles",
); ok && v > 0 && s.PowerCycles == nil {
s.PowerCycles = &v
}
if v, ok := firstInt64(raw,
"path:scsi_grown_defect_list",
"path:grown_defect_list",
); ok && v > 0 && s.ReallocatedSectors == nil {
s.ReallocatedSectors = &v
if status != nil && status.reallocatedSectors == 0 {
status.reallocatedSectors = v
}
}
if v, ok := firstInt64(raw,
"path:percentage_used_endurance_indicator",
"path:scsi_percentage_used_endurance_indicator",
); ok && v > 0 {
if s.LifeUsedPct == nil {
fv := float64(v)
s.LifeUsedPct = &fv
}
if s.LifeRemainingPct == nil && v <= 100 {
remaining := float64(100 - v)
s.LifeRemainingPct = &remaining
if status != nil && status.lifeRemainingPct == 0 {
status.lifeRemainingPct = int64(remaining)
}
}
}
blockSize, hasBlockSize := firstInt64(raw,
"path:logical_block_size",
"path:block_size",
"path:user_capacity.block_size",
)
if hasBlockSize && blockSize > 0 {
if v, ok := firstInt64(raw,
"path:logical_blocks_written",
"path:total_lbas_written",
); ok && v > 0 && s.WrittenBytes == nil {
bytes := v * blockSize
s.WrittenBytes = &bytes
}
if v, ok := firstInt64(raw,
"path:logical_blocks_read",
"path:total_lbas_read",
); ok && v > 0 && s.ReadBytes == nil {
bytes := v * blockSize
s.ReadBytes = &bytes
}
}
}
func firstInt64(root map[string]any, candidates ...string) (int64, bool) {
for _, candidate := range candidates {
if !strings.HasPrefix(candidate, "path:") {
continue
}
path := strings.TrimPrefix(candidate, "path:")
if v, ok := nestedInt64(root, strings.Split(path, ".")); ok {
return v, true
}
}
return 0, false
}
func nestedInt64(root map[string]any, path []string) (int64, bool) {
var current any = root
for _, key := range path {
obj, ok := current.(map[string]any)
if !ok {
return 0, false
}
current, ok = obj[key]
if !ok {
return 0, false
}
}
switch v := current.(type) {
case float64:
return int64(v), true
case float32:
return int64(v), true
case int:
return int64(v), true
case int64:
return v, true
case int32:
return int64(v), true
case json.Number:
n, err := v.Int64()
return n, err == nil
case string:
n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64)
return n, err == nil
default:
return 0, false
}
}
type storageHealthStatus struct {
hasOverall bool
overallPassed bool

View File

@@ -0,0 +1,89 @@
package collector
import (
"testing"
"bee/audit/internal/schema"
)
func TestApplySCSISmartctlTelemetry(t *testing.T) {
t.Parallel()
raw := map[string]any{
"power_on_time": map[string]any{
"hours": float64(32123),
},
"accumulated_start_stop_cycles": float64(17),
"scsi_grown_defect_list": float64(4),
"percentage_used_endurance_indicator": float64(12),
"logical_block_size": float64(4096),
"logical_blocks_written": float64(1000),
"logical_blocks_read": float64(2000),
}
var disk schema.HardwareStorage
status := storageHealthStatus{}
applySCSISmartctlTelemetry(&disk, raw, &status)
if disk.PowerOnHours == nil || *disk.PowerOnHours != 32123 {
t.Fatalf("power_on_hours=%v want 32123", disk.PowerOnHours)
}
if disk.PowerCycles == nil || *disk.PowerCycles != 17 {
t.Fatalf("power_cycles=%v want 17", disk.PowerCycles)
}
if disk.ReallocatedSectors == nil || *disk.ReallocatedSectors != 4 {
t.Fatalf("reallocated=%v want 4", disk.ReallocatedSectors)
}
if disk.WrittenBytes == nil || *disk.WrittenBytes != 4096000 {
t.Fatalf("written_bytes=%v want 4096000", disk.WrittenBytes)
}
if disk.ReadBytes == nil || *disk.ReadBytes != 8192000 {
t.Fatalf("read_bytes=%v want 8192000", disk.ReadBytes)
}
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 12 {
t.Fatalf("life_used_pct=%v want 12", disk.LifeUsedPct)
}
if disk.LifeRemainingPct == nil || *disk.LifeRemainingPct != 88 {
t.Fatalf("life_remaining_pct=%v want 88", disk.LifeRemainingPct)
}
if status.reallocatedSectors != 4 {
t.Fatalf("status.reallocated=%d want 4", status.reallocatedSectors)
}
if status.lifeRemainingPct != 88 {
t.Fatalf("status.life_remaining_pct=%d want 88", status.lifeRemainingPct)
}
}
func TestApplySCSISmartctlTelemetryDoesNotOverwriteExistingValues(t *testing.T) {
t.Parallel()
powerOnHours := int64(10)
writtenBytes := int64(20)
lifeRemaining := 30.0
disk := schema.HardwareStorage{
PowerOnHours: &powerOnHours,
WrittenBytes: &writtenBytes,
LifeRemainingPct: &lifeRemaining,
}
raw := map[string]any{
"power_on_time": map[string]any{"hours": float64(999)},
"logical_block_size": float64(512),
"logical_blocks_written": float64(999),
"percentage_used_endurance_indicator": float64(50),
}
applySCSISmartctlTelemetry(&disk, raw, nil)
if *disk.PowerOnHours != 10 {
t.Fatalf("power_on_hours overwritten: got %d want 10", *disk.PowerOnHours)
}
if *disk.WrittenBytes != 20 {
t.Fatalf("written_bytes overwritten: got %d want 20", *disk.WrittenBytes)
}
if *disk.LifeRemainingPct != 30 {
t.Fatalf("life_remaining_pct overwritten: got %v want 30", *disk.LifeRemainingPct)
}
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 50 {
t.Fatalf("life_used_pct=%v want 50", disk.LifeUsedPct)
}
}

View File

@@ -0,0 +1,25 @@
package collector
import "testing"
func TestSmartLBAsToBytes(t *testing.T) {
t.Parallel()
tests := []struct {
name string
lbas int64
want int64
}{
{name: "zero", lbas: 0, want: 0},
{name: "single lba", lbas: 1, want: 512},
{name: "multiple lbas", lbas: 2048, want: 1048576},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := smartLBAsToBytes(tt.lbas); got != tt.want {
t.Fatalf("smartLBAsToBytes(%d)=%d want %d", tt.lbas, got, tt.want)
}
})
}
}