Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11d00b9442 | ||
|
|
6defa5ae15 | ||
|
|
c76658ed00 | ||
|
|
2163017a98 |
@@ -250,6 +250,8 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
||||
}
|
||||
|
||||
var info smartctlInfo
|
||||
var raw map[string]any
|
||||
_ = json.Unmarshal(out, &raw)
|
||||
if err := json.Unmarshal(out, &info); err == nil {
|
||||
if v := cleanDMIValue(info.ModelName); v != "" {
|
||||
s.Model = &v
|
||||
@@ -302,8 +304,11 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
||||
value := float64(attr.Raw.Value)
|
||||
s.LifeRemainingPct = &value
|
||||
case 241:
|
||||
value := attr.Raw.Value
|
||||
value := smartLBAsToBytes(attr.Raw.Value)
|
||||
s.WrittenBytes = &value
|
||||
case 242:
|
||||
value := smartLBAsToBytes(attr.Raw.Value)
|
||||
s.ReadBytes = &value
|
||||
case 197:
|
||||
pending = attr.Raw.Value
|
||||
s.CurrentPendingSectors = &pending
|
||||
@@ -321,6 +326,7 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
||||
offlineUncorrectable: uncorrectable,
|
||||
lifeRemainingPct: lifeRemaining,
|
||||
}
|
||||
applySCSISmartctlTelemetry(&s, raw, &status)
|
||||
setStorageHealthStatus(&s, status)
|
||||
return s
|
||||
}
|
||||
@@ -477,6 +483,127 @@ func nvmeDataUnitsToBytes(units int64) int64 {
|
||||
return units * 512000
|
||||
}
|
||||
|
||||
// smartLBAsToBytes converts an LBA counter into a byte count by treating every
// LBA as 512 bytes. Zero and negative counters yield 0.
//
// NOTE(review): the 512-byte unit is fixed here; confirm that every counter
// passed in really counts 512-byte LBAs for the devices being audited.
func smartLBAsToBytes(lbas int64) int64 {
	const bytesPerLBA = 512
	if lbas > 0 {
		return lbas * bytesPerLBA
	}
	return 0
}
|
||||
|
||||
func applySCSISmartctlTelemetry(s *schema.HardwareStorage, raw map[string]any, status *storageHealthStatus) {
|
||||
if s == nil || len(raw) == 0 {
|
||||
return
|
||||
}
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:power_on_time.hours",
|
||||
"path:accumulated_power_on_time.hours",
|
||||
"path:power_on_time.hour",
|
||||
"path:accumulated_power_on_time.hour",
|
||||
); ok && v > 0 && s.PowerOnHours == nil {
|
||||
s.PowerOnHours = &v
|
||||
}
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:power_cycle_count",
|
||||
"path:start_stop_cycle_count",
|
||||
"path:accumulated_start_stop_cycles",
|
||||
); ok && v > 0 && s.PowerCycles == nil {
|
||||
s.PowerCycles = &v
|
||||
}
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:scsi_grown_defect_list",
|
||||
"path:grown_defect_list",
|
||||
); ok && v > 0 && s.ReallocatedSectors == nil {
|
||||
s.ReallocatedSectors = &v
|
||||
if status != nil && status.reallocatedSectors == 0 {
|
||||
status.reallocatedSectors = v
|
||||
}
|
||||
}
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:percentage_used_endurance_indicator",
|
||||
"path:scsi_percentage_used_endurance_indicator",
|
||||
); ok && v > 0 {
|
||||
if s.LifeUsedPct == nil {
|
||||
fv := float64(v)
|
||||
s.LifeUsedPct = &fv
|
||||
}
|
||||
if s.LifeRemainingPct == nil && v <= 100 {
|
||||
remaining := float64(100 - v)
|
||||
s.LifeRemainingPct = &remaining
|
||||
if status != nil && status.lifeRemainingPct == 0 {
|
||||
status.lifeRemainingPct = int64(remaining)
|
||||
}
|
||||
}
|
||||
}
|
||||
blockSize, hasBlockSize := firstInt64(raw,
|
||||
"path:logical_block_size",
|
||||
"path:block_size",
|
||||
"path:user_capacity.block_size",
|
||||
)
|
||||
if hasBlockSize && blockSize > 0 {
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:logical_blocks_written",
|
||||
"path:total_lbas_written",
|
||||
); ok && v > 0 && s.WrittenBytes == nil {
|
||||
bytes := v * blockSize
|
||||
s.WrittenBytes = &bytes
|
||||
}
|
||||
if v, ok := firstInt64(raw,
|
||||
"path:logical_blocks_read",
|
||||
"path:total_lbas_read",
|
||||
); ok && v > 0 && s.ReadBytes == nil {
|
||||
bytes := v * blockSize
|
||||
s.ReadBytes = &bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func firstInt64(root map[string]any, candidates ...string) (int64, bool) {
|
||||
for _, candidate := range candidates {
|
||||
if !strings.HasPrefix(candidate, "path:") {
|
||||
continue
|
||||
}
|
||||
path := strings.TrimPrefix(candidate, "path:")
|
||||
if v, ok := nestedInt64(root, strings.Split(path, ".")); ok {
|
||||
return v, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// nestedInt64 follows path through nested map[string]any objects starting at
// root and converts the value found at the end into an int64.
//
// It returns (0, false) when a key is missing, when an intermediate value is
// not an object, or when the final value cannot be represented as an int64.
// Accepted final values are Go integers, floats (truncated toward zero),
// json.Number, and base-10 strings with surrounding whitespace trimmed.
func nestedInt64(root map[string]any, path []string) (int64, bool) {
	var current any = root
	for _, key := range path {
		obj, ok := current.(map[string]any)
		if !ok {
			return 0, false
		}
		if current, ok = obj[key]; !ok {
			return 0, false
		}
	}

	// Converting a float that int64 cannot represent (NaN, ±Inf, out of range)
	// gives an implementation-dependent result, so such values are rejected
	// instead of being reported as a successful lookup. NaN fails both
	// comparisons and is therefore excluded as well.
	fromFloat := func(f float64) (int64, bool) {
		if f >= -(1<<63) && f < (1<<63) {
			return int64(f), true
		}
		return 0, false
	}

	switch v := current.(type) {
	case float64:
		return fromFloat(v)
	case float32:
		return fromFloat(float64(v))
	case int:
		return int64(v), true
	case int64:
		return v, true
	case int32:
		return int64(v), true
	case json.Number:
		n, err := v.Int64()
		return n, err == nil
	case string:
		n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64)
		return n, err == nil
	default:
		return 0, false
	}
}
|
||||
|
||||
type storageHealthStatus struct {
|
||||
hasOverall bool
|
||||
overallPassed bool
|
||||
|
||||
89
audit/internal/collector/storage_scsi_test.go
Normal file
89
audit/internal/collector/storage_scsi_test.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"bee/audit/internal/schema"
|
||||
)
|
||||
|
||||
func TestApplySCSISmartctlTelemetry(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
raw := map[string]any{
|
||||
"power_on_time": map[string]any{
|
||||
"hours": float64(32123),
|
||||
},
|
||||
"accumulated_start_stop_cycles": float64(17),
|
||||
"scsi_grown_defect_list": float64(4),
|
||||
"percentage_used_endurance_indicator": float64(12),
|
||||
"logical_block_size": float64(4096),
|
||||
"logical_blocks_written": float64(1000),
|
||||
"logical_blocks_read": float64(2000),
|
||||
}
|
||||
|
||||
var disk schema.HardwareStorage
|
||||
status := storageHealthStatus{}
|
||||
applySCSISmartctlTelemetry(&disk, raw, &status)
|
||||
|
||||
if disk.PowerOnHours == nil || *disk.PowerOnHours != 32123 {
|
||||
t.Fatalf("power_on_hours=%v want 32123", disk.PowerOnHours)
|
||||
}
|
||||
if disk.PowerCycles == nil || *disk.PowerCycles != 17 {
|
||||
t.Fatalf("power_cycles=%v want 17", disk.PowerCycles)
|
||||
}
|
||||
if disk.ReallocatedSectors == nil || *disk.ReallocatedSectors != 4 {
|
||||
t.Fatalf("reallocated=%v want 4", disk.ReallocatedSectors)
|
||||
}
|
||||
if disk.WrittenBytes == nil || *disk.WrittenBytes != 4096000 {
|
||||
t.Fatalf("written_bytes=%v want 4096000", disk.WrittenBytes)
|
||||
}
|
||||
if disk.ReadBytes == nil || *disk.ReadBytes != 8192000 {
|
||||
t.Fatalf("read_bytes=%v want 8192000", disk.ReadBytes)
|
||||
}
|
||||
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 12 {
|
||||
t.Fatalf("life_used_pct=%v want 12", disk.LifeUsedPct)
|
||||
}
|
||||
if disk.LifeRemainingPct == nil || *disk.LifeRemainingPct != 88 {
|
||||
t.Fatalf("life_remaining_pct=%v want 88", disk.LifeRemainingPct)
|
||||
}
|
||||
if status.reallocatedSectors != 4 {
|
||||
t.Fatalf("status.reallocated=%d want 4", status.reallocatedSectors)
|
||||
}
|
||||
if status.lifeRemainingPct != 88 {
|
||||
t.Fatalf("status.life_remaining_pct=%d want 88", status.lifeRemainingPct)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplySCSISmartctlTelemetryDoesNotOverwriteExistingValues(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
powerOnHours := int64(10)
|
||||
writtenBytes := int64(20)
|
||||
lifeRemaining := 30.0
|
||||
disk := schema.HardwareStorage{
|
||||
PowerOnHours: &powerOnHours,
|
||||
WrittenBytes: &writtenBytes,
|
||||
LifeRemainingPct: &lifeRemaining,
|
||||
}
|
||||
raw := map[string]any{
|
||||
"power_on_time": map[string]any{"hours": float64(999)},
|
||||
"logical_block_size": float64(512),
|
||||
"logical_blocks_written": float64(999),
|
||||
"percentage_used_endurance_indicator": float64(50),
|
||||
}
|
||||
|
||||
applySCSISmartctlTelemetry(&disk, raw, nil)
|
||||
|
||||
if *disk.PowerOnHours != 10 {
|
||||
t.Fatalf("power_on_hours overwritten: got %d want 10", *disk.PowerOnHours)
|
||||
}
|
||||
if *disk.WrittenBytes != 20 {
|
||||
t.Fatalf("written_bytes overwritten: got %d want 20", *disk.WrittenBytes)
|
||||
}
|
||||
if *disk.LifeRemainingPct != 30 {
|
||||
t.Fatalf("life_remaining_pct overwritten: got %v want 30", *disk.LifeRemainingPct)
|
||||
}
|
||||
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 50 {
|
||||
t.Fatalf("life_used_pct=%v want 50", disk.LifeUsedPct)
|
||||
}
|
||||
}
|
||||
25
audit/internal/collector/storage_telemetry_test.go
Normal file
25
audit/internal/collector/storage_telemetry_test.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package collector
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSmartLBAsToBytes(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
lbas int64
|
||||
want int64
|
||||
}{
|
||||
{name: "zero", lbas: 0, want: 0},
|
||||
{name: "single lba", lbas: 1, want: 512},
|
||||
{name: "multiple lbas", lbas: 2048, want: 1048576},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := smartLBAsToBytes(tt.lbas); got != tt.want {
|
||||
t.Fatalf("smartLBAsToBytes(%d)=%d want %d", tt.lbas, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -44,3 +44,48 @@ func TestHardwareSnapshotMarshalsNewContractFields(t *testing.T) {
|
||||
t.Fatalf("missing event_logs payload: %s", text)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHardwareSnapshotMarshalsStorageTelemetryFields(t *testing.T) {
|
||||
powerOnHours := int64(12450)
|
||||
writtenBytes := int64(9876543210)
|
||||
readBytes := int64(1234567890)
|
||||
lifeRemainingPct := 91.0
|
||||
|
||||
payload := HardwareIngestRequest{
|
||||
CollectedAt: "2026-03-15T15:00:00Z",
|
||||
Hardware: HardwareSnapshot{
|
||||
Board: HardwareBoard{SerialNumber: "SRV-001"},
|
||||
Storage: []HardwareStorage{
|
||||
{
|
||||
SerialNumber: stringPtr("DISK-001"),
|
||||
Model: stringPtr("TestDisk"),
|
||||
PowerOnHours: &powerOnHours,
|
||||
WrittenBytes: &writtenBytes,
|
||||
ReadBytes: &readBytes,
|
||||
LifeRemainingPct: &lifeRemainingPct,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
data, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
text := string(data)
|
||||
for _, needle := range []string{
|
||||
`"storage":[{`,
|
||||
`"power_on_hours":12450`,
|
||||
`"written_bytes":9876543210`,
|
||||
`"read_bytes":1234567890`,
|
||||
`"life_remaining_pct":91`,
|
||||
} {
|
||||
if !strings.Contains(text, needle) {
|
||||
t.Fatalf("missing %q in payload: %s", needle, text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// stringPtr returns a pointer to a fresh copy of v, for filling optional
// *string fields from a literal.
func stringPtr(v string) *string {
	p := new(string)
	*p = v
	return p
}
|
||||
|
||||
2
bible
2
bible
Submodule bible updated: 1d89a4918e...98448c993f
@@ -10,4 +10,4 @@ Generic engineering rules live in `bible/rules/patterns/`.
|
||||
| `architecture/system-overview.md` | What bee does, scope, tech stack |
|
||||
| `architecture/runtime-flows.md` | Boot sequence, audit flow, service order |
|
||||
| `docs/hardware-ingest-contract.md` | Current Reanimator hardware ingest JSON contract |
|
||||
| `decisions/` | Architectural decision log |
|
||||
| `decisions/` | Architectural decision log, including read-only submodule policy |
|
||||
|
||||
@@ -58,6 +58,8 @@ Fills gaps where Redfish/logpile is blind:
|
||||
- `bee` should populate current component state, hardware inventory, telemetry, and `status_checked_at`.
|
||||
- Historical status transitions and component replacement logic belong to the centralized ingest/lifecycle system, not to `bee`.
|
||||
- Contract fields that have no honest local source on a generic Linux host may remain empty.
|
||||
- Embedded submodules such as `internal/chart/` and `bible/` are read-only for `bee` feature work.
|
||||
- If the UI needs extra information, `bee` must emit it through the standard audit JSON contract rather than patching `chart`.
|
||||
|
||||
## Tech stack
|
||||
|
||||
@@ -101,7 +103,7 @@ Fills gaps where Redfish/logpile is blind:
|
||||
| `iso/builder/` | ISO build scripts and `live-build` profile |
|
||||
| `iso/overlay/` | Source overlay copied into a staged build overlay |
|
||||
| `iso/vendor/` | Optional pre-built vendor binaries (storcli64, sas2ircu, sas3ircu, arcconf, ssacli, …) |
|
||||
| `internal/chart/` | Git submodule with `reanimator/chart`, embedded into `bee web` |
|
||||
| `internal/chart/` | Git submodule with `reanimator/chart`, embedded into `bee web`; update by submodule pointer only, never by local `bee`-specific edits |
|
||||
| `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
|
||||
| `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
|
||||
| `iso/overlay/etc/profile.d/bee.sh` | tty1 welcome message with web UI URLs |
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
# Decision: Treat embedded submodules as read-only
|
||||
|
||||
## Context
|
||||
|
||||
`bee` embeds external git submodules such as:
|
||||
|
||||
- `internal/chart/` — `reanimator/chart`, a generic read-only viewer for Reanimator JSON snapshots
|
||||
- `bible/` — shared engineering rules and contracts
|
||||
|
||||
These repositories are reused by other projects. A local feature request in `bee`
|
||||
must not be solved by silently changing shared submodule behavior.
|
||||
|
||||
The concrete failure mode here was attempting to add project-specific storage
|
||||
telemetry presentation by editing `internal/chart/`. That couples a shared viewer
|
||||
to one host application's needs and creates hidden cross-project regressions.
|
||||
|
||||
## Decision
|
||||
|
||||
Embedded submodules are read-only from the point of view of `bee`.
|
||||
|
||||
- Do not implement `bee`-specific behavior by editing `internal/chart/`.
|
||||
- Do not implement `bee`-specific behavior by editing `bible/`.
|
||||
- If `bee` needs new data in the report, produce it in the standard audit JSON
|
||||
emitted by `bee` itself.
|
||||
- `chart` must continue to consume the canonical snapshot as an external viewer,
|
||||
without host-specific forks.
|
||||
- Updating a submodule pointer to an upstream commit is allowed.
|
||||
- Carrying local unmerged submodule commits as part of a `bee` feature is forbidden.
|
||||
|
||||
## Consequences
|
||||
|
||||
- Audit/report features must be expressed through the contract in
|
||||
`bible-local/docs/hardware-ingest-contract.md`.
|
||||
- `bee` owns collection, normalization, and serialization of storage telemetry in
|
||||
`hardware.storage[]`.
|
||||
- `chart` remains a pure visualization module that reads the snapshot it is given.
|
||||
- If a capability is genuinely missing in a shared submodule, it must be proposed
|
||||
and landed upstream as a generic change first, then pulled into `bee` via a
|
||||
normal submodule update.
|
||||
@@ -6,3 +6,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
|
||||
|---|---|---|
|
||||
| 2026-03-05 | Use NVIDIA proprietary driver | active |
|
||||
| 2026-04-01 | Treat memtest as explicit ISO content | active |
|
||||
| 2026-04-29 | Treat embedded submodules as read-only | active |
|
||||
|
||||
Reference in New Issue
Block a user