Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11d00b9442 | ||
|
|
6defa5ae15 | ||
|
|
c76658ed00 | ||
|
|
2163017a98 |
@@ -250,6 +250,8 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var info smartctlInfo
|
var info smartctlInfo
|
||||||
|
var raw map[string]any
|
||||||
|
_ = json.Unmarshal(out, &raw)
|
||||||
if err := json.Unmarshal(out, &info); err == nil {
|
if err := json.Unmarshal(out, &info); err == nil {
|
||||||
if v := cleanDMIValue(info.ModelName); v != "" {
|
if v := cleanDMIValue(info.ModelName); v != "" {
|
||||||
s.Model = &v
|
s.Model = &v
|
||||||
@@ -302,8 +304,11 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
value := float64(attr.Raw.Value)
|
value := float64(attr.Raw.Value)
|
||||||
s.LifeRemainingPct = &value
|
s.LifeRemainingPct = &value
|
||||||
case 241:
|
case 241:
|
||||||
value := attr.Raw.Value
|
value := smartLBAsToBytes(attr.Raw.Value)
|
||||||
s.WrittenBytes = &value
|
s.WrittenBytes = &value
|
||||||
|
case 242:
|
||||||
|
value := smartLBAsToBytes(attr.Raw.Value)
|
||||||
|
s.ReadBytes = &value
|
||||||
case 197:
|
case 197:
|
||||||
pending = attr.Raw.Value
|
pending = attr.Raw.Value
|
||||||
s.CurrentPendingSectors = &pending
|
s.CurrentPendingSectors = &pending
|
||||||
@@ -321,6 +326,7 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
offlineUncorrectable: uncorrectable,
|
offlineUncorrectable: uncorrectable,
|
||||||
lifeRemainingPct: lifeRemaining,
|
lifeRemainingPct: lifeRemaining,
|
||||||
}
|
}
|
||||||
|
applySCSISmartctlTelemetry(&s, raw, &status)
|
||||||
setStorageHealthStatus(&s, status)
|
setStorageHealthStatus(&s, status)
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
@@ -477,6 +483,127 @@ func nvmeDataUnitsToBytes(units int64) int64 {
|
|||||||
return units * 512000
|
return units * 512000
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// smartLBAsToBytes converts a raw SMART LBA counter into a byte count using a
// fixed LBA size of 512 bytes.
//
// Non-positive counters yield 0. A counter so large that the byte total would
// not fit in an int64 saturates at the maximum int64 value instead of wrapping
// around to a negative number.
//
// NOTE(review): assumes the drive reports the counter in 512-byte units; some
// vendors may use a different unit for these attributes — confirm per model.
func smartLBAsToBytes(lbas int64) int64 {
	const (
		bytesPerLBA = 512
		maxInt64    = 1<<63 - 1
	)
	if lbas <= 0 {
		return 0
	}
	// Guard the multiplication: Go integer overflow wraps silently.
	if lbas > maxInt64/bytesPerLBA {
		return maxInt64
	}
	return lbas * bytesPerLBA
}
|
||||||
|
|
||||||
|
func applySCSISmartctlTelemetry(s *schema.HardwareStorage, raw map[string]any, status *storageHealthStatus) {
|
||||||
|
if s == nil || len(raw) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:power_on_time.hours",
|
||||||
|
"path:accumulated_power_on_time.hours",
|
||||||
|
"path:power_on_time.hour",
|
||||||
|
"path:accumulated_power_on_time.hour",
|
||||||
|
); ok && v > 0 && s.PowerOnHours == nil {
|
||||||
|
s.PowerOnHours = &v
|
||||||
|
}
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:power_cycle_count",
|
||||||
|
"path:start_stop_cycle_count",
|
||||||
|
"path:accumulated_start_stop_cycles",
|
||||||
|
); ok && v > 0 && s.PowerCycles == nil {
|
||||||
|
s.PowerCycles = &v
|
||||||
|
}
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:scsi_grown_defect_list",
|
||||||
|
"path:grown_defect_list",
|
||||||
|
); ok && v > 0 && s.ReallocatedSectors == nil {
|
||||||
|
s.ReallocatedSectors = &v
|
||||||
|
if status != nil && status.reallocatedSectors == 0 {
|
||||||
|
status.reallocatedSectors = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:percentage_used_endurance_indicator",
|
||||||
|
"path:scsi_percentage_used_endurance_indicator",
|
||||||
|
); ok && v > 0 {
|
||||||
|
if s.LifeUsedPct == nil {
|
||||||
|
fv := float64(v)
|
||||||
|
s.LifeUsedPct = &fv
|
||||||
|
}
|
||||||
|
if s.LifeRemainingPct == nil && v <= 100 {
|
||||||
|
remaining := float64(100 - v)
|
||||||
|
s.LifeRemainingPct = &remaining
|
||||||
|
if status != nil && status.lifeRemainingPct == 0 {
|
||||||
|
status.lifeRemainingPct = int64(remaining)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
blockSize, hasBlockSize := firstInt64(raw,
|
||||||
|
"path:logical_block_size",
|
||||||
|
"path:block_size",
|
||||||
|
"path:user_capacity.block_size",
|
||||||
|
)
|
||||||
|
if hasBlockSize && blockSize > 0 {
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:logical_blocks_written",
|
||||||
|
"path:total_lbas_written",
|
||||||
|
); ok && v > 0 && s.WrittenBytes == nil {
|
||||||
|
bytes := v * blockSize
|
||||||
|
s.WrittenBytes = &bytes
|
||||||
|
}
|
||||||
|
if v, ok := firstInt64(raw,
|
||||||
|
"path:logical_blocks_read",
|
||||||
|
"path:total_lbas_read",
|
||||||
|
); ok && v > 0 && s.ReadBytes == nil {
|
||||||
|
bytes := v * blockSize
|
||||||
|
s.ReadBytes = &bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func firstInt64(root map[string]any, candidates ...string) (int64, bool) {
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
if !strings.HasPrefix(candidate, "path:") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
path := strings.TrimPrefix(candidate, "path:")
|
||||||
|
if v, ok := nestedInt64(root, strings.Split(path, ".")); ok {
|
||||||
|
return v, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// nestedInt64 follows path through nested map[string]any objects starting at
// root and converts the value found at the end of the path to an int64.
//
// The second result is false when any key is missing, when an intermediate
// value is not an object, or when the final value cannot be represented as an
// int64. Accepted final values are:
//   - float64 / float32: truncated toward zero; NaN, infinities and values
//     outside the int64 range are rejected
//   - int, int32, int64
//   - json.Number: converted with Int64
//   - string: surrounding whitespace is trimmed, then parsed as base-10
//
// An empty path never resolves, because root itself is not a number.
func nestedInt64(root map[string]any, path []string) (int64, bool) {
	var current any = root
	for _, key := range path {
		obj, ok := current.(map[string]any)
		if !ok {
			return 0, false
		}
		if current, ok = obj[key]; !ok {
			return 0, false
		}
	}

	switch v := current.(type) {
	case float64:
		return finiteFloatToInt64(v)
	case float32:
		return finiteFloatToInt64(float64(v))
	case int:
		return int64(v), true
	case int64:
		return v, true
	case int32:
		return int64(v), true
	case json.Number:
		n, err := v.Int64()
		return n, err == nil
	case string:
		n, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64)
		return n, err == nil
	default:
		return 0, false
	}
}

// finiteFloatToInt64 truncates f toward zero and reports whether f was a
// finite value inside the int64 range. Converting NaN, an infinity or an
// out-of-range float to an integer yields an implementation-dependent result
// in Go, so such inputs are rejected instead of converted.
func finiteFloatToInt64(f float64) (int64, bool) {
	// 2^63 is exactly representable as a float64 and is the first value that
	// no longer fits in an int64. NaN fails both comparisons.
	const limit = 9223372036854775808.0
	if !(f >= -limit && f < limit) {
		return 0, false
	}
	return int64(f), true
}
|
||||||
|
|
||||||
type storageHealthStatus struct {
|
type storageHealthStatus struct {
|
||||||
hasOverall bool
|
hasOverall bool
|
||||||
overallPassed bool
|
overallPassed bool
|
||||||
|
|||||||
89
audit/internal/collector/storage_scsi_test.go
Normal file
89
audit/internal/collector/storage_scsi_test.go
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestApplySCSISmartctlTelemetry(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
raw := map[string]any{
|
||||||
|
"power_on_time": map[string]any{
|
||||||
|
"hours": float64(32123),
|
||||||
|
},
|
||||||
|
"accumulated_start_stop_cycles": float64(17),
|
||||||
|
"scsi_grown_defect_list": float64(4),
|
||||||
|
"percentage_used_endurance_indicator": float64(12),
|
||||||
|
"logical_block_size": float64(4096),
|
||||||
|
"logical_blocks_written": float64(1000),
|
||||||
|
"logical_blocks_read": float64(2000),
|
||||||
|
}
|
||||||
|
|
||||||
|
var disk schema.HardwareStorage
|
||||||
|
status := storageHealthStatus{}
|
||||||
|
applySCSISmartctlTelemetry(&disk, raw, &status)
|
||||||
|
|
||||||
|
if disk.PowerOnHours == nil || *disk.PowerOnHours != 32123 {
|
||||||
|
t.Fatalf("power_on_hours=%v want 32123", disk.PowerOnHours)
|
||||||
|
}
|
||||||
|
if disk.PowerCycles == nil || *disk.PowerCycles != 17 {
|
||||||
|
t.Fatalf("power_cycles=%v want 17", disk.PowerCycles)
|
||||||
|
}
|
||||||
|
if disk.ReallocatedSectors == nil || *disk.ReallocatedSectors != 4 {
|
||||||
|
t.Fatalf("reallocated=%v want 4", disk.ReallocatedSectors)
|
||||||
|
}
|
||||||
|
if disk.WrittenBytes == nil || *disk.WrittenBytes != 4096000 {
|
||||||
|
t.Fatalf("written_bytes=%v want 4096000", disk.WrittenBytes)
|
||||||
|
}
|
||||||
|
if disk.ReadBytes == nil || *disk.ReadBytes != 8192000 {
|
||||||
|
t.Fatalf("read_bytes=%v want 8192000", disk.ReadBytes)
|
||||||
|
}
|
||||||
|
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 12 {
|
||||||
|
t.Fatalf("life_used_pct=%v want 12", disk.LifeUsedPct)
|
||||||
|
}
|
||||||
|
if disk.LifeRemainingPct == nil || *disk.LifeRemainingPct != 88 {
|
||||||
|
t.Fatalf("life_remaining_pct=%v want 88", disk.LifeRemainingPct)
|
||||||
|
}
|
||||||
|
if status.reallocatedSectors != 4 {
|
||||||
|
t.Fatalf("status.reallocated=%d want 4", status.reallocatedSectors)
|
||||||
|
}
|
||||||
|
if status.lifeRemainingPct != 88 {
|
||||||
|
t.Fatalf("status.life_remaining_pct=%d want 88", status.lifeRemainingPct)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplySCSISmartctlTelemetryDoesNotOverwriteExistingValues(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
powerOnHours := int64(10)
|
||||||
|
writtenBytes := int64(20)
|
||||||
|
lifeRemaining := 30.0
|
||||||
|
disk := schema.HardwareStorage{
|
||||||
|
PowerOnHours: &powerOnHours,
|
||||||
|
WrittenBytes: &writtenBytes,
|
||||||
|
LifeRemainingPct: &lifeRemaining,
|
||||||
|
}
|
||||||
|
raw := map[string]any{
|
||||||
|
"power_on_time": map[string]any{"hours": float64(999)},
|
||||||
|
"logical_block_size": float64(512),
|
||||||
|
"logical_blocks_written": float64(999),
|
||||||
|
"percentage_used_endurance_indicator": float64(50),
|
||||||
|
}
|
||||||
|
|
||||||
|
applySCSISmartctlTelemetry(&disk, raw, nil)
|
||||||
|
|
||||||
|
if *disk.PowerOnHours != 10 {
|
||||||
|
t.Fatalf("power_on_hours overwritten: got %d want 10", *disk.PowerOnHours)
|
||||||
|
}
|
||||||
|
if *disk.WrittenBytes != 20 {
|
||||||
|
t.Fatalf("written_bytes overwritten: got %d want 20", *disk.WrittenBytes)
|
||||||
|
}
|
||||||
|
if *disk.LifeRemainingPct != 30 {
|
||||||
|
t.Fatalf("life_remaining_pct overwritten: got %v want 30", *disk.LifeRemainingPct)
|
||||||
|
}
|
||||||
|
if disk.LifeUsedPct == nil || *disk.LifeUsedPct != 50 {
|
||||||
|
t.Fatalf("life_used_pct=%v want 50", disk.LifeUsedPct)
|
||||||
|
}
|
||||||
|
}
|
||||||
25
audit/internal/collector/storage_telemetry_test.go
Normal file
25
audit/internal/collector/storage_telemetry_test.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestSmartLBAsToBytes(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
lbas int64
|
||||||
|
want int64
|
||||||
|
}{
|
||||||
|
{name: "zero", lbas: 0, want: 0},
|
||||||
|
{name: "single lba", lbas: 1, want: 512},
|
||||||
|
{name: "multiple lbas", lbas: 2048, want: 1048576},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := smartLBAsToBytes(tt.lbas); got != tt.want {
|
||||||
|
t.Fatalf("smartLBAsToBytes(%d)=%d want %d", tt.lbas, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -44,3 +44,48 @@ func TestHardwareSnapshotMarshalsNewContractFields(t *testing.T) {
|
|||||||
t.Fatalf("missing event_logs payload: %s", text)
|
t.Fatalf("missing event_logs payload: %s", text)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHardwareSnapshotMarshalsStorageTelemetryFields(t *testing.T) {
|
||||||
|
powerOnHours := int64(12450)
|
||||||
|
writtenBytes := int64(9876543210)
|
||||||
|
readBytes := int64(1234567890)
|
||||||
|
lifeRemainingPct := 91.0
|
||||||
|
|
||||||
|
payload := HardwareIngestRequest{
|
||||||
|
CollectedAt: "2026-03-15T15:00:00Z",
|
||||||
|
Hardware: HardwareSnapshot{
|
||||||
|
Board: HardwareBoard{SerialNumber: "SRV-001"},
|
||||||
|
Storage: []HardwareStorage{
|
||||||
|
{
|
||||||
|
SerialNumber: stringPtr("DISK-001"),
|
||||||
|
Model: stringPtr("TestDisk"),
|
||||||
|
PowerOnHours: &powerOnHours,
|
||||||
|
WrittenBytes: &writtenBytes,
|
||||||
|
ReadBytes: &readBytes,
|
||||||
|
LifeRemainingPct: &lifeRemainingPct,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
text := string(data)
|
||||||
|
for _, needle := range []string{
|
||||||
|
`"storage":[{`,
|
||||||
|
`"power_on_hours":12450`,
|
||||||
|
`"written_bytes":9876543210`,
|
||||||
|
`"read_bytes":1234567890`,
|
||||||
|
`"life_remaining_pct":91`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(text, needle) {
|
||||||
|
t.Fatalf("missing %q in payload: %s", needle, text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringPtr returns a pointer to a fresh copy of v, for filling optional
// string fields in test fixtures.
func stringPtr(v string) *string {
	p := new(string)
	*p = v
	return p
}
|
||||||
|
|||||||
2
bible
2
bible
Submodule bible updated: 1d89a4918e...98448c993f
@@ -10,4 +10,4 @@ Generic engineering rules live in `bible/rules/patterns/`.
|
|||||||
| `architecture/system-overview.md` | What bee does, scope, tech stack |
|
| `architecture/system-overview.md` | What bee does, scope, tech stack |
|
||||||
| `architecture/runtime-flows.md` | Boot sequence, audit flow, service order |
|
| `architecture/runtime-flows.md` | Boot sequence, audit flow, service order |
|
||||||
| `docs/hardware-ingest-contract.md` | Current Reanimator hardware ingest JSON contract |
|
| `docs/hardware-ingest-contract.md` | Current Reanimator hardware ingest JSON contract |
|
||||||
| `decisions/` | Architectural decision log |
|
| `decisions/` | Architectural decision log, including read-only submodule policy |
|
||||||
|
|||||||
@@ -58,6 +58,8 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- `bee` should populate current component state, hardware inventory, telemetry, and `status_checked_at`.
|
- `bee` should populate current component state, hardware inventory, telemetry, and `status_checked_at`.
|
||||||
- Historical status transitions and component replacement logic belong to the centralized ingest/lifecycle system, not to `bee`.
|
- Historical status transitions and component replacement logic belong to the centralized ingest/lifecycle system, not to `bee`.
|
||||||
- Contract fields that have no honest local source on a generic Linux host may remain empty.
|
- Contract fields that have no honest local source on a generic Linux host may remain empty.
|
||||||
|
- Embedded submodules such as `internal/chart/` and `bible/` are read-only for `bee` feature work.
|
||||||
|
- If the UI needs extra information, `bee` must emit it through the standard audit JSON contract rather than patching `chart`.
|
||||||
|
|
||||||
## Tech stack
|
## Tech stack
|
||||||
|
|
||||||
@@ -101,7 +103,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
| `iso/builder/` | ISO build scripts and `live-build` profile |
|
| `iso/builder/` | ISO build scripts and `live-build` profile |
|
||||||
| `iso/overlay/` | Source overlay copied into a staged build overlay |
|
| `iso/overlay/` | Source overlay copied into a staged build overlay |
|
||||||
| `iso/vendor/` | Optional pre-built vendor binaries (storcli64, sas2ircu, sas3ircu, arcconf, ssacli, …) |
|
| `iso/vendor/` | Optional pre-built vendor binaries (storcli64, sas2ircu, sas3ircu, arcconf, ssacli, …) |
|
||||||
| `internal/chart/` | Git submodule with `reanimator/chart`, embedded into `bee web` |
|
| `internal/chart/` | Git submodule with `reanimator/chart`, embedded into `bee web`; update by submodule pointer only, never by local `bee`-specific edits |
|
||||||
| `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
|
| `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
|
||||||
| `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
|
| `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
|
||||||
| `iso/overlay/etc/profile.d/bee.sh` | tty1 welcome message with web UI URLs |
|
| `iso/overlay/etc/profile.d/bee.sh` | tty1 welcome message with web UI URLs |
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
# Decision: Treat embedded submodules as read-only
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
`bee` embeds external git submodules such as:
|
||||||
|
|
||||||
|
- `internal/chart/` — `reanimator/chart`, a generic read-only viewer for Reanimator JSON snapshots
|
||||||
|
- `bible/` — shared engineering rules and contracts
|
||||||
|
|
||||||
|
These repositories are reused by other projects. A local feature request in `bee`
|
||||||
|
must not be solved by silently changing shared submodule behavior.
|
||||||
|
|
||||||
|
The concrete failure mode here was attempting to add project-specific storage
|
||||||
|
telemetry presentation by editing `internal/chart/`. That couples a shared viewer
|
||||||
|
to one host application's needs and creates hidden cross-project regressions.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Embedded submodules are read-only from the point of view of `bee`.
|
||||||
|
|
||||||
|
- Do not implement `bee`-specific behavior by editing `internal/chart/`.
|
||||||
|
- Do not implement `bee`-specific behavior by editing `bible/`.
|
||||||
|
- If `bee` needs new data in the report, produce it in the standard audit JSON
|
||||||
|
emitted by `bee` itself.
|
||||||
|
- `chart` must continue to consume the canonical snapshot as an external viewer,
|
||||||
|
without host-specific forks.
|
||||||
|
- Updating a submodule pointer to an upstream commit is allowed.
|
||||||
|
- Carrying local unmerged submodule commits as part of a `bee` feature is forbidden.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- Audit/report features must be expressed through the contract in
|
||||||
|
`bible-local/docs/hardware-ingest-contract.md`.
|
||||||
|
- `bee` owns collection, normalization, and serialization of storage telemetry in
|
||||||
|
`hardware.storage[]`.
|
||||||
|
- `chart` remains a pure visualization module that reads the snapshot it is given.
|
||||||
|
- If a capability is genuinely missing in a shared submodule, it must be proposed
|
||||||
|
and landed upstream as a generic change first, then pulled into `bee` via a
|
||||||
|
normal submodule update.
|
||||||
@@ -6,3 +6,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
|
|||||||
|---|---|---|
|
|---|---|---|
|
||||||
| 2026-03-05 | Use NVIDIA proprietary driver | active |
|
| 2026-03-05 | Use NVIDIA proprietary driver | active |
|
||||||
| 2026-04-01 | Treat memtest as explicit ISO content | active |
|
| 2026-04-01 | Treat memtest as explicit ISO content | active |
|
||||||
|
| 2026-04-29 | Treat embedded submodules as read-only | active |
|
||||||
|
|||||||
Reference in New Issue
Block a user