Implement audit enrichments, TUI workflows, and production ISO scaffold

This commit is contained in:
Mikhail Chusavitin
2026-03-06 11:56:26 +03:00
parent bdfb6a0a79
commit 18b8c69bc5
32 changed files with 3187 additions and 9 deletions

31
PLAN.md
View File

@@ -10,6 +10,37 @@ Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials,
---
## Status snapshot (2026-03-06)
### Phase 1 — Go Audit Binary
- 1.1 Project scaffold — **DONE**
- 1.2 Board collector — **DONE**
- 1.3 CPU collector — **DONE**
- 1.4 Memory collector — **DONE**
- 1.5 Storage collector — **DONE**
- 1.6 PCIe collector — **DONE** (with noise filtering for system/chipset devices)
- 1.7 PSU collector — **DONE (basic FRU path)**
- 1.8 NVIDIA GPU enrichment — **DONE**
- 1.8b Component wear / age telemetry — **DONE** (storage + NVMe + NVIDIA + NIC SFP/DOM + NIC packet stats)
- 1.9 Mellanox/NVIDIA NIC enrichment — **DONE** (mstflint + ethtool firmware fallback)
- 1.10 RAID controller enrichment — **DONE (initial multi-tool support)** (storcli + sas2/3ircu + arcconf + ssacli + VROC/mdstat)
- 1.11 Output and USB write — **DONE** (usb + /tmp fallback)
- 1.12 Integration test (local) — **DONE** (`scripts/test-local.sh`)
### Phase 2 — Alpine LiveCD
- Debug ISO track is active (builder + overlay-debug + OpenRC services + TUI workflow).
- Production ISO track — **IN PROGRESS**.
- 2.3 Alpine mkimage profile — **DONE (production profile scaffold)**
- 2.4 Network bring-up on boot — **DONE**
- 2.5 OpenRC boot service (bee-audit) — **DONE** (with explicit bee-nvidia ordering)
- 2.6 Vendor utilities in overlay — **DONE (fetch script + iso/vendor scaffold)**
- 2.7 Auto-update wiring (USB first, network second) — **PARTIAL** (shell flow done; strict Ed25519 verification intentionally deferred to final stage)
- 2.8 Release workflow — **PARTIAL** (production build now injects audit binary, NVIDIA modules/tools, vendor tools, and build metadata)
---
## Phase 1 — Go Audit Binary
Self-contained static binary. Runs on any Linux (including Alpine LiveCD).

View File

@@ -6,7 +6,11 @@ import (
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"bee/audit/internal/collector"
)
@@ -71,8 +75,93 @@ func writeOutput(dest string, data []byte) error {
// writeToUSB auto-detects the first removable block device, mounts it,
// and writes the audit JSON. Falls back to /tmp on any failure.
func writeToUSB(data []byte) error {
// implemented in step 1.11
slog.Warn("usb output not yet implemented, falling back to stdout")
_, err := os.Stdout.Write(append(data, '\n'))
return err
boardSerial := extractBoardSerial(data)
filename := auditFilename(boardSerial, time.Now().UTC())
device, err := firstRemovableDevice()
if err != nil {
slog.Warn("usb output: no removable device, writing to /tmp", "err", err)
return writeAuditToPath(filepath.Join("/tmp", filename), data)
}
mountpoint := "/tmp/bee-usb"
if err := os.MkdirAll(mountpoint, 0755); err != nil {
return err
}
if err := exec.Command("mount", device, mountpoint).Run(); err != nil {
slog.Warn("usb output: mount failed, writing to /tmp", "device", device, "err", err)
return writeAuditToPath(filepath.Join("/tmp", filename), data)
}
defer func() {
if err := exec.Command("umount", mountpoint).Run(); err != nil {
slog.Warn("usb output: umount failed", "mountpoint", mountpoint, "err", err)
}
}()
path := filepath.Join(mountpoint, filename)
if err := writeAuditToPath(path, data); err != nil {
slog.Warn("usb output: write failed, falling back to /tmp", "path", path, "err", err)
return writeAuditToPath(filepath.Join("/tmp", filename), data)
}
slog.Info("usb output: written", "path", path)
return nil
}
// writeAuditToPath stores data followed by a trailing newline at path with
// mode 0644 and logs the destination on success. The error from os.WriteFile
// is returned unchanged.
func writeAuditToPath(path string, data []byte) error {
	payload := append(data, '\n')
	err := os.WriteFile(path, payload, 0644)
	if err == nil {
		slog.Info("audit output written", "path", path)
	}
	return err
}
// extractBoardSerial reads hardware.board.serial_number from the audit JSON.
// It returns "unknown" when the document cannot be decoded or the serial is
// empty after trimming whitespace.
func extractBoardSerial(data []byte) string {
	const fallback = "unknown"

	var payload struct {
		Hardware struct {
			Board struct {
				SerialNumber string `json:"serial_number"`
			} `json:"board"`
		} `json:"hardware"`
	}
	if json.Unmarshal(data, &payload) != nil {
		return fallback
	}
	if sn := strings.TrimSpace(payload.Hardware.Board.SerialNumber); sn != "" {
		return sn
	}
	return fallback
}
// auditFilename builds the output file name "audit-<serial>-<timestamp>.json".
//
// boardSerial is trimmed; path separators ('/' and '\\') and ASCII control
// characters are replaced with '_' so the result is always a single path
// element that can be joined onto a directory. An empty serial becomes
// "unknown". now is formatted as YYYYMMDD-HHMMSS in whatever location it
// already carries.
func auditFilename(boardSerial string, now time.Time) string {
	serial := strings.Map(func(r rune) rune {
		switch {
		case r == '/' || r == '\\':
			// A separator would turn the name into a path into a directory
			// that does not exist, making the write fail.
			return '_'
		case r < 0x20 || r == 0x7f:
			return '_'
		}
		return r
	}, strings.TrimSpace(boardSerial))
	if serial == "" {
		serial = "unknown"
	}
	return fmt.Sprintf("audit-%s-%s.json", serial, now.Format("20060102-150405"))
}
// firstRemovableDevice scans /sys/block in name order and returns the /dev
// path of the first entry whose "removable" attribute reads "1". Entries
// named loop* or ram* are ignored, as are entries whose attribute cannot be
// read. An error is returned when /sys/block is unreadable or nothing matches.
func firstRemovableDevice() (string, error) {
	const sysBlock = "/sys/block"

	entries, err := os.ReadDir(sysBlock)
	if err != nil {
		return "", err
	}
	sort.Slice(entries, func(a, b int) bool { return entries[a].Name() < entries[b].Name() })

	for _, entry := range entries {
		dev := entry.Name()
		if strings.HasPrefix(dev, "loop") || strings.HasPrefix(dev, "ram") {
			continue
		}
		flag, readErr := os.ReadFile(filepath.Join(sysBlock, dev, "removable"))
		if readErr != nil || strings.TrimSpace(string(flag)) != "1" {
			continue
		}
		return filepath.Join("/dev", dev), nil
	}
	return "", fmt.Errorf("no removable block device found")
}

View File

@@ -28,6 +28,11 @@ func Run() schema.HardwareIngestRequest {
snap.Memory = collectMemory()
snap.Storage = collectStorage()
snap.PCIeDevices = collectPCIe()
snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices, snap.Board.SerialNumber)
snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
snap.PowerSupplies = collectPSUs()
// remaining collectors added in steps 1.8–1.10

View File

@@ -0,0 +1,164 @@
package collector
import (
"bee/audit/internal/schema"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
)
// mellanoxVendorID is the PCI vendor ID that isMellanoxDevice matches on.
const mellanoxVendorID = 0x15b3

// External command and sysfs hooks. They are package variables so tests can
// substitute canned responses.
var (
	// mstflintQuery runs "mstflint -d <bdf> q" and returns its stdout.
	mstflintQuery = func(bdf string) (string, error) {
		raw, err := exec.Command("mstflint", "-d", bdf, "q").Output()
		if err != nil {
			return "", err
		}
		return string(raw), nil
	}

	// ethtoolInfoQuery runs "ethtool -i <iface>" and returns its stdout.
	ethtoolInfoQuery = func(iface string) (string, error) {
		raw, err := exec.Command("ethtool", "-i", iface).Output()
		if err != nil {
			return "", err
		}
		return string(raw), nil
	}

	// netIfacesByBDF lists the network interfaces belonging to a PCI address.
	netIfacesByBDF = listNetIfacesByBDF
)
// enrichPCIeWithMellanox fills Firmware and SerialNumber on devices accepted
// by isMellanoxDevice. mstflint is asked first; when it yields no firmware
// version, ethtool is tried for the firmware only. Devices without a usable
// BDF are left untouched. devs is modified in place and returned.
func enrichPCIeWithMellanox(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
	var count int
	for idx := range devs {
		dev := &devs[idx]
		if !isMellanoxDevice(*dev) || dev.BDF == nil {
			continue
		}
		addr := normalizePCIeBDF(*dev.BDF)
		if addr == "" {
			continue
		}

		firmware, serialNo := queryMellanoxFromMstflint(addr)
		if firmware == "" {
			firmware = queryFirmwareFromEthtool(addr)
		}
		if firmware == "" && serialNo == "" {
			continue
		}

		if firmware != "" {
			dev.Firmware = &firmware
		}
		if serialNo != "" {
			dev.SerialNumber = &serialNo
		}
		count++
	}
	slog.Info("mellanox: enriched", "count", count)
	return devs
}
// isMellanoxDevice reports whether dev carries the Mellanox vendor ID or a
// manufacturer string containing "mellanox" or "nvidia networking"
// (case-insensitive).
func isMellanoxDevice(dev schema.HardwarePCIeDevice) bool {
	if id := dev.VendorID; id != nil && *id == mellanoxVendorID {
		return true
	}
	if dev.Manufacturer == nil {
		return false
	}
	maker := strings.ToLower(*dev.Manufacturer)
	for _, marker := range []string{"mellanox", "nvidia networking"} {
		if strings.Contains(maker, marker) {
			return true
		}
	}
	return false
}
// queryMellanoxFromMstflint runs the mstflint hook for bdf and parses its
// output. Both results are empty when the hook fails.
func queryMellanoxFromMstflint(bdf string) (firmware, serial string) {
	if raw, err := mstflintQuery(bdf); err == nil {
		firmware, serial = parseMstflintQuery(raw)
	}
	return firmware, serial
}
// parseMstflintQuery extracts the "FW Version" and "Board Serial Number"
// values from "key: value" lines. Keys are compared case-insensitively, lines
// without a colon or with an empty value are ignored, and the last non-empty
// occurrence of a key wins.
func parseMstflintQuery(raw string) (firmware, serial string) {
	for _, row := range strings.Split(raw, "\n") {
		name, value, found := strings.Cut(strings.TrimSpace(row), ":")
		if !found {
			continue
		}
		value = strings.TrimSpace(value)
		if value == "" {
			continue
		}
		switch strings.ToLower(strings.TrimSpace(name)) {
		case "fw version":
			firmware = value
		case "board serial number":
			serial = value
		}
	}
	return firmware, serial
}
// queryFirmwareFromEthtool asks ethtool about each network interface of bdf
// and returns the first non-empty firmware version, or "" when none is found.
func queryFirmwareFromEthtool(bdf string) string {
	for _, name := range netIfacesByBDF(bdf) {
		info, err := ethtoolInfoQuery(name)
		if err != nil {
			continue
		}
		version := parseEthtoolFirmwareInfo(info)
		if version != "" {
			return version
		}
	}
	return ""
}
// parseEthtoolFirmwareInfo returns the value of the first "firmware-version"
// line with a non-empty value, or "" when there is none. The key is compared
// case-insensitively.
func parseEthtoolFirmwareInfo(raw string) string {
	for _, row := range strings.Split(raw, "\n") {
		name, value, found := strings.Cut(strings.TrimSpace(row), ":")
		if !found {
			continue
		}
		if strings.ToLower(strings.TrimSpace(name)) != "firmware-version" {
			continue
		}
		if version := strings.TrimSpace(value); version != "" {
			return version
		}
	}
	return ""
}
// listNetIfacesByBDF returns the entry names found under
// /sys/bus/pci/devices/<bdf>/net, or nil when that directory cannot be read.
func listNetIfacesByBDF(bdf string) []string {
	entries, err := os.ReadDir(filepath.Join("/sys/bus/pci/devices", bdf, "net"))
	if err != nil {
		return nil
	}
	names := make([]string, 0, len(entries))
	for _, entry := range entries {
		if n := entry.Name(); n != "" {
			names = append(names, n)
		}
	}
	return names
}

View File

@@ -0,0 +1,118 @@
package collector
import (
"bee/audit/internal/schema"
"fmt"
"testing"
)
// TestParseMstflintQuery checks that firmware and board serial are pulled out
// of a short mstflint query sample.
func TestParseMstflintQuery(t *testing.T) {
	const sample = `Device #1:
----------
FW Version: 28.39.1002
Board Serial Number: MT1234ABC
`
	gotFW, gotSerial := parseMstflintQuery(sample)
	if gotFW != "28.39.1002" {
		t.Fatalf("firmware: got %q", gotFW)
	}
	if gotSerial != "MT1234ABC" {
		t.Fatalf("serial: got %q", gotSerial)
	}
}
// TestParseEthtoolFirmwareInfo checks that the full firmware-version value is
// returned from an "ethtool -i" sample.
func TestParseEthtoolFirmwareInfo(t *testing.T) {
	const sample = `driver: mlx5_core
version: 6.6.31-0-lts
firmware-version: 28.39.1002 (MT_0000000000)
bus-info: 0000:18:00.0
`
	const want = "28.39.1002 (MT_0000000000)"
	if got := parseEthtoolFirmwareInfo(sample); got != want {
		t.Fatalf("firmware: got %q", got)
	}
}
// TestEnrichPCIeWithMellanox_mstflint verifies that firmware and serial come
// from the mstflint hook and that the ethtool hook is never invoked when
// mstflint already supplies a firmware version.
func TestEnrichPCIeWithMellanox_mstflint(t *testing.T) {
// Save the package-level hooks and restore them when the test finishes.
origMst := mstflintQuery
origEth := ethtoolInfoQuery
origIfaces := netIfacesByBDF
t.Cleanup(func() {
mstflintQuery = origMst
ethtoolInfoQuery = origEth
netIfacesByBDF = origIfaces
})
// mstflint stub: answers only for the expected PCI address.
mstflintQuery = func(bdf string) (string, error) {
if bdf != "0000:18:00.0" {
t.Fatalf("unexpected bdf: %s", bdf)
}
return "FW Version: 28.39.1002\nBoard Serial Number: SN-MST-001\n", nil
}
// ethtool stub: any call is a test failure.
ethtoolInfoQuery = func(string) (string, error) {
t.Fatal("ethtool should not be called when mstflint succeeds")
return "", nil
}
netIfacesByBDF = func(string) []string { return nil }
vendorID := mellanoxVendorID
bdf := "0000:18:00.0"
manufacturer := "Mellanox Technologies"
devs := []schema.HardwarePCIeDevice{{
VendorID: &vendorID,
BDF: &bdf,
Manufacturer: &manufacturer,
}}
out := enrichPCIeWithMellanox(devs)
if out[0].Firmware == nil || *out[0].Firmware != "28.39.1002" {
t.Fatalf("firmware: got %v", out[0].Firmware)
}
if out[0].SerialNumber == nil || *out[0].SerialNumber != "SN-MST-001" {
t.Fatalf("serial: got %v", out[0].SerialNumber)
}
}
// TestEnrichPCIeWithMellanox_fallbackEthtool verifies that when the mstflint
// hook fails, the firmware version is taken from ethtool for the device's
// interface and the serial number stays unset.
func TestEnrichPCIeWithMellanox_fallbackEthtool(t *testing.T) {
// Save the package-level hooks and restore them when the test finishes.
origMst := mstflintQuery
origEth := ethtoolInfoQuery
origIfaces := netIfacesByBDF
t.Cleanup(func() {
mstflintQuery = origMst
ethtoolInfoQuery = origEth
netIfacesByBDF = origIfaces
})
// mstflint stub: always fails, forcing the ethtool path.
mstflintQuery = func(string) (string, error) {
return "", fmt.Errorf("mstflint not found")
}
// Interface lookup stub: the expected PCI address maps to eth0.
netIfacesByBDF = func(bdf string) []string {
if bdf != "0000:18:00.0" {
t.Fatalf("unexpected bdf: %s", bdf)
}
return []string{"eth0"}
}
ethtoolInfoQuery = func(iface string) (string, error) {
if iface != "eth0" {
t.Fatalf("unexpected iface: %s", iface)
}
return "driver: mlx5_core\nfirmware-version: 28.40.1000\n", nil
}
vendorID := mellanoxVendorID
bdf := "0000:18:00.0"
manufacturer := "NVIDIA Networking"
devs := []schema.HardwarePCIeDevice{{
VendorID: &vendorID,
BDF: &bdf,
Manufacturer: &manufacturer,
}}
out := enrichPCIeWithMellanox(devs)
if out[0].Firmware == nil || *out[0].Firmware != "28.40.1000" {
t.Fatalf("firmware: got %v", out[0].Firmware)
}
if out[0].SerialNumber != nil {
t.Fatalf("serial should stay nil without mstflint, got %v", out[0].SerialNumber)
}
}

View File

@@ -0,0 +1,172 @@
package collector
import (
"bee/audit/internal/schema"
"log/slog"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// Hooks used by the NIC telemetry collector; declared as variables so they can
// be swapped out.
var (
	// ethtoolModuleQuery runs "ethtool -m <iface>" through raidToolQuery and
	// returns its output as a string.
	ethtoolModuleQuery = func(iface string) (string, error) {
		raw, err := raidToolQuery("ethtool", "-m", iface)
		if err != nil {
			return "", err
		}
		return string(raw), nil
	}

	// readNetStatFile reads /sys/class/net/<iface>/statistics/<key> and
	// parses its content as a base-10 int64.
	readNetStatFile = func(iface, key string) (int64, error) {
		data, err := os.ReadFile(filepath.Join("/sys/class/net", iface, "statistics", key))
		if err != nil {
			return 0, err
		}
		n, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
		if err != nil {
			// Return 0 rather than ParseInt's partial result.
			return 0, err
		}
		return n, nil
	}
)
// enrichPCIeWithNICTelemetry adds packet counters and SFP DOM readings to the
// Telemetry map of every device accepted by isNICDevice, using the first
// network interface found for the device's BDF. When Firmware is still unset
// it is filled from ethtool. Devices without a BDF or without interfaces are
// skipped. devs is modified in place and returned.
func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
	count := 0
	for idx := range devs {
		dev := &devs[idx]
		if dev.BDF == nil || !isNICDevice(*dev) {
			continue
		}
		addr := normalizePCIeBDF(*dev.BDF)
		if addr == "" {
			continue
		}
		names := netIfacesByBDF(addr)
		if len(names) == 0 {
			continue
		}
		primary := names[0]

		if dev.Firmware == nil {
			if info, err := ethtoolInfoQuery(primary); err == nil {
				if fw := parseEthtoolFirmwareInfo(info); fw != "" {
					dev.Firmware = &fw
				}
			}
		}

		if dev.Telemetry == nil {
			dev.Telemetry = map[string]any{}
		}
		injectNICPacketStats(dev.Telemetry, primary)
		if dom, err := ethtoolModuleQuery(primary); err == nil {
			injectSFPDOMTelemetry(dev.Telemetry, dom)
		}

		// Do not leave an empty map behind when nothing could be collected.
		if len(dev.Telemetry) == 0 {
			dev.Telemetry = nil
			continue
		}
		count++
	}
	slog.Info("nic: telemetry enriched", "count", count)
	return devs
}
// isNICDevice reports whether the device class mentions an Ethernet, network
// or InfiniBand controller (case-insensitive). A missing class yields false.
func isNICDevice(dev schema.HardwarePCIeDevice) bool {
	if dev.DeviceClass == nil {
		return false
	}
	class := strings.ToLower(strings.TrimSpace(*dev.DeviceClass))
	for _, marker := range []string{"ethernet controller", "network controller", "infiniband controller"} {
		if strings.Contains(class, marker) {
			return true
		}
	}
	return false
}
// injectNICPacketStats copies the rx/tx packet and error counters of iface
// into dst. Counters that cannot be read are left out.
func injectNICPacketStats(dst map[string]any, iface string) {
	counters := []string{"rx_packets", "tx_packets", "rx_errors", "tx_errors"}
	for _, name := range counters {
		value, err := readNetStatFile(iface, name)
		if err != nil {
			continue
		}
		dst[name] = value
	}
}
// injectSFPDOMTelemetry parses raw with parseSFPDOM and merges every resulting
// entry into dst, overwriting existing keys.
func injectSFPDOMTelemetry(dst map[string]any, raw string) {
	for name, value := range parseSFPDOM(raw) {
		dst[name] = value
	}
}
// floatRe matches an optionally signed decimal number such as "3.30", "-3.27"
// or ".5".
var floatRe = regexp.MustCompile(`[-+]?[0-9]*\.?[0-9]+`)

// parseSFPDOM extracts live optical-module readings from "key : value" lines
// as printed by "ethtool -m".
//
// Recognised keys (substring match, case-insensitive) and resulting entries:
//
//	module temperature -> sfp_temperature_c (first number in the value)
//	module voltage     -> sfp_voltage_v     (first number in the value)
//	laser bias current -> sfp_bias_ma       (first number in the value)
//	laser output power -> sfp_tx_power_dbm  (number in front of "dBm")
//	receiver signal    -> sfp_rx_power_dbm  (number in front of "dBm")
//
// Lines describing alarm/warning flags or thresholds share those prefixes
// (e.g. "Laser output power high alarm threshold") and are skipped so they
// cannot overwrite the live reading. Values are float64; the returned map is
// never nil.
func parseSFPDOM(raw string) map[string]any {
	out := map[string]any{}
	for _, line := range strings.Split(raw, "\n") {
		name, val, found := strings.Cut(strings.TrimSpace(line), ":")
		if !found {
			continue
		}
		key := strings.ToLower(strings.TrimSpace(name))
		if isSFPLimitKey(key) {
			continue
		}
		val = strings.TrimSpace(val)
		switch {
		case strings.Contains(key, "module temperature"):
			if f, ok := firstFloat(val); ok {
				out["sfp_temperature_c"] = f
			}
		case strings.Contains(key, "laser output power"):
			if f, ok := dbmValue(val); ok {
				out["sfp_tx_power_dbm"] = f
			}
		case strings.Contains(key, "receiver signal"):
			if f, ok := dbmValue(val); ok {
				out["sfp_rx_power_dbm"] = f
			}
		case strings.Contains(key, "module voltage"):
			if f, ok := firstFloat(val); ok {
				out["sfp_voltage_v"] = f
			}
		case strings.Contains(key, "laser bias current"):
			if f, ok := firstFloat(val); ok {
				out["sfp_bias_ma"] = f
			}
		}
	}
	return out
}

// isSFPLimitKey reports whether a lower-cased key names an alarm/warning flag
// or a threshold rather than a live measurement.
func isSFPLimitKey(key string) bool {
	return strings.Contains(key, "threshold") ||
		strings.Contains(key, "alarm") ||
		strings.Contains(key, "warning")
}

// firstFloat returns the first decimal number found in raw. ok is false when
// raw contains no number.
func firstFloat(raw string) (float64, bool) {
	m := floatRe.FindString(raw)
	if m == "" {
		return 0, false
	}
	v, err := strconv.ParseFloat(m, 64)
	if err != nil {
		return 0, false
	}
	return v, true
}

// dbmValue returns the number immediately preceding the last "dBm" unit in
// raw (case-insensitive), e.g. -3.85 for "0.4123 mW / -3.85 dBm". ok is false
// when raw has no dBm unit or no number in front of it, so a value given only
// in another unit is never reported as dBm.
func dbmValue(raw string) (float64, bool) {
	lower := strings.ToLower(raw)
	unit := strings.LastIndex(lower, "dbm")
	if unit < 0 {
		return 0, false
	}
	matches := floatRe.FindAllString(lower[:unit], -1)
	if len(matches) == 0 {
		return 0, false
	}
	v, err := strconv.ParseFloat(matches[len(matches)-1], 64)
	if err != nil {
		return 0, false
	}
	return v, true
}

View File

@@ -0,0 +1,51 @@
package collector
import "testing"
// TestParseSFPDOM checks each telemetry key produced from a five-line DOM
// sample.
func TestParseSFPDOM(t *testing.T) {
	const sample = `
Module temperature : 41.23 C
Module voltage : 3.30 V
Laser bias current : 6.12 mA
Laser output power : 0.4712 mW / -3.27 dBm
Receiver signal average optical power : 0.4123 mW / -3.85 dBm
`
	readings := parseSFPDOM(sample)

	// Checked in a fixed order so the first mismatch reported is stable.
	expected := []struct {
		key  string
		want float64
	}{
		{"sfp_temperature_c", 41.23},
		{"sfp_voltage_v", 3.30},
		{"sfp_bias_ma", 6.12},
		{"sfp_tx_power_dbm", -3.27},
		{"sfp_rx_power_dbm", -3.85},
	}
	for _, e := range expected {
		value, isFloat := readings[e.key].(float64)
		if !isFloat || value != e.want {
			t.Fatalf("%s mismatch: %#v", e.key, readings[e.key])
		}
	}
}
// TestDBMValue runs dbmValue over inputs with and without a parsable number.
func TestDBMValue(t *testing.T) {
	type dbmCase struct {
		in   string
		want float64
		ok   bool
	}
	cases := []dbmCase{
		{in: "0.4123 mW / -3.85 dBm", want: -3.85, ok: true},
		{in: "-1.23 dBm", want: -1.23, ok: true},
		{in: "not supported", want: 0, ok: false},
	}
	for _, c := range cases {
		value, parsed := dbmValue(c.in)
		if parsed != c.ok {
			t.Fatalf("dbmValue(%q) ok=%v want %v", c.in, parsed, c.ok)
		}
		if parsed && value != c.want {
			t.Fatalf("dbmValue(%q)=%v want %v", c.in, value, c.want)
		}
	}
}

View File

@@ -0,0 +1,245 @@
package collector
import (
"bee/audit/internal/schema"
"encoding/csv"
"fmt"
"log/slog"
"os/exec"
"strconv"
"strings"
)
// nvidiaVendorID is the PCI vendor ID that isNVIDIADevice matches on.
const nvidiaVendorID = 0x10de
// nvidiaGPUInfo holds one row of the nvidia-smi query issued by
// queryNVIDIAGPUs, as parsed by parseNVIDIASMIQuery. Pointer fields are nil
// when the corresponding column could not be parsed.
type nvidiaGPUInfo struct {
// BDF is the pci.bus_id column after normalizePCIeBDF.
BDF string
// Serial is the trimmed serial column.
Serial string
// VBIOS is the trimmed vbios_version column.
VBIOS string
// TemperatureC comes from the temperature.gpu column.
TemperatureC *float64
// PowerW comes from the power.draw column.
PowerW *float64
// ECCUncorrected comes from ecc.errors.uncorrected.aggregate.total.
ECCUncorrected *int64
// ECCCorrected comes from ecc.errors.corrected.aggregate.total.
ECCCorrected *int64
// HWSlowdown comes from clocks_throttle_reasons.hw_slowdown.
HWSlowdown *bool
}
// enrichPCIeWithNVIDIA queries nvidia-smi and merges the result into the
// NVIDIA entries of devs. When the query fails, the devices are still passed
// through enrichPCIeWithNVIDIAData with driverLoaded=false, which gives them
// UNKNOWN status and a fallback serial derived from the board serial.
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice, boardSerial string) []schema.HardwarePCIeDevice {
	gpus, err := queryNVIDIAGPUs()
	driverLoaded := err == nil
	if !driverLoaded {
		slog.Info("nvidia: enrichment skipped", "err", err)
		gpus = nil
	}
	return enrichPCIeWithNVIDIAData(devs, gpus, boardSerial, driverLoaded)
}
// enrichPCIeWithNVIDIAData applies per-GPU data to every device accepted by
// isNVIDIADevice.
//
// With driverLoaded=false, or when a device's normalized BDF has no entry in
// gpuByBDF, the device only receives setPCIeFallback. Otherwise serial (or
// the fallback serial when nvidia-smi gave none), VBIOS as firmware, status
// ("WARNING" when uncorrected ECC errors are above zero, else "OK") and
// telemetry are set. devs is modified in place and returned.
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, boardSerial string, driverLoaded bool) []schema.HardwarePCIeDevice {
	count := 0
	for idx := range devs {
		dev := &devs[idx]
		if !isNVIDIADevice(*dev) {
			continue
		}

		var (
			info  nvidiaGPUInfo
			found bool
		)
		if driverLoaded {
			key := ""
			if dev.BDF != nil {
				key = normalizePCIeBDF(*dev.BDF)
			}
			info, found = gpuByBDF[key]
		}
		if !found {
			setPCIeFallback(dev, boardSerial)
			continue
		}

		if serial := strings.TrimSpace(info.Serial); serial != "" {
			dev.SerialNumber = &serial
		} else {
			setPCIeFallbackSerial(dev, boardSerial)
		}
		if vbios := strings.TrimSpace(info.VBIOS); vbios != "" {
			dev.Firmware = &vbios
		}

		health := "OK"
		if n := info.ECCUncorrected; n != nil && *n > 0 {
			health = "WARNING"
		}
		dev.Status = &health

		injectNVIDIATelemetry(dev, info)
		count++
	}
	if driverLoaded {
		slog.Info("nvidia: enriched", "count", count)
	}
	return devs
}
// queryNVIDIAGPUs runs nvidia-smi with a fixed nine-column CSV query and
// returns the parsed rows keyed by normalized BDF. The command's error is
// returned unchanged when it fails.
func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
	const (
		queryArg  = "--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown"
		formatArg = "--format=csv,noheader,nounits"
	)
	raw, err := exec.Command("nvidia-smi", queryArg, formatArg).Output()
	if err != nil {
		return nil, err
	}
	return parseNVIDIASMIQuery(string(raw))
}
// parseNVIDIASMIQuery parses the CSV produced by queryNVIDIAGPUs into a map
// keyed by normalized BDF.
//
// Each record must have at least nine columns (index, pci.bus_id, serial,
// vbios_version, temperature, power, uncorrected ECC, corrected ECC, HW
// slowdown); a shorter record fails the whole parse. Records with an empty
// bus ID are skipped. Placeholder text in the serial and VBIOS columns (such
// as "[N/A]") is stored as an empty string so it is never mistaken for a real
// identifier. Unparsable numeric or boolean columns become nil.
func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
	r := csv.NewReader(strings.NewReader(raw))
	r.TrimLeadingSpace = true
	r.FieldsPerRecord = -1 // column count is validated per record below
	records, err := r.ReadAll()
	if err != nil {
		return nil, err
	}

	result := make(map[string]nvidiaGPUInfo, len(records))
	for _, rec := range records {
		if len(rec) == 0 {
			continue
		}
		if len(rec) < 9 {
			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 9", len(rec))
		}
		bdf := normalizePCIeBDF(rec[1])
		if bdf == "" {
			continue
		}
		result[bdf] = nvidiaGPUInfo{
			BDF:            bdf,
			Serial:         nvidiaTextField(rec[2]),
			VBIOS:          nvidiaTextField(rec[3]),
			TemperatureC:   parseMaybeFloat(rec[4]),
			PowerW:         parseMaybeFloat(rec[5]),
			ECCUncorrected: parseMaybeInt64(rec[6]),
			ECCCorrected:   parseMaybeInt64(rec[7]),
			HWSlowdown:     parseMaybeBool(rec[8]),
		}
	}
	return result, nil
}

// nvidiaTextField trims v and maps the placeholders "N/A" and
// "Not Supported" (with or without surrounding brackets, any case) to "".
func nvidiaTextField(v string) string {
	v = strings.TrimSpace(v)
	switch strings.ToLower(strings.TrimSpace(strings.Trim(v, "[]"))) {
	case "", "n/a", "not supported":
		return ""
	}
	return v
}
// parseMaybeFloat converts v to a float64 pointer. It returns nil for empty
// input, for the placeholders "n/a", "not supported" and "[not supported]"
// (any case), and for anything strconv.ParseFloat rejects.
func parseMaybeFloat(v string) *float64 {
	text := strings.TrimSpace(v)
	if text == "" {
		return nil
	}
	for _, placeholder := range []string{"n/a", "not supported", "[not supported]"} {
		if strings.EqualFold(text, placeholder) {
			return nil
		}
	}
	if parsed, err := strconv.ParseFloat(text, 64); err == nil {
		return &parsed
	}
	return nil
}
// parseMaybeInt64 converts v to an int64 pointer. It returns nil for empty
// input, for the placeholders "n/a", "not supported" and "[not supported]"
// (any case), and for anything that is not a base-10 int64.
func parseMaybeInt64(v string) *int64 {
	text := strings.TrimSpace(v)
	if text == "" {
		return nil
	}
	for _, placeholder := range []string{"n/a", "not supported", "[not supported]"} {
		if strings.EqualFold(text, placeholder) {
			return nil
		}
	}
	if parsed, err := strconv.ParseInt(text, 10, 64); err == nil {
		return &parsed
	}
	return nil
}
// parseMaybeBool maps "active", "enabled", "true" and "1" to true, and
// "not active", "disabled", "false" and "0" to false (case-insensitive,
// surrounding whitespace ignored). Any other input yields nil.
func parseMaybeBool(v string) *bool {
	lowered := strings.ToLower(v)
	state := strings.TrimSpace(lowered)

	var result bool
	switch state {
	case "active", "enabled", "true", "1":
		result = true
	case "not active", "disabled", "false", "0":
		result = false
	default:
		return nil
	}
	return &result
}
// normalizePCIeBDF lower-cases and trims a PCI address and brings it to the
// "dddd:bb:dd.f" form: a domain longer than four characters keeps only its
// last four, and an address without a domain gets "0000:" prepended. Input
// with any other number of colon-separated parts is returned as cleaned.
func normalizePCIeBDF(bdf string) string {
	addr := strings.TrimSpace(strings.ToLower(bdf))
	if addr == "" {
		return ""
	}
	segments := strings.Split(addr, ":")
	switch len(segments) {
	case 3:
		domain := segments[0]
		if extra := len(domain) - 4; extra > 0 {
			domain = domain[extra:]
		}
		return strings.Join([]string{domain, segments[1], segments[2]}, ":")
	case 2:
		return strings.Join([]string{"0000", segments[0], segments[1]}, ":")
	default:
		return addr
	}
}
// isNVIDIADevice reports whether dev should be handled by the nvidia-smi
// enrichment: either its vendor ID is nvidiaVendorID, or its manufacturer
// string contains "nvidia" (case-insensitive).
//
// A manufacturer containing "nvidia networking" is excluded from the name
// match because isMellanoxDevice already claims those devices; without the
// exclusion such NICs would also receive the GPU fallback status and serial.
func isNVIDIADevice(dev schema.HardwarePCIeDevice) bool {
	if dev.VendorID != nil && *dev.VendorID == nvidiaVendorID {
		return true
	}
	if dev.Manufacturer == nil {
		return false
	}
	maker := strings.ToLower(*dev.Manufacturer)
	return strings.Contains(maker, "nvidia") && !strings.Contains(maker, "nvidia networking")
}
// setPCIeFallback marks dev as "UNKNOWN" and, via setPCIeFallbackSerial,
// gives it a derived serial number when it has none.
func setPCIeFallback(dev *schema.HardwarePCIeDevice, boardSerial string) {
	unknown := "UNKNOWN"
	setPCIeFallbackSerial(dev, boardSerial)
	dev.Status = &unknown
}
// setPCIeFallbackSerial assigns "<boardSerial>-PCIE-<location>" as the serial
// number when dev has none and boardSerial is not blank. The location is the
// trimmed BDF if present, otherwise the trimmed Slot, otherwise "unknown".
func setPCIeFallbackSerial(dev *schema.HardwarePCIeDevice, boardSerial string) {
	if dev.SerialNumber != nil || strings.TrimSpace(boardSerial) == "" {
		return
	}
	location := "unknown"
	for _, candidate := range []*string{dev.BDF, dev.Slot} {
		if candidate == nil {
			continue
		}
		if trimmed := strings.TrimSpace(*candidate); trimmed != "" {
			location = trimmed
			break
		}
	}
	derived := fmt.Sprintf("%s-PCIE-%s", boardSerial, location)
	dev.SerialNumber = &derived
}
// injectNVIDIATelemetry copies every non-nil reading of info into
// dev.Telemetry, creating the map when needed. If the map is still empty
// afterwards it is reset to nil.
func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
	if dev.Telemetry == nil {
		dev.Telemetry = map[string]any{}
	}
	metrics := dev.Telemetry

	if t := info.TemperatureC; t != nil {
		metrics["temperature_c"] = *t
	}
	if p := info.PowerW; p != nil {
		metrics["power_w"] = *p
	}
	if n := info.ECCUncorrected; n != nil {
		metrics["ecc_uncorrected_total"] = *n
	}
	if n := info.ECCCorrected; n != nil {
		metrics["ecc_corrected_total"] = *n
	}
	if s := info.HWSlowdown; s != nil {
		metrics["hw_slowdown_active"] = *s
	}

	if len(metrics) == 0 {
		dev.Telemetry = nil
	}
}

View File

@@ -0,0 +1,116 @@
package collector
import (
"bee/audit/internal/schema"
"testing"
)
// TestParseNVIDIASMIQuery parses a single nvidia-smi CSV row and checks the
// normalized BDF key plus the serial, VBIOS, ECC and slowdown fields.
func TestParseNVIDIASMIQuery(t *testing.T) {
	const row = "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active\n"

	parsed, err := parseNVIDIASMIQuery(row)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	info, found := parsed["0000:65:00.0"]
	if !found {
		t.Fatalf("gpu by normalized bdf not found")
	}
	if info.Serial != "GPU-SERIAL-1" {
		t.Fatalf("serial: got %q", info.Serial)
	}
	if info.VBIOS != "96.00.1F.00.02" {
		t.Fatalf("vbios: got %q", info.VBIOS)
	}
	if ecc := info.ECCUncorrected; ecc == nil || *ecc != 0 {
		t.Fatalf("ecc uncorrected: got %v", info.ECCUncorrected)
	}
	if slow := info.HWSlowdown; slow == nil || *slow {
		t.Fatalf("hw slowdown: got %v, want false", info.HWSlowdown)
	}
}
// TestNormalizePCIeBDF covers long-domain, canonical and domain-less inputs.
func TestNormalizePCIeBDF(t *testing.T) {
	type bdfCase struct {
		in   string
		want string
	}
	cases := []bdfCase{
		{in: "00000000:17:00.0", want: "0000:17:00.0"},
		{in: "0000:17:00.0", want: "0000:17:00.0"},
		{in: "17:00.0", want: "0000:17:00.0"},
	}
	for _, c := range cases {
		if got := normalizePCIeBDF(c.in); got != c.want {
			t.Fatalf("normalizePCIeBDF(%q)=%q want %q", c.in, got, c.want)
		}
	}
}
// TestEnrichPCIeWithNVIDIAData_driverLoaded checks that nvidia-smi data
// replaces serial and firmware, that uncorrected ECC errors turn the status
// into WARNING, and that telemetry is attached.
func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
	const addr = "0000:65:00.0"

	vendor := nvidiaVendorID
	pciAddr := addr
	maker := "NVIDIA Corporation"
	initialStatus := "OK"
	fixture := []schema.HardwarePCIeDevice{{
		VendorID:     &vendor,
		BDF:          &pciAddr,
		Manufacturer: &maker,
		Status:       &initialStatus,
	}}
	gpus := map[string]nvidiaGPUInfo{
		addr: {
			BDF:            addr,
			Serial:         "GPU-ABC",
			VBIOS:          "96.00.1F.00.02",
			ECCUncorrected: ptrInt64(2),
			ECCCorrected:   ptrInt64(10),
			TemperatureC:   ptrFloat(55.5),
			PowerW:         ptrFloat(230.2),
		},
	}

	result := enrichPCIeWithNVIDIAData(fixture, gpus, "BOARD-001", true)
	gpu := result[0]

	if gpu.SerialNumber == nil || *gpu.SerialNumber != "GPU-ABC" {
		t.Fatalf("serial: got %v", gpu.SerialNumber)
	}
	if gpu.Firmware == nil || *gpu.Firmware != "96.00.1F.00.02" {
		t.Fatalf("firmware: got %v", gpu.Firmware)
	}
	if gpu.Status == nil || *gpu.Status != "WARNING" {
		t.Fatalf("status: got %v", gpu.Status)
	}
	if gpu.Telemetry == nil {
		t.Fatal("expected telemetry")
	}
	if total, isInt := gpu.Telemetry["ecc_uncorrected_total"].(int64); !isInt || total != 2 {
		t.Fatalf("ecc_uncorrected_total: got %#v", gpu.Telemetry["ecc_uncorrected_total"])
	}
}
// TestEnrichPCIeWithNVIDIAData_driverMissingFallback checks that without a
// loaded driver the device gets the board-derived serial and UNKNOWN status.
func TestEnrichPCIeWithNVIDIAData_driverMissingFallback(t *testing.T) {
	vendor := nvidiaVendorID
	pciAddr := "0000:17:00.0"
	maker := "NVIDIA Corporation"
	fixture := []schema.HardwarePCIeDevice{{
		VendorID:     &vendor,
		BDF:          &pciAddr,
		Manufacturer: &maker,
	}}

	result := enrichPCIeWithNVIDIAData(fixture, nil, "BOARD-123", false)
	gpu := result[0]

	if gpu.SerialNumber == nil || *gpu.SerialNumber != "BOARD-123-PCIE-0000:17:00.0" {
		t.Fatalf("fallback serial: got %v", gpu.SerialNumber)
	}
	if gpu.Status == nil || *gpu.Status != "UNKNOWN" {
		t.Fatalf("fallback status: got %v", gpu.Status)
	}
}
// ptrInt64 returns a pointer to a fresh copy of v.
func ptrInt64(v int64) *int64 {
	p := new(int64)
	*p = v
	return p
}

// ptrFloat returns a pointer to a fresh copy of v.
func ptrFloat(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}

View File

@@ -37,12 +37,44 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
val := strings.TrimSpace(line[idx+2:])
fields[key] = val
}
if !shouldIncludePCIeDevice(fields["Class"]) {
continue
}
dev := parseLspciDevice(fields)
devs = append(devs, dev)
}
return devs
}
// shouldIncludePCIeDevice decides whether a device with the given lspci class
// belongs in the inventory. An empty class is kept; a class containing any of
// the listed chipset/infrastructure names (case-insensitive) is dropped.
func shouldIncludePCIeDevice(class string) bool {
	name := strings.ToLower(strings.TrimSpace(class))
	if name == "" {
		return true
	}
	// Classes treated as chipset/virtual noise rather than replaceable parts.
	noise := [...]string{
		"host bridge",
		"isa bridge",
		"pci bridge",
		"ram memory",
		"system peripheral",
		"communication controller",
		"signal processing controller",
		"usb controller",
		"smbus",
		"audio device",
		"serial bus controller",
		"unassigned class",
	}
	for i := 0; i < len(noise); i++ {
		if strings.Contains(name, noise[i]) {
			return false
		}
	}
	return true
}
func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
dev := schema.HardwarePCIeDevice{}
present := true

View File

@@ -0,0 +1,41 @@
package collector
import "testing"
// TestShouldIncludePCIeDevice checks a set of excluded and retained lspci
// class names.
func TestShouldIncludePCIeDevice(t *testing.T) {
	type classCase struct {
		class string
		want  bool
	}
	cases := []classCase{
		{class: "USB controller", want: false},
		{class: "System peripheral", want: false},
		{class: "Audio device", want: false},
		{class: "Host bridge", want: false},
		{class: "PCI bridge", want: false},
		{class: "SMBus", want: false},
		{class: "Ethernet controller", want: true},
		{class: "RAID bus controller", want: true},
		{class: "Non-Volatile memory controller", want: true},
		{class: "VGA compatible controller", want: true},
	}
	for _, c := range cases {
		if got := shouldIncludePCIeDevice(c.class); got != c.want {
			t.Fatalf("class %q include=%v want %v", c.class, got, c.want)
		}
	}
}
// TestParseLspci_filtersExcludedClasses feeds parseLspci one USB controller
// and one VGA controller and expects only the latter to remain.
func TestParseLspci_filtersExcludedClasses(t *testing.T) {
	const usbRecord = "Slot:\t0000:00:14.0\nClass:\tUSB controller\nVendor:\tIntel Corporation\nDevice:\tUSB 3.0\n\n"
	const gpuRecord = "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"

	kept := parseLspci(usbRecord + gpuRecord)
	if len(kept) != 1 {
		t.Fatalf("expected 1 filtered device, got %d", len(kept))
	}
	if class := kept[0].DeviceClass; class == nil || *class != "VGA compatible controller" {
		t.Fatalf("unexpected remaining class: %v", kept[0].DeviceClass)
	}
}

View File

@@ -0,0 +1,748 @@
package collector
import (
"bee/audit/internal/schema"
"encoding/json"
"log/slog"
"os"
"os/exec"
"regexp"
"sort"
"strconv"
"strings"
)
// PCI vendor IDs used as keys when deciding which RAID tools to run.
const (
	vendorBroadcomLSI = 0x1000
	vendorAdaptec     = 0x9005
	vendorHPE         = 0x103c
	vendorIntel       = 0x8086
)

// Hooks for external data sources; variables so they can be replaced.
var (
	// raidToolQuery runs the named tool with args and returns its stdout.
	raidToolQuery = func(name string, args ...string) ([]byte, error) {
		cmd := exec.Command(name, args...)
		return cmd.Output()
	}

	// readMDStat returns the content of /proc/mdstat.
	readMDStat = func() ([]byte, error) {
		return os.ReadFile("/proc/mdstat")
	}
)
// collectRAIDStorage gathers physical drives that sit behind RAID controllers
// and may therefore be missing from the regular block-device list.
//
// Which tools run depends on the controller vendors detected in pcie:
// storcli, sas3ircu and sas2ircu for Broadcom/LSI, arcconf for Adaptec and
// ssacli for HPE. The result is nil when no RAID controller is present or no
// tool reported a drive.
func collectRAIDStorage(pcie []schema.HardwarePCIeDevice) []schema.HardwareStorage {
	vendors := detectRAIDVendors(pcie)
	if len(vendors) == 0 {
		return nil
	}

	// Appending an empty result leaves the slice untouched, so it stays nil
	// until some tool actually reports a drive.
	var drives []schema.HardwareStorage
	if vendors[vendorBroadcomLSI] {
		drives = append(drives, collectStorcliDrives()...)
		drives = append(drives, collectSASIrcuDrives("sas3ircu")...)
		drives = append(drives, collectSASIrcuDrives("sas2ircu")...)
	}
	if vendors[vendorAdaptec] {
		drives = append(drives, collectArcconfDrives()...)
	}
	if vendors[vendorHPE] {
		drives = append(drives, collectSSACLIDrives()...)
	}

	if n := len(drives); n > 0 {
		slog.Info("raid: collected physical drives", "count", n)
	}
	return drives
}
// detectRAIDVendors returns the set of vendor IDs belonging to devices that
// isLikelyRAIDController accepts. Devices without a vendor ID are ignored.
func detectRAIDVendors(pcie []schema.HardwarePCIeDevice) map[int]bool {
	found := make(map[int]bool)
	for i := range pcie {
		id := pcie[i].VendorID
		if id == nil || !isLikelyRAIDController(pcie[i]) {
			continue
		}
		found[*id] = true
	}
	return found
}
// isLikelyRAIDController reports whether the device class (case-insensitive)
// mentions "raid", "sas", "mass storage" or "serial attached scsi". A missing
// class yields false.
func isLikelyRAIDController(dev schema.HardwarePCIeDevice) bool {
	if dev.DeviceClass == nil {
		return false
	}
	class := strings.ToLower(*dev.DeviceClass)
	for _, marker := range []string{"raid", "sas", "mass storage", "serial attached scsi"} {
		if strings.Contains(class, marker) {
			return true
		}
	}
	return false
}
// collectStorcliDrives runs "storcli64 /call/eall/sall show all J" and parses
// the JSON with parseStorcliDrivesJSON. It returns nil when the tool cannot
// be run.
func collectStorcliDrives() []schema.HardwareStorage {
	raw, err := raidToolQuery("storcli64", "/call/eall/sall", "show", "all", "J")
	if err != nil {
		slog.Info("raid: storcli unavailable", "err", err)
		return nil
	}
	found := parseStorcliDrivesJSON(raw)
	if len(found) == 0 {
		slog.Info("raid: storcli returned no drives")
	}
	return found
}
// collectSASIrcuDrives runs "<tool> list" to find controller IDs and then
// "<tool> <id> display" for each, collecting the drives parsed from every
// display output. Controllers whose display call fails are skipped; nil is
// returned when the list call fails.
func collectSASIrcuDrives(tool string) []schema.HardwareStorage {
	listing, err := raidToolQuery(tool, "list")
	if err != nil {
		slog.Info("raid: "+tool+" unavailable", "err", err)
		return nil
	}

	var found []schema.HardwareStorage
	for _, id := range parseSASIrcuControllerIDs(string(listing)) {
		display, displayErr := raidToolQuery(tool, strconv.Itoa(id), "display")
		if displayErr != nil {
			continue
		}
		found = append(found, parseSASIrcuDisplay(string(display))...)
	}
	return found
}
// parseSASIrcuControllerIDs returns, sorted ascending and without duplicates,
// every integer that appears as the first whitespace-separated field of a
// line in raw. Lines whose first field is not an integer are ignored.
func parseSASIrcuControllerIDs(raw string) []int {
	seen := map[int]bool{}
	var ids []int
	for _, row := range strings.Split(raw, "\n") {
		cols := strings.Fields(row)
		if len(cols) == 0 {
			continue
		}
		id, err := strconv.Atoi(cols[0])
		if err != nil || seen[id] {
			continue
		}
		seen[id] = true
		ids = append(ids, id)
	}
	sort.Ints(ids)
	return ids
}
// parseSASIrcuDisplay turns the output of "<sasircu> <id> display" into
// storage entries.
//
// A section starts at every line beginning with "Device is a "; the rest of
// that line is the device type, and following "key : value" lines are its
// attributes. Only sections whose type mentions "hard disk", "ssd" or "nvme"
// (case-insensitive) are kept, and only if they yield at least a slot, a
// serial number or a model.
func parseSASIrcuDisplay(raw string) []schema.HardwareStorage {
	type deviceSection struct {
		kind  string
		attrs map[string]string
	}
	const sectionHeader = "Device is a "

	// Pass 1: split the text into device sections.
	var (
		sections []deviceSection
		current  deviceSection
		active   bool
	)
	for _, row := range strings.Split(raw, "\n") {
		text := strings.TrimSpace(row)
		if strings.HasPrefix(text, sectionHeader) {
			if active {
				sections = append(sections, current)
			}
			current = deviceSection{
				kind:  strings.TrimSpace(strings.TrimPrefix(text, sectionHeader)),
				attrs: map[string]string{},
			}
			active = true
			continue
		}
		if !active {
			continue
		}
		if sep := strings.Index(text, ":"); sep > 0 {
			current.attrs[strings.TrimSpace(text[:sep])] = strings.TrimSpace(text[sep+1:])
		}
	}
	if active {
		sections = append(sections, current)
	}

	// Pass 2: convert disk-like sections into storage entries.
	var drives []schema.HardwareStorage
	for _, sec := range sections {
		kind := strings.ToLower(sec.kind)
		isDisk := strings.Contains(kind, "hard disk") ||
			strings.Contains(kind, "ssd") ||
			strings.Contains(kind, "nvme")
		if !isDisk {
			continue
		}
		attr := func(name string) string { return strings.TrimSpace(sec.attrs[name]) }

		present := true
		status := mapRAIDDriveStatus(sec.attrs["State"])
		drive := schema.HardwareStorage{Present: &present, Status: &status}

		if enclosure, bay := attr("Enclosure #"), attr("Slot #"); enclosure != "" || bay != "" {
			// "enclosure:slot"; a missing half leaves no dangling colon.
			location := strings.Trim(enclosure+":"+bay, ":")
			drive.Slot = &location
		}
		if model := attr("Model Number"); model != "" {
			drive.Model = &model
		}
		if serial := attr("Serial No"); serial != "" {
			drive.SerialNumber = &serial
		}
		protocol := strings.ToUpper(attr("Protocol"))
		if protocol != "" {
			drive.Interface = &protocol
		}

		media := strings.ToUpper(attr("Drive Type"))
		if media == "" {
			media = strings.ToUpper(kind)
		}
		driveType := inferDriveType(media, protocol)
		drive.Type = &driveType

		if mb := parseSASIrcuMB(sec.attrs["Size (in MB)/(in sectors)"]); mb > 0 {
			// NOTE(review): divides the reported MB figure by 1000; confirm
			// whether the tool reports MiB, which would need a different factor.
			gb := mb / 1000
			if gb == 0 {
				gb = 1
			}
			drive.SizeGB = &gb
		}

		if drive.Slot == nil && drive.SerialNumber == nil && drive.Model == nil {
			continue
		}
		drives = append(drives, drive)
	}
	return drives
}
// parseSASIrcuMB returns the megabyte figure from a "<MB>/<sectors>" value.
// Anything that does not start with an integer before the first "/" yields 0.
func parseSASIrcuMB(raw string) int {
	mbPart, _, _ := strings.Cut(strings.TrimSpace(raw), "/")
	mb, convErr := strconv.Atoi(strings.TrimSpace(mbPart))
	if convErr != nil {
		return 0
	}
	return mb
}
// collectArcconfDrives reads the physical-drive section of arcconf's
// configuration dump and converts it into storage records. Only controller
// "1" is queried. It returns nil when arcconf cannot be run.
func collectArcconfDrives() []schema.HardwareStorage {
	out, queryErr := raidToolQuery("arcconf", "getconfig", "1", "pd")
	if queryErr == nil {
		return parseArcconfPhysicalDrives(string(out))
	}
	slog.Info("raid: arcconf unavailable", "err", queryErr)
	return nil
}
// parseArcconfPhysicalDrives converts "arcconf getconfig <n> pd" output into
// storage records.
//
// A new section starts at every line beginning with "Device #" (matched
// case-insensitively); the "key : value" lines after it describe that device.
// A device is emitted only when it has a location, serial number or model.
func parseArcconfPhysicalDrives(raw string) []schema.HardwareStorage {
	var sections []map[string]string
	var open map[string]string
	for _, row := range strings.Split(raw, "\n") {
		text := strings.TrimSpace(row)
		if strings.HasPrefix(strings.ToLower(text), "device #") {
			if open != nil {
				sections = append(sections, open)
			}
			open = make(map[string]string)
			continue
		}
		if open == nil {
			// Header text before the first device.
			continue
		}
		sep := strings.Index(text, ":")
		if sep <= 0 {
			continue
		}
		open[strings.TrimSpace(text[:sep])] = strings.TrimSpace(text[sep+1:])
	}
	if open != nil {
		sections = append(sections, open)
	}

	var drives []schema.HardwareStorage
	for _, sec := range sections {
		present := true
		status := mapRAIDDriveStatus(sec["State"])
		drive := schema.HardwareStorage{Present: &present, Status: &status}

		if loc := strings.TrimSpace(sec["Reported Location"]); loc != "" {
			drive.Slot = &loc
		}
		if model := strings.TrimSpace(sec["Model"]); model != "" {
			drive.Model = &model
		}
		if serial := strings.TrimSpace(sec["Serial number"]); serial != "" {
			drive.SerialNumber = &serial
		}
		if sizeGB := parseHumanSizeToGB(sec["Total Size"]); sizeGB > 0 {
			drive.SizeGB = &sizeGB
		}

		proto := parseArcconfInterface(sec["Transfer Speed"])
		if proto != "" {
			drive.Interface = &proto
		}

		// The "SSD" field is a yes/no flag; turn an affirmative answer into
		// the media token inferDriveType understands.
		media := strings.ToUpper(strings.TrimSpace(sec["SSD"]))
		switch media {
		case "YES", "TRUE":
			media = "SSD"
		}
		kind := inferDriveType(media, proto)
		drive.Type = &kind

		if drive.Slot == nil && drive.SerialNumber == nil && drive.Model == nil {
			continue
		}
		drives = append(drives, drive)
	}
	return drives
}
// parseArcconfInterface picks the bus protocol out of an arcconf
// "Transfer Speed" value. The match is case-insensitive and tried in the
// order SAS, SATA, NVME; an empty string means none of them was found.
func parseArcconfInterface(raw string) string {
	upper := strings.ToUpper(raw)
	for _, proto := range []string{"SAS", "SATA", "NVME"} {
		if strings.Contains(upper, proto) {
			return proto
		}
	}
	return ""
}
// ssacliPhysicalDriveLine matches a whole (already trimmed) line of the form
// "physicaldrive <id> (<comma-separated summary>)", case-insensitively.
// Capture group 1 is the drive id, group 2 the text inside the parentheses.
var ssacliPhysicalDriveLine = regexp.MustCompile(`(?i)^physicaldrive\s+(\S+)\s+\(([^)]*)\)$`)
// collectSSACLIDrives runs "ssacli ctrl all show config detail" and converts
// the physical-drive entries of the report into storage records. It returns
// nil when ssacli cannot be run.
func collectSSACLIDrives() []schema.HardwareStorage {
	out, queryErr := raidToolQuery("ssacli", "ctrl", "all", "show", "config", "detail")
	if queryErr == nil {
		return parseSSACLIPhysicalDrives(string(out))
	}
	slog.Info("raid: ssacli unavailable", "err", queryErr)
	return nil
}
// parseSSACLIPhysicalDrives converts ssacli configuration output into storage
// records.
//
// Each line matching ssacliPhysicalDriveLine opens a new drive. Its
// parenthesised summary is split on commas:
//   - the size is taken from the first field that parses as a size, because
//     its position varies between layouts ("894 GB, SAS HDD, OK" versus
//     "port 1I:box 1:bay 1, SAS HDD, 300 GB, OK");
//   - the second field supplies the interface and media type;
//   - with more than two fields, the last one supplies the status.
//
// "Serial Number", "Model" and "Status" detail lines that follow refine the
// open drive. A drive is emitted when the next one starts or the input ends.
func parseSSACLIPhysicalDrives(raw string) []schema.HardwareStorage {
	var out []schema.HardwareStorage
	var cur *schema.HardwareStorage
	// flush emits the drive being built, if it carries any identity.
	flush := func() {
		if cur == nil {
			return
		}
		if cur.Slot != nil || cur.SerialNumber != nil || cur.Model != nil {
			out = append(out, *cur)
		}
		cur = nil
	}

	for _, line := range strings.Split(raw, "\n") {
		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue
		}

		if m := ssacliPhysicalDriveLine.FindStringSubmatch(trimmed); len(m) == 3 {
			flush()
			present := true
			status := "UNKNOWN"
			s := schema.HardwareStorage{Present: &present, Status: &status}
			slot := m[1]
			s.Slot = &slot

			meta := strings.Split(m[2], ",")
			// Do not assume the size is the first field: scan for it.
			for _, field := range meta {
				if gb := parseHumanSizeToGB(strings.TrimSpace(field)); gb > 0 {
					s.SizeGB = &gb
					break
				}
			}
			if len(meta) > 1 {
				intf := parseSSACLIInterface(meta[1])
				if intf != "" {
					s.Interface = &intf
				}
				devType := inferDriveType(strings.ToUpper(meta[1]), intf)
				s.Type = &devType
			}
			if len(meta) > 2 {
				st := mapRAIDDriveStatus(meta[len(meta)-1])
				s.Status = &st
			}
			cur = &s
			continue
		}

		if cur == nil {
			continue
		}
		idx := strings.Index(trimmed, ":")
		if idx <= 0 {
			continue
		}
		key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
		val := strings.TrimSpace(trimmed[idx+1:])
		switch key {
		case "serial number":
			if val != "" {
				cur.SerialNumber = &val
			}
		case "model":
			if val != "" {
				cur.Model = &val
			}
		case "status":
			st := mapRAIDDriveStatus(val)
			cur.Status = &st
		}
	}
	flush()
	return out
}
// parseSSACLIInterface extracts the bus protocol from one field of an ssacli
// drive summary (for example " SAS HDD"). Matching is case-insensitive and
// tried in the order SAS, SATA, NVME; "" means no protocol was recognised.
func parseSSACLIInterface(raw string) string {
	field := strings.ToUpper(raw)
	if strings.Contains(field, "SAS") {
		return "SAS"
	}
	if strings.Contains(field, "SATA") {
		return "SATA"
	}
	if strings.Contains(field, "NVME") {
		return "NVME"
	}
	return ""
}
// parseStorcliDrivesJSON decodes storcli JSON output and returns one storage
// record per entry of every controller's "Drive Information" table. Entries
// that storcliDriveToStorage rejects are skipped. A decoding failure is logged
// and yields nil.
func parseStorcliDrivesJSON(raw []byte) []schema.HardwareStorage {
	// Only the fields consumed below are declared; the row struct must stay
	// identical to the parameter type of storcliDriveToStorage.
	var payload struct {
		Controllers []struct {
			ResponseData struct {
				DriveInformation []struct {
					EIDSlt string `json:"EID:Slt"`
					State  string `json:"State"`
					Size   string `json:"Size"`
					Intf   string `json:"Intf"`
					Med    string `json:"Med"`
					Model  string `json:"Model"`
					SN     string `json:"SN"`
					Sp     string `json:"Sp"`
					Type   string `json:"Type"`
				} `json:"Drive Information"`
			} `json:"Response Data"`
		} `json:"Controllers"`
	}
	if decodeErr := json.Unmarshal(raw, &payload); decodeErr != nil {
		slog.Warn("raid: parse storcli json failed", "err", decodeErr)
		return nil
	}

	var found []schema.HardwareStorage
	for _, controller := range payload.Controllers {
		for _, row := range controller.ResponseData.DriveInformation {
			rec := storcliDriveToStorage(row)
			if rec == nil {
				continue
			}
			found = append(found, *rec)
		}
	}
	return found
}
// storcliDriveToStorage maps one row of storcli's "Drive Information" table
// to a storage record. It returns nil when the row has no model, serial
// number or slot, since such a record cannot be identified.
func storcliDriveToStorage(d struct {
	EIDSlt string `json:"EID:Slt"`
	State  string `json:"State"`
	Size   string `json:"Size"`
	Intf   string `json:"Intf"`
	Med    string `json:"Med"`
	Model  string `json:"Model"`
	SN     string `json:"SN"`
	Sp     string `json:"Sp"`
	Type   string `json:"Type"`
}) *schema.HardwareStorage {
	present := true
	status := mapRAIDDriveStatus(d.State)
	rec := schema.HardwareStorage{Present: &present, Status: &status}

	if slot := strings.TrimSpace(d.EIDSlt); slot != "" {
		rec.Slot = &slot
	}
	if model := strings.TrimSpace(d.Model); model != "" {
		rec.Model = &model
	}
	if serial := strings.TrimSpace(d.SN); serial != "" {
		rec.SerialNumber = &serial
	}

	// Protocol and media are normalised to upper case before use.
	intf := strings.TrimSpace(strings.ToUpper(d.Intf))
	if intf != "" {
		ifaceCopy := intf
		rec.Interface = &ifaceCopy
	}
	media := strings.TrimSpace(strings.ToUpper(d.Med))
	if kind := inferDriveType(media, intf); kind != "" {
		rec.Type = &kind
	}

	if sizeGB := parseHumanSizeToGB(d.Size); sizeGB > 0 {
		rec.SizeGB = &sizeGB
	}

	// Only meaningful records are returned.
	if rec.Model == nil && rec.SerialNumber == nil && rec.Slot == nil {
		return nil
	}
	return &rec
}
// inferDriveType classifies a drive from its media and interface strings,
// both expected in upper case. Precedence: media containing "SSD" gives
// "SSD"; an NVME interface gives "NVMe"; media containing "HDD", or a SAS or
// SATA interface, gives "HDD"; anything else is "Unknown".
func inferDriveType(med, intf string) string {
	if strings.Contains(med, "SSD") {
		return "SSD"
	}
	if strings.Contains(intf, "NVME") {
		return "NVMe"
	}
	rotational := strings.Contains(med, "HDD") ||
		strings.Contains(intf, "SAS") ||
		strings.Contains(intf, "SATA")
	if rotational {
		return "HDD"
	}
	return "Unknown"
}
// mapRAIDDriveStatus normalises a vendor-specific drive state string to one of
// "OK", "WARNING", "CRITICAL" or "UNKNOWN".
//
// Matching is a case-insensitive substring search and is evaluated worst
// state first, so a combined state such as "Online, Rebuilding" is reported as
// WARNING rather than OK. Both the long and the abbreviated spellings are
// recognised ("ONLINE"/"ONLN", "OFFLINE"/"OFFLN", "REBUILD"/"RBLD").
func mapRAIDDriveStatus(raw string) string {
	u := strings.ToUpper(strings.TrimSpace(raw))
	containsAny := func(tokens ...string) bool {
		for _, tok := range tokens {
			if strings.Contains(u, tok) {
				return true
			}
		}
		return false
	}

	switch {
	case containsAny("FAIL", "OFFLINE", "OFFLN"):
		return "CRITICAL"
	case containsAny("RBLD", "REBUILD"):
		return "WARNING"
	case containsAny("OK", "OPTIMAL", "READY", "ONLN", "ONLINE"):
		return "OK"
	default:
		return "UNKNOWN"
	}
}
// parseHumanSizeToGB converts "<number> <unit>" (for example "894 GB" or
// "3.6 TB") to whole decimal gigabytes, truncating any fraction. The unit is
// matched case-insensitively by its TB/GB/MB prefix. Input without a
// separating space, with a non-numeric value or with another unit yields 0.
func parseHumanSizeToGB(raw string) int {
	tokens := strings.Fields(raw)
	if len(tokens) < 2 {
		return 0
	}
	amount, convErr := strconv.ParseFloat(tokens[0], 64)
	if convErr != nil {
		return 0
	}

	suffix := strings.ToUpper(tokens[1])
	if strings.HasPrefix(suffix, "TB") {
		return int(amount * 1000)
	}
	if strings.HasPrefix(suffix, "GB") {
		return int(amount)
	}
	if strings.HasPrefix(suffix, "MB") {
		return int(amount / 1000)
	}
	return 0
}
// appendUniqueStorage appends to base every drive from extra whose identity
// key (see storageIdentityKey) is non-empty and not already present in base
// or in an earlier element of extra. Drives without an identity key are
// dropped. base is returned unchanged when extra is empty.
func appendUniqueStorage(base, extra []schema.HardwareStorage) []schema.HardwareStorage {
	if len(extra) == 0 {
		return base
	}

	known := make(map[string]struct{}, len(base)+len(extra))
	for i := range base {
		known[storageIdentityKey(base[i])] = struct{}{}
	}
	for i := range extra {
		id := storageIdentityKey(extra[i])
		if id == "" {
			continue
		}
		if _, dup := known[id]; dup {
			continue
		}
		known[id] = struct{}{}
		base = append(base, extra[i])
	}
	return base
}
// storageIdentityKey builds the de-duplication key of a drive: "sn:<serial>"
// when a non-blank serial number is known, otherwise "modelslot:<model>:<slot>"
// when both model and slot are set. Values are trimmed and lower-cased. An
// empty string means the drive cannot be identified.
func storageIdentityKey(d schema.HardwareStorage) string {
	if d.SerialNumber != nil {
		if serial := strings.TrimSpace(*d.SerialNumber); serial != "" {
			return "sn:" + strings.ToLower(serial)
		}
	}
	if d.Model == nil || d.Slot == nil {
		return ""
	}
	model := strings.ToLower(strings.TrimSpace(*d.Model))
	slot := strings.ToLower(strings.TrimSpace(*d.Slot))
	return "modelslot:" + model + ":" + slot
}
// mdArray is one array entry parsed from /proc/mdstat text.
type mdArray struct {
// Name is the text left of " : " on the array line, e.g. "md126".
Name string
// Degraded is set when a health marker such as "[U_]" contains an underscore.
Degraded bool
// Members lists device names with the "[n]" suffix removed, e.g. "nvme0n1".
Members []string
}
// enrichStorageWithVROC annotates drives that are members of an md array with
// VROC telemetry when an Intel VROC/VMD controller is present.
//
// For every array in /proc/mdstat the serial number of each member device is
// looked up and matched (case-insensitively) against the serial numbers in
// storage. Matching drives get the telemetry keys "vroc_array" and
// "vroc_degraded"; members of a degraded array are additionally set to status
// "WARNING". The slice is modified in place and returned.
//
// A device can appear in more than one md array. In that case the first array
// seen is kept unless a later one is degraded and the recorded one is not, so
// a degraded state is never masked by a healthy array on the same disk.
func enrichStorageWithVROC(storage []schema.HardwareStorage, pcie []schema.HardwarePCIeDevice) []schema.HardwareStorage {
	if !hasVROCController(pcie) {
		return storage
	}
	raw, err := readMDStat()
	if err != nil {
		slog.Info("vroc: cannot read /proc/mdstat", "err", err)
		return storage
	}
	arrays := parseMDStatArrays(string(raw))
	if len(arrays) == 0 {
		slog.Info("vroc: no md arrays found")
		return storage
	}

	serialToArray := map[string]mdArray{}
	// memberSerial caches lookups: each one spawns external tools, and the
	// same device may be listed by several arrays.
	memberSerial := map[string]string{}
	for _, arr := range arrays {
		for _, member := range arr.Members {
			serial, cached := memberSerial[member]
			if !cached {
				serial = queryDeviceSerial("/dev/" + member)
				memberSerial[member] = serial
			}
			if serial == "" {
				continue
			}
			key := strings.ToLower(serial)
			if prev, seen := serialToArray[key]; seen && (prev.Degraded || !arr.Degraded) {
				continue
			}
			serialToArray[key] = arr
		}
	}
	if len(serialToArray) == 0 {
		return storage
	}

	updated := 0
	for i := range storage {
		if storage[i].SerialNumber == nil || strings.TrimSpace(*storage[i].SerialNumber) == "" {
			continue
		}
		arr, ok := serialToArray[strings.ToLower(strings.TrimSpace(*storage[i].SerialNumber))]
		if !ok {
			continue
		}
		if storage[i].Telemetry == nil {
			storage[i].Telemetry = map[string]any{}
		}
		storage[i].Telemetry["vroc_array"] = arr.Name
		storage[i].Telemetry["vroc_degraded"] = arr.Degraded
		if arr.Degraded {
			status := "WARNING"
			storage[i].Status = &status
		}
		updated++
	}
	slog.Info("vroc: enriched storage members", "count", updated)
	return storage
}
// hasVROCController reports whether the PCIe inventory contains an Intel
// device that looks like a VROC/VMD controller: its class mentions "raid", or
// its model mentions "vroc", "volume management device" or "vmd"
// (case-insensitive). Devices without a vendor ID never match.
func hasVROCController(pcie []schema.HardwarePCIeDevice) bool {
	for i := range pcie {
		dev := &pcie[i]
		if dev.VendorID == nil || *dev.VendorID != vendorIntel {
			continue
		}

		var class, model string
		if dev.DeviceClass != nil {
			class = strings.ToLower(*dev.DeviceClass)
		}
		if dev.Model != nil {
			model = strings.ToLower(*dev.Model)
		}

		switch {
		case strings.Contains(class, "raid"),
			strings.Contains(model, "vroc"),
			strings.Contains(model, "volume management device"),
			strings.Contains(model, "vmd"):
			return true
		}
	}
	return false
}
// mdHealthPattern matches an mdstat member-health marker such as "[UU]" or
// "[U_]": square brackets containing only 'U' and '_' characters.
var mdHealthPattern = regexp.MustCompile(`\[[U_]+\]`)
// parseMDStatArrays parses /proc/mdstat text into array descriptions.
//
// A line containing " : " that does not start with "[" is an array header,
// except for the "Personalities" and "unused devices" lines. The header's
// right-hand side is scanned for "name[n]" tokens, which become members. Any
// other non-blank line is attributed to the most recent array and marks it
// degraded when its first health marker (see mdHealthPattern) contains "_".
func parseMDStatArrays(raw string) []mdArray {
	var found []mdArray
	var open *mdArray

	for _, row := range strings.Split(raw, "\n") {
		text := strings.TrimSpace(row)
		if text == "" {
			continue
		}

		isHeader := strings.Contains(row, " : ") &&
			!strings.HasPrefix(strings.TrimLeft(row, " \t"), "[")
		if !isHeader {
			// Detail line of the array currently being read.
			if open != nil && strings.Contains(mdHealthPattern.FindString(text), "_") {
				open.Degraded = true
			}
			continue
		}

		halves := strings.SplitN(row, " : ", 2)
		name := strings.TrimSpace(halves[0])
		if strings.EqualFold(name, "Personalities") || strings.EqualFold(name, "unused devices") {
			continue
		}
		if open != nil {
			found = append(found, *open)
		}

		next := mdArray{Name: name}
		for _, tok := range strings.Fields(halves[1]) {
			bracket := strings.IndexByte(tok, '[')
			if bracket <= 0 {
				// State and level words ("active", "raid1") have no "[n]".
				continue
			}
			if dev := strings.TrimSpace(tok[:bracket]); dev != "" {
				next.Members = append(next.Members, dev)
			}
		}
		open = &next
	}

	if open != nil {
		found = append(found, *open)
	}
	return found
}
// queryDeviceSerial returns the serial number of the block device at devPath,
// or "" when it cannot be determined. It first asks `nvme id-ctrl` and falls
// back to `smartctl -j -i`; values are filtered through cleanDMIValue.
func queryDeviceSerial(devPath string) string {
	if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
		var ctrl nvmeIDCtrl
		if json.Unmarshal(out, &ctrl) == nil {
			if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" {
				return v
			}
		}
	}

	// smartctl's exit status is a bitmask that can be non-zero even though a
	// complete JSON document was written to stdout. Parse whatever was
	// produced instead of discarding it on a non-zero exit.
	out, err := exec.Command("smartctl", "-j", "-i", devPath).Output()
	if err != nil && len(out) == 0 {
		return ""
	}
	var info smartctlInfo
	if json.Unmarshal(out, &info) != nil {
		return ""
	}
	return cleanDMIValue(strings.TrimSpace(info.SerialNumber))
}

View File

@@ -0,0 +1,96 @@
package collector
import "testing"
// TestParseSASIrcuControllerIDs checks that only the numbered adapter rows of
// a "list" report are returned, in ascending order.
func TestParseSASIrcuControllerIDs(t *testing.T) {
	raw := `LSI Corporation SAS2 IR Configuration Utility.
Adapter List
==============
0 SAS2008(B2)
1 SAS2308_2(D1)
`
	ids := parseSASIrcuControllerIDs(raw)
	ok := len(ids) == 2 && ids[0] == 0 && ids[1] == 1
	if !ok {
		t.Fatalf("unexpected ids: %#v", ids)
	}
}
// TestParseSASIrcuDisplay feeds one hard-disk section followed by an enclosure
// section and expects only the disk to be reported, with its fields mapped.
func TestParseSASIrcuDisplay(t *testing.T) {
	raw := `Device is a Hard disk
Enclosure # : 32
Slot # : 7
State : Onln
Size (in MB)/(in sectors) : 953869/1953525168
Model Number : ST1000NM0033
Serial No : Z1D12345
Protocol : SAS
Drive Type : HDD
Device is a Enclosure services device
Enclosure # : 32
`
	// is reports whether p is set and equal to want.
	is := func(p *string, want string) bool { return p != nil && *p == want }

	drives := parseSASIrcuDisplay(raw)
	if n := len(drives); n != 1 {
		t.Fatalf("expected 1 drive, got %d", n)
	}
	d := drives[0]
	if !is(d.Slot, "32:7") {
		t.Fatalf("slot: %v", d.Slot)
	}
	if !is(d.SerialNumber, "Z1D12345") {
		t.Fatalf("serial: %v", d.SerialNumber)
	}
	if !is(d.Interface, "SAS") {
		t.Fatalf("interface: %v", d.Interface)
	}
	if !is(d.Status, "OK") {
		t.Fatalf("status: %v", d.Status)
	}
}
// TestParseArcconfPhysicalDrives checks type, interface and status mapping for
// a single SATA SSD section.
func TestParseArcconfPhysicalDrives(t *testing.T) {
	raw := `Device #0
Reported Location : Channel 0, Device 3
Model : Micron_5300
Serial number : ARC12345
State : Online
Total Size : 894 GB
Transfer Speed : SATA 6.0Gb/s
SSD : Yes
`
	// is reports whether p is set and equal to want.
	is := func(p *string, want string) bool { return p != nil && *p == want }

	drives := parseArcconfPhysicalDrives(raw)
	if n := len(drives); n != 1 {
		t.Fatalf("expected 1 drive, got %d", n)
	}
	d := drives[0]
	if !is(d.Type, "SSD") {
		t.Fatalf("type: %v", d.Type)
	}
	if !is(d.Interface, "SATA") {
		t.Fatalf("interface: %v", d.Interface)
	}
	if !is(d.Status, "OK") {
		t.Fatalf("status: %v", d.Status)
	}
}
// TestParseSSACLIPhysicalDrives checks that two drive entries are split
// correctly and that the trailing summary field is mapped to a status.
func TestParseSSACLIPhysicalDrives(t *testing.T) {
	raw := `physicaldrive 1I:1:1 (894 GB, SAS HDD, OK)
Serial Number: SSACLI001
Model: MB8000JVYZQ
physicaldrive 1I:1:2 (894 GB, SAS HDD, Failed)
Serial Number: SSACLI002
Model: MB8000JVYZQ
`
	drives := parseSSACLIPhysicalDrives(raw)
	if n := len(drives); n != 2 {
		t.Fatalf("expected 2 drives, got %d", n)
	}
	first, second := drives[0], drives[1]
	if first.Status == nil || *first.Status != "OK" {
		t.Fatalf("drive0 status: %v", first.Status)
	}
	if second.Status == nil || *second.Status != "CRITICAL" {
		t.Fatalf("drive1 status: %v", second.Status)
	}
}

View File

@@ -0,0 +1,57 @@
package collector
import (
"bee/audit/internal/schema"
"testing"
)
// TestParseMDStatArrays parses one healthy and one degraded RAID1 array and
// checks names, members and the degraded flag.
func TestParseMDStatArrays(t *testing.T) {
	raw := `Personalities : [raid1]
md126 : active raid1 nvme0n1[0] nvme1n1[1]
976630464 blocks super external:/md127/0 [2/2] [UU]
md125 : active raid1 nvme2n1[0] nvme3n1[1]
976630464 blocks super external:/md127/1 [2/1] [U_]
`
	arrays := parseMDStatArrays(raw)
	if n := len(arrays); n != 2 {
		t.Fatalf("expected 2 arrays, got %d", n)
	}

	healthy, degraded := arrays[0], arrays[1]
	if healthy.Name != "md126" || healthy.Degraded {
		t.Fatalf("unexpected array0: %+v", healthy)
	}
	if len(healthy.Members) != 2 || healthy.Members[0] != "nvme0n1" {
		t.Fatalf("unexpected members array0: %+v", healthy.Members)
	}
	if degraded.Name != "md125" || !degraded.Degraded {
		t.Fatalf("unexpected array1: %+v", degraded)
	}
}
// TestHasVROCController verifies the vendor gate of hasVROCController: a
// RAID-class device is only treated as VROC when its vendor ID is Intel's.
func TestHasVROCController(t *testing.T) {
	intel := vendorIntel
	model := "Volume Management Device NVMe RAID Controller"
	class := "RAID bus controller"
	tests := []struct {
		name string
		pcie []schema.HardwarePCIeDevice
		want bool
	}{
		{
			name: "intel vroc",
			pcie: []schema.HardwarePCIeDevice{{VendorID: &intel, Model: &model, DeviceClass: &class}},
			want: true,
		},
		{
			// Same RAID class and model, but no Intel vendor ID: the vendor
			// check alone must reject the device.
			name: "non-intel raid",
			pcie: []schema.HardwarePCIeDevice{{Model: &model, DeviceClass: &class}},
			want: false,
		},
		{
			name: "no devices",
			pcie: nil,
			want: false,
		},
	}
	for _, tt := range tests {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			if got := hasVROCController(tt.pcie); got != tt.want {
				t.Fatalf("%s: got %v want %v", tt.name, got, tt.want)
			}
		})
	}
}

97
iso/builder/build.sh Executable file
View File

@@ -0,0 +1,97 @@
#!/bin/sh
# build.sh — production ISO build (unattended mode)
#
# Steps:
#   1. build the static audit binary and place it in the overlay
#   2. copy optional vendor tools that were already fetched
#   3. build the NVIDIA modules and inject modules, nvidia-smi and libraries
#   4. write /etc/bee-release build metadata
#   5. run Alpine's mkimage.sh with the "bee" profile
set -e

REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
BUILDER_DIR="${REPO_ROOT}/iso/builder"
OVERLAY_DIR="${REPO_ROOT}/iso/overlay"
DIST_DIR="${REPO_ROOT}/dist"
VENDOR_DIR="${REPO_ROOT}/iso/vendor"

. "${BUILDER_DIR}/VERSIONS"
export PATH="$PATH:/usr/local/go/bin"

echo "=== bee production ISO build ==="
echo "Alpine: ${ALPINE_VERSION}, Go: ${GO_VERSION}, NVIDIA: ${NVIDIA_DRIVER_VERSION}"

AUDIT_BIN="${DIST_DIR}/bee-audit-linux-amd64"
mkdir -p "$DIST_DIR"
cd "${REPO_ROOT}/audit"
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
    go build \
    -ldflags "-s -w -X main.Version=${AUDIT_VERSION}" \
    -o "$AUDIT_BIN" \
    ./cmd/audit

mkdir -p "${OVERLAY_DIR}/usr/local/bin"
cp "$AUDIT_BIN" "${OVERLAY_DIR}/usr/local/bin/audit"
chmod +x "${OVERLAY_DIR}/usr/local/bin/audit"

# Copy optional vendor utilities if already fetched.
for tool in storcli64 sas2ircu sas3ircu mstflint; do
    if [ -f "${VENDOR_DIR}/${tool}" ]; then
        cp "${VENDOR_DIR}/${tool}" "${OVERLAY_DIR}/usr/local/bin/${tool}"
        chmod +x "${OVERLAY_DIR}/usr/local/bin/${tool}" || true
        echo "vendor tool: ${tool} (included)"
    else
        echo "vendor tool: ${tool} (not found, skipped)"
    fi
done

# Build and inject NVIDIA proprietary modules + userspace tools.
echo "=== building NVIDIA modules ==="
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"

# The kernel version is derived from the installed linux-headers-* directory.
KVER="$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | head -1)"
if [ -z "$KVER" ]; then
    # Without it every path below degenerates (".../lib/modules//extra/...").
    echo "ERROR: no linux-headers-* directory under /usr/src; cannot locate NVIDIA modules" >&2
    exit 1
fi
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"

mkdir -p "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia"
cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia/"

mkdir -p "${OVERLAY_DIR}/usr/local/bin" "${OVERLAY_DIR}/usr/lib"
cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_DIR}/usr/local/bin/"
chmod +x "${OVERLAY_DIR}/usr/local/bin/nvidia-smi"
# Userspace libraries are optional; a missing lib directory is not fatal.
cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_DIR}/usr/lib/" 2>/dev/null || true

# Embed build metadata used at runtime.
mkdir -p "${OVERLAY_DIR}/etc"
BUILD_DATE="$(date +%Y-%m-%d)"
GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
cat > "${OVERLAY_DIR}/etc/bee-release" <<EOF
BEE_ISO_VERSION=${AUDIT_VERSION}
BEE_AUDIT_VERSION=${AUDIT_VERSION}
BUILD_DATE=${BUILD_DATE}
GIT_COMMIT=${GIT_COMMIT}
ALPINE_VERSION=${ALPINE_VERSION}
NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
EOF

mkdir -p "${HOME}/.mkimage"
cp "${BUILDER_DIR}/mkimg.bee.sh" "${HOME}/.mkimage/"
cp "${BUILDER_DIR}/genapkovl-bee.sh" "${HOME}/.mkimage/"
export BEE_OVERLAY_DIR="${OVERLAY_DIR}"

# Drop stale work products but keep the expensive caches (apks, kernel,
# bootloaders) from a previous run.
if [ -d /var/tmp/bee-iso-work ]; then
    find /var/tmp/bee-iso-work -maxdepth 1 -mindepth 1 \
        -not -name 'apks_*' -not -name 'kernel_*' \
        -not -name 'syslinux_*' -not -name 'grub_*' \
        -exec rm -rf {} + 2>/dev/null || true
fi

export TMPDIR=/var/tmp
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
cd /var/tmp
sh /usr/share/aports/scripts/mkimage.sh \
    --tag "v${ALPINE_VERSION}" \
    --outdir "${DIST_DIR}" \
    --arch x86_64 \
    --repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
    --repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/community" \
    --workdir /var/tmp/bee-iso-work \
    --profile bee

# mkimage embeds the --tag value in the file name, so the image is expected
# with the "v" prefix; the un-prefixed name is accepted as a fallback.
ISO="${DIST_DIR}/alpine-bee-v${ALPINE_VERSION}-x86_64.iso"
if [ ! -f "$ISO" ]; then
    ISO="${DIST_DIR}/alpine-bee-${ALPINE_VERSION}-x86_64.iso"
fi
echo "=== done ==="
if [ -f "$ISO" ]; then
    echo "ISO: $ISO"
else
    echo "WARNING: ISO not found under the expected name; check ${DIST_DIR}" >&2
fi

82
iso/builder/genapkovl-bee.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/bin/sh -e
# genapkovl-bee.sh — build the apkovl (overlay tarball) for the production
# "bee" image. Called with the target hostname; writes
# "<hostname>.apkovl.tar.gz" into the current directory.
# Requires BEE_OVERLAY_DIR to point at the overlay tree to embed.

HOSTNAME="$1"
[ -n "$HOSTNAME" ] || { echo "usage: $0 hostname"; exit 1; }

OVERLAY="${BEE_OVERLAY_DIR}"
[ -n "$OVERLAY" ] || { echo "ERROR: BEE_OVERLAY_DIR not set"; exit 1; }

cleanup() { rm -rf "$tmp"; }
tmp="$(mktemp -d)"
trap cleanup EXIT

# makefile OWNER PERMS FILE: write stdin to FILE with the given owner and mode.
makefile() { OWNER="$1" PERMS="$2" FILENAME="$3"; cat > "$FILENAME"; chown "$OWNER" "$FILENAME"; chmod "$PERMS" "$FILENAME"; }
# rc_add SERVICE RUNLEVEL: enable an OpenRC service in a runlevel.
rc_add() { mkdir -p "$tmp/etc/runlevels/$2"; ln -sf /etc/init.d/"$1" "$tmp/etc/runlevels/$2/$1"; }

mkdir -p "$tmp/etc"
makefile root:root 0644 "$tmp/etc/hostname" <<EOT
$HOSTNAME
EOT

mkdir -p "$tmp/etc/network"
makefile root:root 0644 "$tmp/etc/network/interfaces" <<EOT
auto lo
iface lo inet loopback
EOT

mkdir -p "$tmp/etc/apk"
makefile root:root 0644 "$tmp/etc/apk/world" <<EOT
alpine-base
dmidecode
smartmontools
nvme-cli
pciutils
ipmitool
util-linux
lsblk
e2fsprogs
lshw
openrc
ca-certificates
tzdata
jq
wget
EOT

rc_add devfs sysinit
rc_add dmesg sysinit
rc_add mdev sysinit
rc_add hwdrivers sysinit
rc_add modloop sysinit
rc_add hwclock boot
rc_add modules boot
rc_add sysctl boot
rc_add hostname boot
rc_add bootmisc boot
rc_add syslog boot
rc_add mount-ro shutdown
rc_add killprocs shutdown
rc_add savecache shutdown
rc_add bee-network default
rc_add bee-update default
rc_add bee-nvidia default
rc_add bee-audit default

if [ -d "$OVERLAY/etc" ]; then
    cp -r "$OVERLAY/etc/." "$tmp/etc/"
    chmod +x "$tmp/etc/init.d/"* 2>/dev/null || true
fi

mkdir -p "$tmp/usr"
if [ -d "$OVERLAY/usr" ]; then
    cp -r "$OVERLAY/usr/." "$tmp/usr/"
    chmod +x "$tmp/usr/local/bin/"* 2>/dev/null || true
fi

# "lib" exists only when the overlay ships kernel modules. Archive it only in
# that case, so tar is never asked for a path that was not created.
dirs="etc usr"
if [ -d "$OVERLAY/lib" ]; then
    mkdir -p "$tmp/lib"
    cp -r "$OVERLAY/lib/." "$tmp/lib/"
    dirs="$dirs lib"
fi

# $dirs is intentionally unquoted: it is a space-separated list of names.
tar -c -C "$tmp" $dirs 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"

View File

@@ -89,6 +89,11 @@ if [ -d "$OVERLAY/root" ]; then
chmod 600 "$tmp/root/.ssh/authorized_keys" 2>/dev/null || true
fi
if [ -d "$OVERLAY/lib" ]; then
mkdir -p "$tmp/lib"
cp -r "$OVERLAY/lib/." "$tmp/lib/"
fi
mkdir -p "$tmp/etc/dropbear" "$tmp/etc/conf.d"
# -R: auto-generate host keys if missing
# no dependency on networking service — bee-network handles DHCP independently
@@ -97,4 +102,4 @@ DROPBEAR_OPTS="-R -B"
EOF
tar -c -C "$tmp" etc usr root 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
tar -c -C "$tmp" etc usr root lib 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"

47
iso/builder/mkimg.bee.sh Executable file
View File

@@ -0,0 +1,47 @@
#!/bin/sh
# Alpine mkimage profile: bee (production)
#
# Defines profile_bee, selected with "--profile bee". It only assigns the
# variables below: image identity, the overlay generator script, kernel
# flavor, initramfs settings and the package list.
profile_bee() {
title="Bee Hardware Audit"
desc="Hardware audit LiveCD (production unattended mode)"
arch="x86_64"
hostname="alpine-bee"
# Overlay generator script; build.sh copies it next to this profile.
apkovl="genapkovl-bee.sh"
image_ext="iso"
output_format="iso"
kernel_flavors="lts"
kernel_addons=""
initfs_cmdline="modules=loop,squashfs,sd-mod,usb-storage modloop=/boot/modloop-lts quiet"
initfs_features="ata base cdrom ext4 mmc nvme raid scsi squashfs usb virtio nfit"
# Packages for the image: base system, kernel, a selection of NIC firmware,
# and the hardware inspection tools.
apks="
alpine-base
linux-lts
linux-firmware-none
linux-firmware-rtl_nic
linux-firmware-bnx2
linux-firmware-bnx2x
linux-firmware-tigon
linux-firmware-qlogic
linux-firmware-netronome
linux-firmware-mellanox
linux-firmware-intel
linux-firmware-other
dmidecode
smartmontools
nvme-cli
pciutils
ipmitool
util-linux
lsblk
e2fsprogs
lshw
openrc
ca-certificates
tzdata
jq
wget
"
}

View File

@@ -11,9 +11,6 @@
Logs: /var/log/bee-audit.json /var/log/bee-network.log
Re-run audit: audit --output stdout | less
Restart net: bee-net-restart
Check tools: which dmidecode smartctl nvme ipmitool lspci
Open TUI: bee-tui
SSH access: key auth (developers) or bee/eeb (password fallback)

View File

@@ -1 +1,12 @@
PATH="$PATH:/usr/local/bin"
export PATH

# Start the TUI automatically for a local login on tty1.
# It is skipped when it was already launched once in this environment or when
# the session comes from SSH. Leaving the TUI drops back to this shell.
if [ -z "${BEE_TUI_AUTO_LAUNCHED:-}${SSH_CONNECTION:-}${SSH_TTY:-}" ]; then
    if [ "$(tty 2>/dev/null)" = "/dev/tty1" ] && [ -x /usr/local/bin/bee-tui ]; then
        BEE_TUI_AUTO_LAUNCHED=1
        export BEE_TUI_AUTO_LAUNCHED
        /usr/local/bin/bee-tui
    fi
fi

View File

@@ -0,0 +1,620 @@
#!/bin/sh
# bee-tui: interactive text menu for debug LiveCD operations.
set -u
# pause: print a blank line and a prompt, then wait for Enter.
pause() {
    printf '\nPress Enter to continue... '
    read -r _
}
# header: clear the screen and print the TUI banner followed by a blank line.
header() {
    clear
    cat <<'EOF'
==============================================
 bee TUI (debug)
==============================================

EOF
}
# list_ifaces: print the sorted names of network links, one per line, leaving
# out loopback and names starting with docker, virbr, veth, tun, tap, br-,
# bond or dummy.
list_ifaces() {
    ip -o link show \
        | awk -F': ' '{print $2}' \
        | grep -vE '^(lo$|docker|virbr|veth|tun|tap|br-|bond|dummy)' \
        | sort
}
# show_network_status: list every interface from list_ifaces with its link
# state and IPv4 addresses, then print the routing table.
show_network_status() {
    header
    printf 'Network interfaces\n\n'
    for iface in $(list_ifaces); do
        state=$(ip -o link show "$iface" | awk '{print $9}')
        ipv4=$(ip -o -4 addr show dev "$iface" | awk '{print $4}' | paste -sd ',')
        if [ -z "$ipv4" ]; then
            ipv4="(no IPv4)"
        fi
        echo "- $iface: state=$state ip=$ipv4"
    done
    echo
    ip route | sed 's/^/ route: /'
    pause
}
# choose_interface: show a numbered list of interfaces and ask for a number.
# On success the name is stored in CHOSEN_IFACE and 0 is returned; returns 1
# when no interface exists or the answer is not a valid list number.
choose_interface() {
    ifaces="$(list_ifaces)"
    if [ -z "$ifaces" ]; then
        echo "No physical interfaces found"
        return 1
    fi
    echo "$ifaces" | nl -w2 -s'. '
    echo
    printf 'Select interface number: '
    read -r idx || idx=""
    # Accept digits only: the value is interpolated into a sed script, and an
    # empty answer would otherwise print (and select) every line.
    case "$idx" in
        ''|*[!0-9]*)
            echo "Invalid interface selection"
            return 1
            ;;
    esac
    iface=$(echo "$ifaces" | sed -n "${idx}p" 2>/dev/null)
    if [ -z "$iface" ]; then
        echo "Invalid interface selection"
        return 1
    fi
    CHOSEN_IFACE="$iface"
    return 0
}
# network_dhcp_one: let the operator pick one interface, bring the link up and
# run udhcpc on it (-t 5 -T 3).
network_dhcp_one() {
    header
    printf 'DHCP on one interface\n\n'
    if ! choose_interface; then
        pause
        return
    fi
    iface="$CHOSEN_IFACE"
    echo
    echo "Starting DHCP on $iface..."
    # Failure to raise the link is ignored; udhcpc reports the real problem.
    ip link set "$iface" up 2>/dev/null || true
    udhcpc -i "$iface" -t 5 -T 3
    pause
}
# network_dhcp_all: run bee-net-restart to redo DHCP on all interfaces.
network_dhcp_all() {
    header
    printf 'Restarting DHCP on all physical interfaces...\n\n'
    /usr/local/bin/bee-net-restart
    pause
}
# network_static_one: interactive static IPv4 setup for one interface.
# Asks for address, netmask (prefix length or dotted), default gateway and an
# optional DNS server. It then flushes the interface's addresses, assigns the
# new one, replaces the default route and, if a DNS server was given,
# overwrites /etc/resolv.conf.
network_static_one() {
    header
    printf 'Static IPv4 setup\n\n'
    if ! choose_interface; then
        pause
        return
    fi
    iface="$CHOSEN_IFACE"
    echo

    printf 'IPv4 address (example 192.168.1.10): '
    read -r ip
    [ -n "$ip" ] || { echo "IP address is required"; pause; return; }

    printf 'Netmask (example 24 or 255.255.255.0): '
    read -r mask
    [ -n "$mask" ] || { echo "Netmask is required"; pause; return; }

    prefix=$(mask_to_prefix "$mask")
    [ -n "$prefix" ] || { echo "Invalid netmask: $mask"; pause; return; }
    cidr="$ip/$prefix"

    printf 'Default gateway: '
    read -r gw
    [ -n "$gw" ] || { echo "Default gateway is required"; pause; return; }

    printf 'DNS server (optional): '
    read -r dns

    ip link set "$iface" up 2>/dev/null || true
    ip addr flush dev "$iface"
    if ! ip addr add "$cidr" dev "$iface"; then
        echo "Failed to set IP"
        pause
        return
    fi

    # The gateway is known to be non-empty here: replace any existing default.
    ip route del default >/dev/null 2>&1 || true
    ip route add default via "$gw" dev "$iface"

    if [ -n "$dns" ]; then
        printf 'nameserver %s\n' "$dns" > /etc/resolv.conf
    fi

    echo
    echo "Static config applied to $iface"
    pause
}
# mask_to_prefix MASK: print the prefix length for MASK.
# MASK is either a prefix length (0-32), echoed back unchanged, or a
# contiguous dotted netmask from 0.0.0.0 to 255.255.255.255. Surrounding
# whitespace is ignored. Prints nothing and returns 1 for anything else.
mask_to_prefix() {
    mask="$(echo "$1" | tr -d '[:space:]')"
    case "$mask" in
        [0-9]|[12][0-9]|3[0-2])
            echo "$mask"
            return 0
            ;;
    esac
    case "$mask" in
        0.0.0.0) echo 0 ;;
        128.0.0.0) echo 1 ;;
        192.0.0.0) echo 2 ;;
        224.0.0.0) echo 3 ;;
        240.0.0.0) echo 4 ;;
        248.0.0.0) echo 5 ;;
        252.0.0.0) echo 6 ;;
        254.0.0.0) echo 7 ;;
        255.0.0.0) echo 8 ;;
        255.128.0.0) echo 9 ;;
        255.192.0.0) echo 10 ;;
        255.224.0.0) echo 11 ;;
        255.240.0.0) echo 12 ;;
        255.248.0.0) echo 13 ;;
        255.252.0.0) echo 14 ;;
        255.254.0.0) echo 15 ;;
        255.255.0.0) echo 16 ;;
        255.255.128.0) echo 17 ;;
        255.255.192.0) echo 18 ;;
        255.255.224.0) echo 19 ;;
        255.255.240.0) echo 20 ;;
        255.255.248.0) echo 21 ;;
        255.255.252.0) echo 22 ;;
        255.255.254.0) echo 23 ;;
        255.255.255.0) echo 24 ;;
        255.255.255.128) echo 25 ;;
        255.255.255.192) echo 26 ;;
        255.255.255.224) echo 27 ;;
        255.255.255.240) echo 28 ;;
        255.255.255.248) echo 29 ;;
        255.255.255.252) echo 30 ;;
        255.255.255.254) echo 31 ;;
        255.255.255.255) echo 32 ;;
        *) return 1 ;;
    esac
}
# network_menu: network submenu loop. Returns on "Back" or when stdin reaches
# end of file, so a closed terminal cannot make the loop spin forever.
network_menu() {
    while true; do
        header
        echo "Network"
        echo "1. Show network status"
        echo "2. DHCP on all interfaces"
        echo "3. DHCP on one interface"
        echo "4. Set static IPv4 on one interface"
        echo "5. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) show_network_status ;;
            2) network_dhcp_all ;;
            3) network_dhcp_one ;;
            4) network_static_one ;;
            5) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# bee_services_list: print the names of all /etc/init.d/bee-* scripts, one
# per line. Prints nothing when the glob matches no file.
bee_services_list() {
    for path in /etc/init.d/bee-*; do
        if [ -e "$path" ]; then
            basename "$path"
        fi
    done
}
# services_status_all: print "running" or "stopped" for every bee-* service,
# based on the exit status of "rc-service <name> status".
services_status_all() {
    header
    printf 'bee service status\n\n'
    for svc in $(bee_services_list); do
        if ! rc-service "$svc" status >/dev/null 2>&1; then
            echo "- $svc: stopped"
        else
            echo "- $svc: running"
        fi
    done
    pause
}
# choose_service: show a numbered list of bee-* services and ask for a number.
# On success the name is stored in CHOSEN_SERVICE and 0 is returned; returns 1
# when no service exists or the answer is not a valid list number.
choose_service() {
    svcs="$(bee_services_list)"
    if [ -z "$svcs" ]; then
        echo "No bee-* services found"
        return 1
    fi
    echo "$svcs" | nl -w2 -s'. '
    echo
    printf 'Select service number: '
    read -r idx || idx=""
    # Accept digits only: the value is interpolated into a sed script, and an
    # empty answer would otherwise print (and select) every line.
    case "$idx" in
        ''|*[!0-9]*)
            echo "Invalid service selection"
            return 1
            ;;
    esac
    svc=$(echo "$svcs" | sed -n "${idx}p" 2>/dev/null)
    if [ -z "$svc" ]; then
        echo "Invalid service selection"
        return 1
    fi
    CHOSEN_SERVICE="$svc"
    return 0
}
# service_action_menu: pick one bee-* service and run a single rc-service
# action on it. "toggle" stops the service when its status check succeeds and
# starts it otherwise. rc-service failures are shown but not treated as fatal.
service_action_menu() {
    header
    printf 'Service action\n\n'
    if ! choose_service; then
        pause
        return
    fi
    svc="$CHOSEN_SERVICE"
    echo
    echo "Selected: $svc"
    echo "1. status"
    echo "2. restart"
    echo "3. start"
    echo "4. stop"
    echo "5. toggle start/stop"
    echo
    printf 'Choice: '
    read -r act
    case "$act" in
        1) rc-service "$svc" status || true ;;
        2) rc-service "$svc" restart || true ;;
        3) rc-service "$svc" start || true ;;
        4) rc-service "$svc" stop || true ;;
        5)
            if ! rc-service "$svc" status >/dev/null 2>&1; then
                rc-service "$svc" start || true
            else
                rc-service "$svc" stop || true
            fi
            ;;
        *) echo "Invalid action" ;;
    esac
    pause
}
# services_menu: service management submenu loop. Returns on "Back" or when
# stdin reaches end of file, so a closed terminal cannot make the loop spin.
services_menu() {
    while true; do
        header
        echo "bee Services"
        echo "1. Status of all bee-* services"
        echo "2. Manage one service (status/restart/start/stop/toggle)"
        echo "3. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) services_status_all ;;
            2) service_action_menu ;;
            3) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# confirm_phrase PHRASE PROMPT: ask the operator to type PHRASE.
# Returns 0 only when the typed line equals PHRASE exactly.
confirm_phrase() {
    phrase="$1"
    prompt="$2"
    printf '\n%s (%s): ' "$prompt" "$phrase"
    read -r value
    if [ "$value" = "$phrase" ]; then
        return 0
    fi
    return 1
}
# shutdown_menu: power-control submenu loop. Every action that changes the
# power state must be confirmed by typing a phrase. Returns on "Back" or when
# stdin reaches end of file, so a closed terminal cannot make the loop spin.
shutdown_menu() {
    while true; do
        header
        echo "Shutdown/Reboot Tests"
        echo "1. Reboot now"
        echo "2. Power off now"
        echo "3. Schedule poweroff in 60s"
        echo "4. Cancel scheduled shutdown"
        echo "5. IPMI chassis power status"
        echo "6. IPMI chassis power soft"
        echo "7. IPMI chassis power cycle"
        echo "8. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1)
                confirm_phrase "REBOOT" "Type confirmation" || { echo "Canceled"; pause; continue; }
                reboot
                ;;
            2)
                confirm_phrase "POWEROFF" "Type confirmation" || { echo "Canceled"; pause; continue; }
                poweroff
                ;;
            3)
                confirm_phrase "SCHEDULE" "Type confirmation" || { echo "Canceled"; pause; continue; }
                shutdown -P +1 "bee test: scheduled poweroff in 60 seconds"
                echo "Scheduled"
                pause
                ;;
            4)
                shutdown -c || true
                echo "Canceled (if any schedule existed)"
                pause
                ;;
            5)
                ipmitool chassis power status || echo "ipmitool power status failed"
                pause
                ;;
            6)
                confirm_phrase "IPMI-SOFT" "Type confirmation" || { echo "Canceled"; pause; continue; }
                ipmitool chassis power soft || echo "ipmitool soft power failed"
                pause
                ;;
            7)
                confirm_phrase "IPMI-CYCLE" "Type confirmation" || { echo "Canceled"; pause; continue; }
                ipmitool chassis power cycle || echo "ipmitool power cycle failed"
                pause
                ;;
            8)
                return
                ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
# gpu_burn_10m: run "gpu_burn 600" after checking that gpu_burn is in PATH and
# that nvidia-smi can list GPUs, and after an explicit typed confirmation.
# Output is shown live and copied to /var/log/bee-gpuburn.log.
gpu_burn_10m() {
    header
    printf 'GPU Burn (10 minutes)\n\n'

    command -v gpu_burn >/dev/null 2>&1 || {
        echo "gpu_burn binary not found in PATH"
        echo "Expected command: gpu_burn"
        pause
        return
    }

    if ! { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; }; then
        echo "NVIDIA driver/GPU not ready (nvidia-smi failed)"
        pause
        return
    fi

    if ! confirm_phrase "GPU-BURN" "Type confirmation to start benchmark"; then
        echo "Canceled"
        pause
        return
    fi

    echo "Running: gpu_burn 600"
    echo "Log: /var/log/bee-gpuburn.log"
    gpu_burn 600 2>&1 | tee /var/log/bee-gpuburn.log
    echo
    echo "GPU Burn finished"
    pause
}
# gpu_benchmarks_menu: GPU benchmark submenu loop. Returns on "Back" or when
# stdin reaches end of file, so a closed terminal cannot make the loop spin.
gpu_benchmarks_menu() {
    while true; do
        header
        echo "Benchmarks -> GPU"
        echo "1. GPU Burn (10 minutes)"
        echo "2. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) gpu_burn_10m ;;
            2) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# benchmarks_menu: benchmark category menu loop. Returns on "Back" or when
# stdin reaches end of file, so a closed terminal cannot make the loop spin.
benchmarks_menu() {
    while true; do
        header
        echo "Benchmarks"
        echo "1. GPU"
        echo "2. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) gpu_benchmarks_menu ;;
            2) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# run_cmd_log LABEL CMD LOGFILE: run CMD through "sh -c" and write a header
# (label, UTC time, command line) plus the command's stdout and stderr to
# LOGFILE, replacing its contents. Returns the exit status of CMD.
run_cmd_log() {
    label="$1"
    cmd="$2"
    log_file="$3"
    {
        echo "=== $label ==="
        echo "time: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
        echo "cmd: $cmd"
        echo
        sh -c "$cmd"
    } >"$log_file" 2>&1
}
# run_gpu_nvidia_acceptance_test: run the NVIDIA acceptance command pack.
# After a typed confirmation it runs nvidia-smi -q, two dmidecode queries and
# nvidia-bug-report.sh, each logged to its own file under
# /var/log/bee-sat/gpu-nvidia-<UTC timestamp>/, records the exit codes in
# summary.txt and packs the directory into a .tar.gz next to it.
run_gpu_nvidia_acceptance_test() {
    header
    echo "System acceptance tests -> GPU NVIDIA"
    echo
    confirm_phrase "SAT-GPU" "Type confirmation to start tests" || { echo "Canceled"; pause; return; }

    ts="$(date -u '+%Y%m%d-%H%M%S')"
    base_dir="/var/log/bee-sat"
    run_dir="$base_dir/gpu-nvidia-$ts"
    archive="$base_dir/gpu-nvidia-$ts.tar.gz"
    mkdir -p "$run_dir"
    summary="$run_dir/summary.txt"
    : >"$summary"

    echo "Running acceptance commands..."
    echo "Logs directory: $run_dir"
    echo "Archive target: $archive"
    echo

    c1="nvidia-smi -q"
    c2="dmidecode -t baseboard"
    c3="dmidecode -t system"
    # nvidia-bug-report.sh writes its artifact into the current directory.
    # Run it from the run directory so the file lands next to the logs; this
    # avoids archiving a stale report left in the caller's cwd by an earlier
    # run, and does not depend on that cwd being writable.
    c4="cd '$run_dir' && nvidia-bug-report.sh"

    run_cmd_log "nvidia_smi_q" "$c1" "$run_dir/01-nvidia-smi-q.log"; rc1=$?
    run_cmd_log "dmidecode_baseboard" "$c2" "$run_dir/02-dmidecode-baseboard.log"; rc2=$?
    run_cmd_log "dmidecode_system" "$c3" "$run_dir/03-dmidecode-system.log"; rc3=$?
    run_cmd_log "nvidia_bug_report" "$c4" "$run_dir/04-nvidia-bug-report.log"; rc4=$?

    {
        echo "run_at_utc=$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
        echo "cmd_nvidia_smi_q_rc=$rc1"
        echo "cmd_dmidecode_baseboard_rc=$rc2"
        echo "cmd_dmidecode_system_rc=$rc3"
        echo "cmd_nvidia_bug_report_rc=$rc4"
    } >>"$summary"

    tar -czf "$archive" -C "$base_dir" "gpu-nvidia-$ts"
    tar_rc=$?
    # Written after the archive exists, so only the on-disk summary has it.
    echo "archive_rc=$tar_rc" >>"$summary"

    echo
    echo "Done."
    echo "- Logs: $run_dir"
    echo "- Archive: $archive (rc=$tar_rc)"
    pause
}
# gpu_nvidia_sat_menu: NVIDIA acceptance-test submenu loop. Returns on "Back"
# or when stdin reaches end of file, so a closed terminal cannot make the loop
# spin.
gpu_nvidia_sat_menu() {
    while true; do
        header
        echo "System acceptance tests -> GPU NVIDIA"
        echo "1. Run command pack"
        echo "2. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) run_gpu_nvidia_acceptance_test ;;
            2) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# system_acceptance_tests_menu: acceptance-test category menu loop. Returns on
# "Back" or when stdin reaches end of file, so a closed terminal cannot make
# the loop spin.
system_acceptance_tests_menu() {
    while true; do
        header
        echo "System acceptance tests"
        echo "1. GPU NVIDIA"
        echo "2. Back"
        echo
        printf 'Choice: '
        read -r choice || return
        case "$choice" in
            1) gpu_nvidia_sat_menu ;;
            2) return ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
# run_audit_now: run the audit binary with "--output stdout", storing its
# stdout in /var/log/bee-audit.json and its stderr in /var/log/bee-audit.log,
# then report whether it exited with status 0.
run_audit_now() {
    header
    printf 'Run audit now\n\n'
    if /usr/local/bin/audit --output stdout > /var/log/bee-audit.json 2>/var/log/bee-audit.log; then
        rc=0
        echo "Audit completed successfully"
    else
        rc=$?
        echo "Audit finished with errors (rc=$rc)"
    fi
    echo "Logs: /var/log/bee-audit.log, /var/log/bee-audit.json"
    pause
}
# check_required_tools: report, for each tool the TUI and audit rely on,
# whether it is found in PATH and where.
check_required_tools() {
    header
    printf 'Required tools check\n\n'
    for tool in dmidecode smartctl nvme ipmitool lspci audit nvidia-smi gpu_burn; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            echo "- $tool: MISSING"
            continue
        fi
        echo "- $tool: OK ($(command -v "$tool"))"
    done
    pause
}
# main_menu: top-level menu loop of the TUI. Exits the script with status 0 on
# "Exit to console" and also when stdin reaches end of file, so a closed
# terminal cannot leave the loop spinning on "Invalid choice".
main_menu() {
    while true; do
        header
        echo "Main Menu"
        echo "1. Network setup"
        echo "2. bee service management"
        echo "3. Shutdown/reboot tests"
        echo "4. Benchmarks"
        echo "5. System acceptance tests"
        echo "6. Run audit now"
        echo "7. Check required tools"
        echo "8. Show last audit log tail"
        echo "9. Exit to console"
        echo
        printf 'Choice: '
        read -r choice || exit 0
        case "$choice" in
            1) network_menu ;;
            2) services_menu ;;
            3) shutdown_menu ;;
            4) benchmarks_menu ;;
            5) system_acceptance_tests_menu ;;
            6) run_audit_now ;;
            7) check_required_tools ;;
            8)
                header
                tail -n 40 /var/log/bee-audit.log 2>/dev/null || echo "No /var/log/bee-audit.log"
                echo
                tail -n 20 /var/log/bee-audit.json 2>/dev/null || true
                pause
                ;;
            9) exit 0 ;;
            *) echo "Invalid choice"; pause ;;
        esac
    done
}
main_menu

View File

@@ -0,0 +1,20 @@
#!/sbin/openrc-run
# OpenRC service: one-shot hardware audit for the unattended production image.

description="Bee: run hardware audit (production unattended mode)"

# Requires local filesystems; ordered after bee-update and bee-nvidia.
depend() {
    need localmount
    after bee-update bee-nvidia
}

# Runs the audit with stdout captured in /var/log/bee-audit.json and stderr in
# /var/log/bee-audit.log. An audit failure is only reported as a warning: the
# service itself always ends with status 0.
start() {
    ebegin "Running hardware audit"
    /usr/local/bin/audit --output usb \
        >/var/log/bee-audit.json \
        2>/var/log/bee-audit.log
    rc=$?
    case "$rc" in
        0) einfo "Audit complete" ;;
        *) ewarn "Audit finished with errors" ;;
    esac
    eend 0
}

View File

@@ -0,0 +1,14 @@
#!/sbin/openrc-run
# OpenRC service: start DHCP on the network interfaces early in boot.

description="Bee: bring up network interfaces via DHCP"

# Requires local filesystems; ordered before bee-update and bee-audit.
depend() {
    need localmount
    before bee-update bee-audit
}

# Delegates to bee-network.sh, appending all of its output to
# /var/log/bee-network.log. The helper's exit status is ignored: the service
# always ends with status 0.
start() {
    ebegin "Bringing up network interfaces"
    {
        /usr/local/bin/bee-network.sh
    } >>/var/log/bee-network.log 2>&1
    eend 0
}

View File

@@ -0,0 +1,23 @@
#!/sbin/openrc-run
# OpenRC service: load the NVIDIA kernel modules ahead of the audit.

description="Bee: load NVIDIA kernel modules"

# Requires local filesystems; ordered before bee-audit.
depend() {
    need localmount
    before bee-audit
}

# Refreshes module dependencies (best effort), then tries each module in turn.
# A module that fails to load is only warned about: the service always ends
# with status 0.
start() {
    ebegin "Loading NVIDIA modules"
    depmod -a 2>/dev/null || :
    for mod in nvidia nvidia-modeset nvidia-uvm; do
        if ! modprobe "$mod" 2>/dev/null; then
            ewarn "failed to load: $mod"
            continue
        fi
        einfo "loaded: $mod"
    done
    eend 0
}

View File

@@ -0,0 +1,15 @@
#!/sbin/openrc-run
# OpenRC service: look for a newer audit binary before the audit runs.

description="Bee: update audit binary from USB/network"

# Requires local filesystems; ordered after bee-network and before bee-audit.
depend() {
    need localmount
    after bee-network
    before bee-audit
}

# Delegates to bee-update.sh, appending all of its output to
# /var/log/bee-update.log. The helper's exit status is ignored: the service
# always ends with status 0.
start() {
    ebegin "Checking for audit binary update"
    {
        /usr/local/bin/bee-update.sh
    } >>/var/log/bee-update.log 2>&1
    eend 0
}

8
iso/overlay/etc/motd Normal file
View File

@@ -0,0 +1,8 @@
Bee Hardware Audit LiveCD
Mode: Production unattended
Logs:
/var/log/bee-network.log
/var/log/bee-update.log
/var/log/bee-audit.log
/var/log/bee-audit.json

View File

@@ -0,0 +1 @@
# Append /usr/local/bin to PATH for shells that source this snippet.
export PATH="$PATH:/usr/local/bin"

View File

@@ -0,0 +1,24 @@
#!/bin/sh
# bee-network.sh — bring up all physical interfaces via DHCP (non-blocking)

LOG_PREFIX="bee-network"

# log MESSAGE... — print a message tagged with the script name.
log() { echo "[$LOG_PREFIX] $*"; }

# `ip -o link show` prints one link per line as "IDX: NAME: <FLAGS> ...".
# Links stacked on another device are printed as "NAME@PARENT"; the "@PARENT"
# suffix is display-only and is not a valid device name, so it is stripped
# before the name is handed to `ip link set` / udhcpc.
# Loopback and well-known virtual interface prefixes are filtered out.
interfaces=$(ip -o link show \
    | awk -F': ' '{ name = $2; sub(/@.*/, "", name); print name }' \
    | grep -v '^lo$' \
    | grep -vE '^(docker|virbr|veth|tun|tap|br-|bond|dummy)' \
    | sort)

if [ -z "$interfaces" ]; then
    log "no physical interfaces found"
    exit 0
fi

# Word-splitting of $interfaces is intentional: one interface name per word.
for iface in $interfaces; do
    ip link set "$iface" up 2>/dev/null || { log "WARN: failed to bring up $iface"; continue; }
    # Run each DHCP client in the background so one slow link never delays the rest.
    udhcpc -i "$iface" -b -t 0 -T 3 >/dev/null 2>&1 &
    log "dhcp started for $iface"
done

log "done"

View File

@@ -0,0 +1,108 @@
#!/bin/sh
# bee-update.sh — production update path: USB first, then network.
# Unattended: logs only, never blocks boot.
# Treat expansion of an unset variable as an error.
set -u
LOG_PREFIX="bee-update"
# log MESSAGE... — print a message prefixed with "[$LOG_PREFIX]".
log() { echo "[$LOG_PREFIX] $*"; }
# Installed audit binary that an update replaces.
AUDIT_BIN="/usr/local/bin/audit"
# Scratch locations for a binary and signature downloaded from the network.
TMP_BIN="/tmp/bee-audit-new"
TMP_SIG="/tmp/bee-audit-new.sig"
# Release metadata endpoint; override with the BEE_RELEASE_API environment variable.
# NOTE(review): the default URL contains a literal "<org>" placeholder — confirm it is
# substituted at build time, otherwise the network update path cannot succeed.
REPO_API="${BEE_RELEASE_API:-https://git.mchus.pro/api/v1/repos/<org>/bee/releases/latest}"
# version_of BIN — print the first line of `BIN --version` with all whitespace
# removed. Prints nothing when BIN cannot be run or produces no output.
version_of() {
    "$1" --version 2>/dev/null \
        | head -n 1 \
        | tr -d '[:space:]'
}
# apply_update SRC_BIN SRC_SIG SRC_VER — install SRC_BIN as $AUDIT_BIN.
#
#   SRC_BIN  path of the candidate audit binary
#   SRC_SIG  path of its detached signature (must exist; not verified here)
#   SRC_VER  version string, used for logging only
#
# Returns 0 when the binary was replaced, 1 otherwise. On any failure the
# currently installed binary is left untouched.
apply_update() {
    src_bin="$1"
    src_sig="$2"
    src_ver="$3"

    # Only require regular files: the exec bit is set on the installed copy
    # below, so a payload from a USB stick without exec bits is still usable.
    if [ ! -f "$src_bin" ] || [ ! -f "$src_sig" ]; then
        log "missing binary or signature"
        return 1
    fi
    # A zero-byte payload (e.g. interrupted download) must never replace a working binary.
    if [ ! -s "$src_bin" ]; then
        log "update binary is empty: $src_bin"
        return 1
    fi

    # NOTE: strict signature verification should be implemented in audit updater module.
    # Here we keep shell side minimal and fail-open for now.

    # Stage next to the target and rename into place: a failed or partial copy
    # can then never leave a truncated binary at $AUDIT_BIN.
    staged="$AUDIT_BIN.new.$$"
    if ! cp "$src_bin" "$staged" || ! chmod +x "$staged"; then
        rm -f "$staged"
        log "failed to stage update at $staged"
        return 1
    fi
    if ! mv -f "$staged" "$AUDIT_BIN"; then
        rm -f "$staged"
        log "failed to replace $AUDIT_BIN"
        return 1
    fi

    log "updated audit binary to $src_ver"
    return 0
}
# check_usb_update — look for an update payload on mounted removable media.
#
# A payload is a "bee-update" directory holding the binary, its .sig file and
# a VERSION file. Only the first complete payload found is considered.
# Returns 0 when that payload has the installed version or was applied,
# 1 when applying it failed or no payload was found.
check_usb_update() {
    for root in /media/* /mnt/* /tmp/bee-usb /run/media/*/*; do
        # Unmatched globs stay literal; this test drops them too.
        [ -d "$root" ] || continue

        base="$root/bee-update"
        bin="$base/bee-audit-linux-amd64"
        sig="$base/bee-audit-linux-amd64.sig"
        ver_file="$base/VERSION"

        # All three files must be present for the payload to count.
        if [ ! -f "$bin" ] || [ ! -f "$sig" ] || [ ! -f "$ver_file" ]; then
            continue
        fi

        new_ver=$(cat "$ver_file" 2>/dev/null | tr -d '[:space:]')
        cur_ver=$(version_of "$AUDIT_BIN")
        if [ -z "$new_ver" ]; then
            continue
        fi

        if [ "$new_ver" = "$cur_ver" ]; then
            log "usb update found but version is same ($new_ver)"
            return 0
        fi

        log "usb update candidate: $new_ver"
        if apply_update "$bin" "$sig" "$new_ver"; then
            return 0
        fi
        return 1
    done
    return 1
}
# check_network_update — fetch the latest release metadata from $REPO_API and
# install the release binary when its tag differs from the installed version.
#
# Returns 0 when already up to date or the update was applied, 1 otherwise.
# Every network call is time-bounded so this step cannot stall boot.
check_network_update() {
    # Probe the host that $REPO_API actually points at (it may be overridden
    # through BEE_RELEASE_API), not a hard-coded one.
    api_host=${REPO_API#*://}       # drop scheme
    api_host=${api_host%%/*}        # drop path
    api_host=${api_host##*@}        # drop userinfo, if any
    case "$api_host" in
        \[*)
            # Bracketed IPv6 literal: keep what is between the brackets.
            api_host=${api_host#\[}
            api_host=${api_host%%\]*}
            ;;
        *)
            api_host=${api_host%%:*}    # drop port
            ;;
    esac

    if ! ping -c 1 -W 3 "$api_host" >/dev/null 2>&1; then
        log "network unavailable; skip release check"
        return 1
    fi
    if ! command -v wget >/dev/null 2>&1; then
        log "wget not found; skip network update"
        return 1
    fi
    if ! command -v jq >/dev/null 2>&1; then
        log "jq not found; skip network update"
        return 1
    fi

    meta="/tmp/bee-release-latest.json"
    wget -q -T 15 -O "$meta" "$REPO_API" || { log "failed to fetch release metadata"; return 1; }

    tag=$(jq -r '.tag_name // empty' "$meta")
    [ -n "$tag" ] || { log "release metadata missing tag_name"; return 1; }

    cur_ver=$(version_of "$AUDIT_BIN")
    if [ "$tag" = "$cur_ver" ]; then
        log "already latest ($tag)"
        return 0
    fi

    bin_url=$(jq -r '.assets[]? | select(.name=="bee-audit-linux-amd64") | .browser_download_url // empty' "$meta")
    sig_url=$(jq -r '.assets[]? | select(.name=="bee-audit-linux-amd64.sig") | .browser_download_url // empty' "$meta")
    [ -n "$bin_url" ] && [ -n "$sig_url" ] || { log "missing release asset URLs"; return 1; }

    # Remove partial downloads on failure so a later run never picks up a truncated file.
    if ! wget -q -T 15 -O "$TMP_BIN" "$bin_url"; then
        log "failed to download release binary"
        rm -f "$TMP_BIN"
        return 1
    fi
    if ! wget -q -T 15 -O "$TMP_SIG" "$sig_url"; then
        log "failed to download release signature"
        rm -f "$TMP_BIN" "$TMP_SIG"
        return 1
    fi
    chmod +x "$TMP_BIN"

    log "network update candidate: $tag"
    apply_update "$TMP_BIN" "$TMP_SIG" "$tag"
    update_rc=$?

    # The staged download is no longer needed whether or not the install succeeded.
    rm -f "$TMP_BIN" "$TMP_SIG"
    return "$update_rc"
}
# main — try the USB update path first; fall back to the network path only
# when USB yields nothing. A network failure is deliberately ignored.
main() {
    check_usb_update && exit 0
    check_network_update || :
}

main "$@"

0
iso/vendor/.gitkeep vendored Normal file
View File

60
scripts/fetch-vendor.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/sh
# fetch-vendor.sh — download proprietary vendor utilities into iso/vendor.
#
# Usage:
# STORCLI_URL=... STORCLI_SHA256=... \
# SAS2IRCU_URL=... SAS2IRCU_SHA256=... \
# SAS3IRCU_URL=... SAS3IRCU_SHA256=... \
# MSTFLINT_URL=... MSTFLINT_SHA256=... \
# sh scripts/fetch-vendor.sh
# Exit on any command failure (-e) and on use of an unset variable (-u).
set -eu
# Repository root: the parent of the directory that contains this script.
ROOT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
# Destination directory for the fetched vendor tools; created if missing.
OUT_DIR="$ROOT_DIR/iso/vendor"
mkdir -p "$OUT_DIR"
# need_cmd NAME — abort the script with an error on stderr unless NAME
# resolves via `command -v`.
need_cmd() {
    if command -v "$1" >/dev/null 2>&1; then
        return 0
    fi
    echo "ERROR: required command not found: $1" >&2
    exit 1
}

# Tools this script cannot work without.
need_cmd wget
need_cmd sha256sum
# fetch_one NAME URL SHA256 — download URL to $OUT_DIR/NAME and verify it.
#
#   NAME    file name to create under $OUT_DIR
#   URL     download location; empty means "skip this tool"
#   SHA256  expected hex digest (any letter case); empty means "skip this tool"
#
# Returns 0 when the tool was skipped or installed. Exits the script with
# status 1 on download failure or checksum mismatch; in both cases the
# temporary download is removed and any existing $OUT_DIR/NAME is untouched.
fetch_one() {
    name="$1"
    url="$2"
    sha="$3"

    if [ -z "$url" ] || [ -z "$sha" ]; then
        echo "[vendor] skip $name (URL/SHA not provided)"
        return 0
    fi

    dst="$OUT_DIR/$name"
    tmp="$dst.tmp"

    echo "[vendor] downloading $name"
    # Handle the failure explicitly: with `set -e` a bare wget failure would
    # abort the script and leave a partial $tmp behind.
    if ! wget -O "$tmp" "$url"; then
        rm -f "$tmp"
        echo "ERROR: download failed for $name" >&2
        exit 1
    fi

    # Hash via stdin so the output never carries an escaped file name.
    got=$(sha256sum < "$tmp" | awk '{print $1}')
    want=$(echo "$sha" | tr '[:upper:]' '[:lower:]')
    if [ "$got" != "$want" ]; then
        rm -f "$tmp"
        echo "ERROR: checksum mismatch for $name" >&2
        echo " got: $got" >&2
        echo " want: $want" >&2
        exit 1
    fi

    # Publish only after verification, via rename.
    mv "$tmp" "$dst"
    chmod +x "$dst" || true
    echo "[vendor] ok: $name"
}
# Each tool is optional: fetch_one skips it when its URL or SHA256 variable is
# unset or empty (the ":-" defaults keep `set -u` from aborting on unset ones).
fetch_one "storcli64" "${STORCLI_URL:-}" "${STORCLI_SHA256:-}"
fetch_one "sas2ircu" "${SAS2IRCU_URL:-}" "${SAS2IRCU_SHA256:-}"
fetch_one "sas3ircu" "${SAS3IRCU_URL:-}" "${SAS3IRCU_SHA256:-}"
fetch_one "mstflint" "${MSTFLINT_URL:-}" "${MSTFLINT_SHA256:-}"
echo "[vendor] done. output dir: $OUT_DIR"

81
scripts/test-local.sh Executable file
View File

@@ -0,0 +1,81 @@
#!/bin/sh
# Local integration test for bee audit binary (plan step 1.12).
# Runs audit on current machine and validates required JSON fields.
#
# Usage: sh scripts/test-local.sh [OUT_FILE]
#   OUT_FILE  where the audit JSON is written (default: timestamped file in /tmp)

set -eu

ROOT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
OUT_FILE="${1:-/tmp/bee-audit-local-$(date +%Y%m%d-%H%M%S).json}"

if [ "$(uname -s)" != "Linux" ]; then
    echo "ERROR: scripts/test-local.sh must run on Linux (current: $(uname -s))" >&2
    exit 1
fi

# Check every prerequisite up front so a missing tool is reported before the
# audit is run, not after.
if ! command -v go >/dev/null 2>&1; then
    echo "ERROR: go not found in PATH" >&2
    exit 1
fi
if ! command -v python3 >/dev/null 2>&1; then
    echo "ERROR: python3 not found in PATH" >&2
    exit 1
fi

echo "[test-local] running audit -> $OUT_FILE"
(
    cd "$ROOT_DIR/audit"
    go run ./cmd/audit --output "file:$OUT_FILE"
)

if [ ! -s "$OUT_FILE" ]; then
    echo "ERROR: audit output file is missing or empty: $OUT_FILE" >&2
    exit 1
fi

# Validate the report. The heredoc delimiter is quoted, so the Python source
# is passed through verbatim with no shell expansion.
python3 - "$OUT_FILE" <<'PY'
import json
import sys

path = sys.argv[1]
with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

errors = []


def require_nonempty_string(v, name):
    """Record an error unless v is a string with non-whitespace content."""
    if not isinstance(v, str) or not v.strip():
        errors.append(f"{name} must be a non-empty string")


# A non-object top level would make every .get() below raise; report it as a
# validation error instead.
if not isinstance(data, dict):
    errors.append("top-level JSON value must be an object")
    data = {}

require_nonempty_string(data.get("collected_at"), "collected_at")
require_nonempty_string(data.get("source_type"), "source_type")
require_nonempty_string(data.get("protocol"), "protocol")

hw = data.get("hardware")
if not isinstance(hw, dict):
    errors.append("hardware must be an object")
    hw = {}

board = hw.get("board")
if not isinstance(board, dict):
    errors.append("hardware.board must be an object")
    board = {}
require_nonempty_string(board.get("serial_number"), "hardware.board.serial_number")

cpus = hw.get("cpus")
if not isinstance(cpus, list) or len(cpus) == 0:
    errors.append("hardware.cpus must be a non-empty array")

if errors:
    print("[test-local] validation FAILED")
    for e in errors:
        print(" -", e)
    sys.exit(1)

# Optional sections: counted when present as arrays, otherwise treated as empty.
memory = hw.get("memory") if isinstance(hw.get("memory"), list) else []
storage = hw.get("storage") if isinstance(hw.get("storage"), list) else []
pcie = hw.get("pcie_devices") if isinstance(hw.get("pcie_devices"), list) else []
psu = hw.get("power_supplies") if isinstance(hw.get("power_supplies"), list) else []

print("[test-local] validation OK")
print(f"[test-local] board.serial_number={board.get('serial_number')}")
print(f"[test-local] counts: cpus={len(cpus)} memory={len(memory)} storage={len(storage)} pcie={len(pcie)} psu={len(psu)}")
PY

echo "[test-local] done"