Implement audit enrichments, TUI workflows, and production ISO scaffold
This commit is contained in:
31
PLAN.md
31
PLAN.md
@@ -10,6 +10,37 @@ Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials,
|
||||
|
||||
---
|
||||
|
||||
## Status snapshot (2026-03-06)
|
||||
|
||||
### Phase 1 — Go Audit Binary
|
||||
|
||||
- 1.1 Project scaffold — **DONE**
|
||||
- 1.2 Board collector — **DONE**
|
||||
- 1.3 CPU collector — **DONE**
|
||||
- 1.4 Memory collector — **DONE**
|
||||
- 1.5 Storage collector — **DONE**
|
||||
- 1.6 PCIe collector — **DONE** (with noise filtering for system/chipset devices)
|
||||
- 1.7 PSU collector — **DONE (basic FRU path)**
|
||||
- 1.8 NVIDIA GPU enrichment — **DONE**
|
||||
- 1.8b Component wear / age telemetry — **DONE** (storage + NVMe + NVIDIA + NIC SFP/DOM + NIC packet stats)
|
||||
- 1.9 Mellanox/NVIDIA NIC enrichment — **DONE** (mstflint + ethtool firmware fallback)
|
||||
- 1.10 RAID controller enrichment — **DONE (initial multi-tool support)** (storcli + sas2/3ircu + arcconf + ssacli + VROC/mdstat)
|
||||
- 1.11 Output and USB write — **DONE** (usb + /tmp fallback)
|
||||
- 1.12 Integration test (local) — **DONE** (`scripts/test-local.sh`)
|
||||
|
||||
### Phase 2 — Alpine LiveCD
|
||||
|
||||
- Debug ISO track is active (builder + overlay-debug + OpenRC services + TUI workflow).
|
||||
- Production ISO track — **IN PROGRESS**.
|
||||
- 2.3 Alpine mkimage profile — **DONE (production profile scaffold)**
|
||||
- 2.4 Network bring-up on boot — **DONE**
|
||||
- 2.5 OpenRC boot service (bee-audit) — **DONE** (with explicit bee-nvidia ordering)
|
||||
- 2.6 Vendor utilities in overlay — **DONE (fetch script + iso/vendor scaffold)**
|
||||
- 2.7 Auto-update wiring (USB first, network second) — **PARTIAL** (shell flow done; strict Ed25519 verification intentionally deferred to final stage)
|
||||
- 2.8 Release workflow — **PARTIAL** (production build now injects audit binary, NVIDIA modules/tools, vendor tools, and build metadata)
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Go Audit Binary
|
||||
|
||||
Self-contained static binary. Runs on any Linux (including Alpine LiveCD).
|
||||
|
||||
@@ -6,7 +6,11 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/collector"
|
||||
)
|
||||
@@ -71,8 +75,93 @@ func writeOutput(dest string, data []byte) error {
|
||||
// writeToUSB auto-detects the first removable block device, mounts it,
|
||||
// and writes the audit JSON. Falls back to /tmp on any failure.
|
||||
func writeToUSB(data []byte) error {
|
||||
// implemented in step 1.11
|
||||
slog.Warn("usb output not yet implemented, falling back to stdout")
|
||||
_, err := os.Stdout.Write(append(data, '\n'))
|
||||
return err
|
||||
boardSerial := extractBoardSerial(data)
|
||||
filename := auditFilename(boardSerial, time.Now().UTC())
|
||||
|
||||
device, err := firstRemovableDevice()
|
||||
if err != nil {
|
||||
slog.Warn("usb output: no removable device, writing to /tmp", "err", err)
|
||||
return writeAuditToPath(filepath.Join("/tmp", filename), data)
|
||||
}
|
||||
|
||||
mountpoint := "/tmp/bee-usb"
|
||||
if err := os.MkdirAll(mountpoint, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := exec.Command("mount", device, mountpoint).Run(); err != nil {
|
||||
slog.Warn("usb output: mount failed, writing to /tmp", "device", device, "err", err)
|
||||
return writeAuditToPath(filepath.Join("/tmp", filename), data)
|
||||
}
|
||||
defer func() {
|
||||
if err := exec.Command("umount", mountpoint).Run(); err != nil {
|
||||
slog.Warn("usb output: umount failed", "mountpoint", mountpoint, "err", err)
|
||||
}
|
||||
}()
|
||||
|
||||
path := filepath.Join(mountpoint, filename)
|
||||
if err := writeAuditToPath(path, data); err != nil {
|
||||
slog.Warn("usb output: write failed, falling back to /tmp", "path", path, "err", err)
|
||||
return writeAuditToPath(filepath.Join("/tmp", filename), data)
|
||||
}
|
||||
|
||||
slog.Info("usb output: written", "path", path)
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeAuditToPath writes data followed by a single newline to path with mode
// 0644 and logs the destination on success. It returns the os.WriteFile error
// unchanged on failure.
//
// The payload is assembled in a fresh buffer: appending the newline directly
// to data could write into spare capacity of the caller's backing array.
func writeAuditToPath(path string, data []byte) error {
	payload := make([]byte, 0, len(data)+1)
	payload = append(payload, data...)
	payload = append(payload, '\n')

	if err := os.WriteFile(path, payload, 0644); err != nil {
		return err
	}
	slog.Info("audit output written", "path", path)
	return nil
}
|
||||
|
||||
// extractBoardSerial pulls hardware.board.serial_number out of the audit JSON.
// It returns "unknown" when the document does not parse or the serial is
// missing or blank; otherwise the serial with surrounding whitespace removed.
func extractBoardSerial(data []byte) string {
	const fallback = "unknown"

	var payload struct {
		Hardware struct {
			Board struct {
				SerialNumber string `json:"serial_number"`
			} `json:"board"`
		} `json:"hardware"`
	}
	if json.Unmarshal(data, &payload) != nil {
		return fallback
	}
	if trimmed := strings.TrimSpace(payload.Hardware.Board.SerialNumber); trimmed != "" {
		return trimmed
	}
	return fallback
}
|
||||
|
||||
// auditFilename builds the output file name
// "audit-<serial>-<YYYYMMDD-HHMMSS>.json" from the board serial and now.
//
// The serial is trimmed, and every character outside [A-Za-z0-9._-] is
// replaced with '_' so that a serial containing path separators or spaces
// still yields a single, valid file name. A blank serial becomes "unknown".
func auditFilename(boardSerial string, now time.Time) string {
	boardSerial = strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return r
		case r == '.', r == '-', r == '_':
			return r
		}
		return '_'
	}, strings.TrimSpace(boardSerial))
	if boardSerial == "" {
		boardSerial = "unknown"
	}
	return fmt.Sprintf("audit-%s-%s.json", boardSerial, now.Format("20060102-150405"))
}
|
||||
|
||||
// firstRemovableDevice returns the /dev path of the first block device, in
// name order, whose /sys/block/<name>/removable flag reads "1".
//
// Skipped devices:
//   - loop* and ram* (never physical media);
//   - sr* and fd* (optical/floppy devices report removable=1 but are not a
//     usable target for the audit file);
//   - devices whose sysfs size reads "0", i.e. a removable slot with no
//     medium present, which would otherwise win over a real stick that sorts
//     later.
//
// It returns an error when /sys/block cannot be read or no device qualifies.
func firstRemovableDevice() (string, error) {
	entries, err := os.ReadDir("/sys/block")
	if err != nil {
		return "", err
	}
	sort.Slice(entries, func(i, j int) bool { return entries[i].Name() < entries[j].Name() })

	for _, e := range entries {
		name := e.Name()

		skip := false
		for _, prefix := range []string{"loop", "ram", "sr", "fd"} {
			if strings.HasPrefix(name, prefix) {
				skip = true
				break
			}
		}
		if skip {
			continue
		}

		sysDir := filepath.Join("/sys/block", name)
		removableFlag, err := os.ReadFile(filepath.Join(sysDir, "removable"))
		if err != nil || strings.TrimSpace(string(removableFlag)) != "1" {
			continue
		}
		// An unreadable size file is not treated as "empty": keep the device.
		if size, err := os.ReadFile(filepath.Join(sysDir, "size")); err == nil && strings.TrimSpace(string(size)) == "0" {
			continue
		}
		return filepath.Join("/dev", name), nil
	}
	return "", fmt.Errorf("no removable block device found")
}
|
||||
|
||||
@@ -28,6 +28,11 @@ func Run() schema.HardwareIngestRequest {
|
||||
snap.Memory = collectMemory()
|
||||
snap.Storage = collectStorage()
|
||||
snap.PCIeDevices = collectPCIe()
|
||||
snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices, snap.Board.SerialNumber)
|
||||
snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
|
||||
snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)
|
||||
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
|
||||
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
|
||||
snap.PowerSupplies = collectPSUs()
|
||||
|
||||
// remaining collectors added in steps 1.8 – 1.10
|
||||
|
||||
164
audit/internal/collector/nic_mellanox.go
Normal file
164
audit/internal/collector/nic_mellanox.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// mellanoxVendorID is the PCI vendor ID that isMellanoxDevice compares against
// a device's VendorID.
const mellanoxVendorID = 0x15b3
|
||||
|
||||
var (
|
||||
mstflintQuery = func(bdf string) (string, error) {
|
||||
out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
ethtoolInfoQuery = func(iface string) (string, error) {
|
||||
out, err := exec.Command("ethtool", "-i", iface).Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
netIfacesByBDF = listNetIfacesByBDF
|
||||
)
|
||||
|
||||
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
|
||||
// firmware/serial information from mstflint, with ethtool fallback for firmware.
|
||||
func enrichPCIeWithMellanox(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
|
||||
enriched := 0
|
||||
for i := range devs {
|
||||
if !isMellanoxDevice(devs[i]) {
|
||||
continue
|
||||
}
|
||||
|
||||
bdf := ""
|
||||
if devs[i].BDF != nil {
|
||||
bdf = normalizePCIeBDF(*devs[i].BDF)
|
||||
}
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
fw, serial := queryMellanoxFromMstflint(bdf)
|
||||
if fw == "" {
|
||||
fw = queryFirmwareFromEthtool(bdf)
|
||||
}
|
||||
|
||||
if fw != "" {
|
||||
devs[i].Firmware = &fw
|
||||
}
|
||||
if serial != "" {
|
||||
devs[i].SerialNumber = &serial
|
||||
}
|
||||
if fw != "" || serial != "" {
|
||||
enriched++
|
||||
}
|
||||
}
|
||||
|
||||
slog.Info("mellanox: enriched", "count", enriched)
|
||||
return devs
|
||||
}
|
||||
|
||||
func isMellanoxDevice(dev schema.HardwarePCIeDevice) bool {
|
||||
if dev.VendorID != nil && *dev.VendorID == mellanoxVendorID {
|
||||
return true
|
||||
}
|
||||
if dev.Manufacturer != nil {
|
||||
m := strings.ToLower(*dev.Manufacturer)
|
||||
if strings.Contains(m, "mellanox") || strings.Contains(m, "nvidia networking") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func queryMellanoxFromMstflint(bdf string) (firmware, serial string) {
|
||||
out, err := mstflintQuery(bdf)
|
||||
if err != nil {
|
||||
return "", ""
|
||||
}
|
||||
return parseMstflintQuery(out)
|
||||
}
|
||||
|
||||
// parseMstflintQuery scans "key: value" lines of mstflint query output and
// returns the values of "FW Version" and "Board Serial Number" (keys matched
// case-insensitively). Lines without a colon and empty values are ignored; if
// a key appears more than once the last non-empty value wins.
func parseMstflintQuery(raw string) (firmware, serial string) {
	for _, row := range strings.Split(raw, "\n") {
		label, value, found := strings.Cut(strings.TrimSpace(row), ":")
		if !found {
			continue
		}
		value = strings.TrimSpace(value)
		if value == "" {
			continue
		}
		switch strings.ToLower(strings.TrimSpace(label)) {
		case "fw version":
			firmware = value
		case "board serial number":
			serial = value
		}
	}
	return firmware, serial
}
|
||||
|
||||
func queryFirmwareFromEthtool(bdf string) string {
|
||||
for _, iface := range netIfacesByBDF(bdf) {
|
||||
out, err := ethtoolInfoQuery(iface)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if fw := parseEthtoolFirmwareInfo(out); fw != "" {
|
||||
return fw
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// parseEthtoolFirmwareInfo returns the value of the first non-empty
// "firmware-version" line (key matched case-insensitively) in `ethtool -i`
// style "key: value" output, or "" when there is none.
func parseEthtoolFirmwareInfo(raw string) string {
	for _, row := range strings.Split(raw, "\n") {
		label, value, found := strings.Cut(strings.TrimSpace(row), ":")
		if !found {
			continue
		}
		if strings.ToLower(strings.TrimSpace(label)) != "firmware-version" {
			continue
		}
		if version := strings.TrimSpace(value); version != "" {
			return version
		}
	}
	return ""
}
|
||||
|
||||
// listNetIfacesByBDF returns the entry names found under
// /sys/bus/pci/devices/<bdf>/net. It returns nil when that directory cannot
// be read.
func listNetIfacesByBDF(bdf string) []string {
	entries, err := os.ReadDir(filepath.Join("/sys/bus/pci/devices", bdf, "net"))
	if err != nil {
		return nil
	}
	names := make([]string, 0, len(entries))
	for _, entry := range entries {
		if n := entry.Name(); n != "" {
			names = append(names, n)
		}
	}
	return names
}
|
||||
118
audit/internal/collector/nic_mellanox_test.go
Normal file
118
audit/internal/collector/nic_mellanox_test.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseMstflintQuery(t *testing.T) {
|
||||
raw := `Device #1:
|
||||
----------
|
||||
FW Version: 28.39.1002
|
||||
Board Serial Number: MT1234ABC
|
||||
`
|
||||
fw, serial := parseMstflintQuery(raw)
|
||||
if fw != "28.39.1002" {
|
||||
t.Fatalf("firmware: got %q", fw)
|
||||
}
|
||||
if serial != "MT1234ABC" {
|
||||
t.Fatalf("serial: got %q", serial)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEthtoolFirmwareInfo(t *testing.T) {
|
||||
raw := `driver: mlx5_core
|
||||
version: 6.6.31-0-lts
|
||||
firmware-version: 28.39.1002 (MT_0000000000)
|
||||
bus-info: 0000:18:00.0
|
||||
`
|
||||
fw := parseEthtoolFirmwareInfo(raw)
|
||||
if fw != "28.39.1002 (MT_0000000000)" {
|
||||
t.Fatalf("firmware: got %q", fw)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichPCIeWithMellanox_mstflint(t *testing.T) {
|
||||
origMst := mstflintQuery
|
||||
origEth := ethtoolInfoQuery
|
||||
origIfaces := netIfacesByBDF
|
||||
t.Cleanup(func() {
|
||||
mstflintQuery = origMst
|
||||
ethtoolInfoQuery = origEth
|
||||
netIfacesByBDF = origIfaces
|
||||
})
|
||||
|
||||
mstflintQuery = func(bdf string) (string, error) {
|
||||
if bdf != "0000:18:00.0" {
|
||||
t.Fatalf("unexpected bdf: %s", bdf)
|
||||
}
|
||||
return "FW Version: 28.39.1002\nBoard Serial Number: SN-MST-001\n", nil
|
||||
}
|
||||
ethtoolInfoQuery = func(string) (string, error) {
|
||||
t.Fatal("ethtool should not be called when mstflint succeeds")
|
||||
return "", nil
|
||||
}
|
||||
netIfacesByBDF = func(string) []string { return nil }
|
||||
|
||||
vendorID := mellanoxVendorID
|
||||
bdf := "0000:18:00.0"
|
||||
manufacturer := "Mellanox Technologies"
|
||||
devs := []schema.HardwarePCIeDevice{{
|
||||
VendorID: &vendorID,
|
||||
BDF: &bdf,
|
||||
Manufacturer: &manufacturer,
|
||||
}}
|
||||
|
||||
out := enrichPCIeWithMellanox(devs)
|
||||
if out[0].Firmware == nil || *out[0].Firmware != "28.39.1002" {
|
||||
t.Fatalf("firmware: got %v", out[0].Firmware)
|
||||
}
|
||||
if out[0].SerialNumber == nil || *out[0].SerialNumber != "SN-MST-001" {
|
||||
t.Fatalf("serial: got %v", out[0].SerialNumber)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichPCIeWithMellanox_fallbackEthtool(t *testing.T) {
|
||||
origMst := mstflintQuery
|
||||
origEth := ethtoolInfoQuery
|
||||
origIfaces := netIfacesByBDF
|
||||
t.Cleanup(func() {
|
||||
mstflintQuery = origMst
|
||||
ethtoolInfoQuery = origEth
|
||||
netIfacesByBDF = origIfaces
|
||||
})
|
||||
|
||||
mstflintQuery = func(string) (string, error) {
|
||||
return "", fmt.Errorf("mstflint not found")
|
||||
}
|
||||
netIfacesByBDF = func(bdf string) []string {
|
||||
if bdf != "0000:18:00.0" {
|
||||
t.Fatalf("unexpected bdf: %s", bdf)
|
||||
}
|
||||
return []string{"eth0"}
|
||||
}
|
||||
ethtoolInfoQuery = func(iface string) (string, error) {
|
||||
if iface != "eth0" {
|
||||
t.Fatalf("unexpected iface: %s", iface)
|
||||
}
|
||||
return "driver: mlx5_core\nfirmware-version: 28.40.1000\n", nil
|
||||
}
|
||||
|
||||
vendorID := mellanoxVendorID
|
||||
bdf := "0000:18:00.0"
|
||||
manufacturer := "NVIDIA Networking"
|
||||
devs := []schema.HardwarePCIeDevice{{
|
||||
VendorID: &vendorID,
|
||||
BDF: &bdf,
|
||||
Manufacturer: &manufacturer,
|
||||
}}
|
||||
|
||||
out := enrichPCIeWithMellanox(devs)
|
||||
if out[0].Firmware == nil || *out[0].Firmware != "28.40.1000" {
|
||||
t.Fatalf("firmware: got %v", out[0].Firmware)
|
||||
}
|
||||
if out[0].SerialNumber != nil {
|
||||
t.Fatalf("serial should stay nil without mstflint, got %v", out[0].SerialNumber)
|
||||
}
|
||||
}
|
||||
172
audit/internal/collector/nic_telemetry.go
Normal file
172
audit/internal/collector/nic_telemetry.go
Normal file
@@ -0,0 +1,172 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
ethtoolModuleQuery = func(iface string) (string, error) {
|
||||
out, err := raidToolQuery("ethtool", "-m", iface)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
readNetStatFile = func(iface, key string) (int64, error) {
|
||||
path := filepath.Join("/sys/class/net", iface, "statistics", key)
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
v, err := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
)
|
||||
|
||||
func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
|
||||
enriched := 0
|
||||
for i := range devs {
|
||||
if !isNICDevice(devs[i]) || devs[i].BDF == nil {
|
||||
continue
|
||||
}
|
||||
bdf := normalizePCIeBDF(*devs[i].BDF)
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
ifaces := netIfacesByBDF(bdf)
|
||||
if len(ifaces) == 0 {
|
||||
continue
|
||||
}
|
||||
iface := ifaces[0]
|
||||
|
||||
if devs[i].Firmware == nil {
|
||||
if out, err := ethtoolInfoQuery(iface); err == nil {
|
||||
if fw := parseEthtoolFirmwareInfo(out); fw != "" {
|
||||
devs[i].Firmware = &fw
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if devs[i].Telemetry == nil {
|
||||
devs[i].Telemetry = map[string]any{}
|
||||
}
|
||||
injectNICPacketStats(devs[i].Telemetry, iface)
|
||||
if out, err := ethtoolModuleQuery(iface); err == nil {
|
||||
injectSFPDOMTelemetry(devs[i].Telemetry, out)
|
||||
}
|
||||
if len(devs[i].Telemetry) == 0 {
|
||||
devs[i].Telemetry = nil
|
||||
} else {
|
||||
enriched++
|
||||
}
|
||||
}
|
||||
slog.Info("nic: telemetry enriched", "count", enriched)
|
||||
return devs
|
||||
}
|
||||
|
||||
func isNICDevice(dev schema.HardwarePCIeDevice) bool {
|
||||
if dev.DeviceClass == nil {
|
||||
return false
|
||||
}
|
||||
c := strings.ToLower(strings.TrimSpace(*dev.DeviceClass))
|
||||
return strings.Contains(c, "ethernet controller") ||
|
||||
strings.Contains(c, "network controller") ||
|
||||
strings.Contains(c, "infiniband controller")
|
||||
}
|
||||
|
||||
func injectNICPacketStats(dst map[string]any, iface string) {
|
||||
for _, key := range []string{"rx_packets", "tx_packets", "rx_errors", "tx_errors"} {
|
||||
if v, err := readNetStatFile(iface, key); err == nil {
|
||||
dst[key] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func injectSFPDOMTelemetry(dst map[string]any, raw string) {
|
||||
parsed := parseSFPDOM(raw)
|
||||
for k, v := range parsed {
|
||||
dst[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// floatRe matches an optionally signed decimal number such as "3", "-3.27" or
// ".5".
var floatRe = regexp.MustCompile(`[-+]?[0-9]*\.?[0-9]+`)

// parseSFPDOM extracts live transceiver readings from `ethtool -m` style
// "key : value" output. The result may contain:
//
//	sfp_temperature_c  from "Module temperature"
//	sfp_voltage_v      from "Module voltage"
//	sfp_bias_ma        from "Laser bias current"
//	sfp_tx_power_dbm   from "Laser output power"
//	sfp_rx_power_dbm   from "Receiver signal ..."
//
// Keys are matched case-insensitively by substring. Lines describing alarm or
// warning flags and thresholds (e.g. "Laser bias current high alarm
// threshold") contain the same substrings; they are skipped so a threshold
// can never overwrite the measured value. Readings that cannot be parsed are
// omitted. The returned map is never nil.
func parseSFPDOM(raw string) map[string]any {
	out := map[string]any{}
	for _, line := range strings.Split(raw, "\n") {
		name, val, found := strings.Cut(strings.TrimSpace(line), ":")
		if !found {
			continue
		}
		key := strings.ToLower(strings.TrimSpace(name))
		val = strings.TrimSpace(val)

		// Limits and flag states are not measurements.
		if strings.Contains(key, "threshold") || strings.Contains(key, "alarm") || strings.Contains(key, "warning") {
			continue
		}

		switch {
		case strings.Contains(key, "module temperature"):
			if f, ok := firstFloat(val); ok {
				out["sfp_temperature_c"] = f
			}
		case strings.Contains(key, "laser output power"):
			if f, ok := dbmValue(val); ok {
				out["sfp_tx_power_dbm"] = f
			}
		case strings.Contains(key, "receiver signal"):
			if f, ok := dbmValue(val); ok {
				out["sfp_rx_power_dbm"] = f
			}
		case strings.Contains(key, "module voltage"):
			if f, ok := firstFloat(val); ok {
				out["sfp_voltage_v"] = f
			}
		case strings.Contains(key, "laser bias current"):
			if f, ok := firstFloat(val); ok {
				out["sfp_bias_ma"] = f
			}
		}
	}
	return out
}

// firstFloat returns the first number found in raw. ok is false when raw
// contains no number.
func firstFloat(raw string) (float64, bool) {
	m := floatRe.FindString(raw)
	if m == "" {
		return 0, false
	}
	v, err := strconv.ParseFloat(m, 64)
	if err != nil {
		return 0, false
	}
	return v, true
}

// dbmValue returns the number that directly precedes the last "dBm" unit
// (case-insensitive) in raw, e.g. -3.85 for "0.4123 mW / -3.85 dBm".
//
// ok is false when raw has no dBm unit or no number belongs to it. When the
// value is written as "<mW> / <dBm>", only the part after the last '/' is
// considered, so "0.0000 mW / -inf dBm" does not report the milliwatt figure
// as dBm.
func dbmValue(raw string) (float64, bool) {
	lower := strings.ToLower(raw)
	unit := strings.LastIndex(lower, "dbm")
	if unit < 0 {
		return 0, false
	}
	segment := lower[:unit]
	if slash := strings.LastIndex(segment, "/"); slash >= 0 {
		segment = segment[slash+1:]
	}
	matches := floatRe.FindAllString(segment, -1)
	if len(matches) == 0 {
		return 0, false
	}
	v, err := strconv.ParseFloat(matches[len(matches)-1], 64)
	if err != nil {
		return 0, false
	}
	return v, true
}
|
||||
51
audit/internal/collector/nic_telemetry_test.go
Normal file
51
audit/internal/collector/nic_telemetry_test.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package collector
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseSFPDOM(t *testing.T) {
|
||||
raw := `
|
||||
Module temperature : 41.23 C
|
||||
Module voltage : 3.30 V
|
||||
Laser bias current : 6.12 mA
|
||||
Laser output power : 0.4712 mW / -3.27 dBm
|
||||
Receiver signal average optical power : 0.4123 mW / -3.85 dBm
|
||||
`
|
||||
got := parseSFPDOM(raw)
|
||||
|
||||
if v, ok := got["sfp_temperature_c"].(float64); !ok || v != 41.23 {
|
||||
t.Fatalf("sfp_temperature_c mismatch: %#v", got["sfp_temperature_c"])
|
||||
}
|
||||
if v, ok := got["sfp_voltage_v"].(float64); !ok || v != 3.30 {
|
||||
t.Fatalf("sfp_voltage_v mismatch: %#v", got["sfp_voltage_v"])
|
||||
}
|
||||
if v, ok := got["sfp_bias_ma"].(float64); !ok || v != 6.12 {
|
||||
t.Fatalf("sfp_bias_ma mismatch: %#v", got["sfp_bias_ma"])
|
||||
}
|
||||
if v, ok := got["sfp_tx_power_dbm"].(float64); !ok || v != -3.27 {
|
||||
t.Fatalf("sfp_tx_power_dbm mismatch: %#v", got["sfp_tx_power_dbm"])
|
||||
}
|
||||
if v, ok := got["sfp_rx_power_dbm"].(float64); !ok || v != -3.85 {
|
||||
t.Fatalf("sfp_rx_power_dbm mismatch: %#v", got["sfp_rx_power_dbm"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDBMValue(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want float64
|
||||
ok bool
|
||||
}{
|
||||
{"0.4123 mW / -3.85 dBm", -3.85, true},
|
||||
{"-1.23 dBm", -1.23, true},
|
||||
{"not supported", 0, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got, ok := dbmValue(tt.in)
|
||||
if ok != tt.ok {
|
||||
t.Fatalf("dbmValue(%q) ok=%v want %v", tt.in, ok, tt.ok)
|
||||
}
|
||||
if ok && got != tt.want {
|
||||
t.Fatalf("dbmValue(%q)=%v want %v", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
245
audit/internal/collector/nvidia.go
Normal file
245
audit/internal/collector/nvidia.go
Normal file
@@ -0,0 +1,245 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// nvidiaVendorID is the PCI vendor ID that isNVIDIADevice compares against a
// device's VendorID.
const nvidiaVendorID = 0x10de
|
||||
|
||||
// nvidiaGPUInfo holds one row of the nvidia-smi GPU query. Pointer fields are
// nil when the corresponding column could not be parsed.
type nvidiaGPUInfo struct {
	BDF            string   // normalized PCI address (pci.bus_id column)
	Serial         string   // serial column
	VBIOS          string   // vbios_version column
	TemperatureC   *float64 // temperature.gpu column
	PowerW         *float64 // power.draw column
	ECCUncorrected *int64   // ecc.errors.uncorrected.aggregate.total column
	ECCCorrected   *int64   // ecc.errors.corrected.aggregate.total column
	HWSlowdown     *bool    // clocks_throttle_reasons.hw_slowdown column
}
|
||||
|
||||
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
|
||||
// If the driver/tool is unavailable, NVIDIA devices get UNKNOWN status and
|
||||
// a stable serial fallback based on board serial + slot.
|
||||
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice, boardSerial string) []schema.HardwarePCIeDevice {
|
||||
gpuByBDF, err := queryNVIDIAGPUs()
|
||||
if err != nil {
|
||||
slog.Info("nvidia: enrichment skipped", "err", err)
|
||||
return enrichPCIeWithNVIDIAData(devs, nil, boardSerial, false)
|
||||
}
|
||||
return enrichPCIeWithNVIDIAData(devs, gpuByBDF, boardSerial, true)
|
||||
}
|
||||
|
||||
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, boardSerial string, driverLoaded bool) []schema.HardwarePCIeDevice {
|
||||
enriched := 0
|
||||
for i := range devs {
|
||||
if !isNVIDIADevice(devs[i]) {
|
||||
continue
|
||||
}
|
||||
|
||||
if !driverLoaded {
|
||||
setPCIeFallback(&devs[i], boardSerial)
|
||||
continue
|
||||
}
|
||||
|
||||
bdf := ""
|
||||
if devs[i].BDF != nil {
|
||||
bdf = normalizePCIeBDF(*devs[i].BDF)
|
||||
}
|
||||
info, ok := gpuByBDF[bdf]
|
||||
if !ok {
|
||||
setPCIeFallback(&devs[i], boardSerial)
|
||||
continue
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(info.Serial); v != "" {
|
||||
devs[i].SerialNumber = &v
|
||||
} else {
|
||||
setPCIeFallbackSerial(&devs[i], boardSerial)
|
||||
}
|
||||
if v := strings.TrimSpace(info.VBIOS); v != "" {
|
||||
devs[i].Firmware = &v
|
||||
}
|
||||
|
||||
status := "OK"
|
||||
if info.ECCUncorrected != nil && *info.ECCUncorrected > 0 {
|
||||
status = "WARNING"
|
||||
}
|
||||
devs[i].Status = &status
|
||||
injectNVIDIATelemetry(&devs[i], info)
|
||||
enriched++
|
||||
}
|
||||
|
||||
if driverLoaded {
|
||||
slog.Info("nvidia: enriched", "count", enriched)
|
||||
}
|
||||
return devs
|
||||
}
|
||||
|
||||
func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
|
||||
out, err := exec.Command(
|
||||
"nvidia-smi",
|
||||
"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown",
|
||||
"--format=csv,noheader,nounits",
|
||||
).Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseNVIDIASMIQuery(string(out))
|
||||
}
|
||||
|
||||
func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
|
||||
r := csv.NewReader(strings.NewReader(raw))
|
||||
r.TrimLeadingSpace = true
|
||||
r.FieldsPerRecord = -1
|
||||
|
||||
records, err := r.ReadAll()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result := make(map[string]nvidiaGPUInfo)
|
||||
for _, rec := range records {
|
||||
if len(rec) == 0 {
|
||||
continue
|
||||
}
|
||||
if len(rec) < 9 {
|
||||
return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 9", len(rec))
|
||||
}
|
||||
|
||||
bdf := normalizePCIeBDF(rec[1])
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
info := nvidiaGPUInfo{
|
||||
BDF: bdf,
|
||||
Serial: strings.TrimSpace(rec[2]),
|
||||
VBIOS: strings.TrimSpace(rec[3]),
|
||||
TemperatureC: parseMaybeFloat(rec[4]),
|
||||
PowerW: parseMaybeFloat(rec[5]),
|
||||
ECCUncorrected: parseMaybeInt64(rec[6]),
|
||||
ECCCorrected: parseMaybeInt64(rec[7]),
|
||||
HWSlowdown: parseMaybeBool(rec[8]),
|
||||
}
|
||||
result[bdf] = info
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseMaybeFloat converts a CSV field to *float64. It returns nil for blank
// fields, for the placeholders "n/a", "not supported" and "[not supported]"
// (case-insensitive), and for anything that is not a valid float.
func parseMaybeFloat(v string) *float64 {
	text := strings.TrimSpace(v)
	if text == "" {
		return nil
	}
	for _, placeholder := range []string{"n/a", "not supported", "[not supported]"} {
		if strings.EqualFold(text, placeholder) {
			return nil
		}
	}
	if parsed, err := strconv.ParseFloat(text, 64); err == nil {
		return &parsed
	}
	return nil
}
|
||||
|
||||
// parseMaybeInt64 converts a CSV field to *int64. It returns nil for blank
// fields, for the placeholders "n/a", "not supported" and "[not supported]"
// (case-insensitive), and for anything that is not a valid base-10 integer.
func parseMaybeInt64(v string) *int64 {
	text := strings.TrimSpace(v)
	if text == "" {
		return nil
	}
	for _, placeholder := range []string{"n/a", "not supported", "[not supported]"} {
		if strings.EqualFold(text, placeholder) {
			return nil
		}
	}
	if parsed, err := strconv.ParseInt(text, 10, 64); err == nil {
		return &parsed
	}
	return nil
}
|
||||
|
||||
// parseMaybeBool maps a CSV state field to *bool (case-insensitive, trimmed):
// "active", "enabled", "true", "1" give true; "not active", "disabled",
// "false", "0" give false; anything else gives nil.
func parseMaybeBool(v string) *bool {
	state := strings.TrimSpace(strings.ToLower(v))
	for _, on := range []string{"active", "enabled", "true", "1"} {
		if state == on {
			result := true
			return &result
		}
	}
	for _, off := range []string{"not active", "disabled", "false", "0"} {
		if state == off {
			result := false
			return &result
		}
	}
	return nil
}
|
||||
|
||||
// normalizePCIeBDF lower-cases and trims a PCI address and brings it to the
// "dddd:bb:dd.f" form:
//   - three colon-separated parts: a domain longer than four characters is cut
//     to its last four;
//   - two parts: the domain "0000" is prepended;
//   - any other shape is returned as trimmed and lower-cased.
func normalizePCIeBDF(bdf string) string {
	addr := strings.TrimSpace(strings.ToLower(bdf))
	if addr == "" {
		return ""
	}
	switch fields := strings.Split(addr, ":"); len(fields) {
	case 2:
		return "0000:" + fields[0] + ":" + fields[1]
	case 3:
		domain := fields[0]
		if extra := len(domain) - 4; extra > 0 {
			domain = domain[extra:]
		}
		return strings.Join([]string{domain, fields[1], fields[2]}, ":")
	default:
		return addr
	}
}
|
||||
|
||||
func isNVIDIADevice(dev schema.HardwarePCIeDevice) bool {
|
||||
if dev.VendorID != nil && *dev.VendorID == nvidiaVendorID {
|
||||
return true
|
||||
}
|
||||
if dev.Manufacturer != nil && strings.Contains(strings.ToLower(*dev.Manufacturer), "nvidia") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func setPCIeFallback(dev *schema.HardwarePCIeDevice, boardSerial string) {
|
||||
setPCIeFallbackSerial(dev, boardSerial)
|
||||
status := "UNKNOWN"
|
||||
dev.Status = &status
|
||||
}
|
||||
|
||||
func setPCIeFallbackSerial(dev *schema.HardwarePCIeDevice, boardSerial string) {
|
||||
if strings.TrimSpace(boardSerial) == "" || dev.SerialNumber != nil {
|
||||
return
|
||||
}
|
||||
slot := "unknown"
|
||||
if dev.BDF != nil && strings.TrimSpace(*dev.BDF) != "" {
|
||||
slot = strings.TrimSpace(*dev.BDF)
|
||||
} else if dev.Slot != nil && strings.TrimSpace(*dev.Slot) != "" {
|
||||
slot = strings.TrimSpace(*dev.Slot)
|
||||
}
|
||||
fb := fmt.Sprintf("%s-PCIE-%s", boardSerial, slot)
|
||||
dev.SerialNumber = &fb
|
||||
}
|
||||
|
||||
func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
|
||||
if dev.Telemetry == nil {
|
||||
dev.Telemetry = map[string]any{}
|
||||
}
|
||||
if info.TemperatureC != nil {
|
||||
dev.Telemetry["temperature_c"] = *info.TemperatureC
|
||||
}
|
||||
if info.PowerW != nil {
|
||||
dev.Telemetry["power_w"] = *info.PowerW
|
||||
}
|
||||
if info.ECCUncorrected != nil {
|
||||
dev.Telemetry["ecc_uncorrected_total"] = *info.ECCUncorrected
|
||||
}
|
||||
if info.ECCCorrected != nil {
|
||||
dev.Telemetry["ecc_corrected_total"] = *info.ECCCorrected
|
||||
}
|
||||
if info.HWSlowdown != nil {
|
||||
dev.Telemetry["hw_slowdown_active"] = *info.HWSlowdown
|
||||
}
|
||||
if len(dev.Telemetry) == 0 {
|
||||
dev.Telemetry = nil
|
||||
}
|
||||
}
|
||||
116
audit/internal/collector/nvidia_test.go
Normal file
116
audit/internal/collector/nvidia_test.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseNVIDIASMIQuery(t *testing.T) {
|
||||
raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active\n"
|
||||
byBDF, err := parseNVIDIASMIQuery(raw)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
gpu, ok := byBDF["0000:65:00.0"]
|
||||
if !ok {
|
||||
t.Fatalf("gpu by normalized bdf not found")
|
||||
}
|
||||
if gpu.Serial != "GPU-SERIAL-1" {
|
||||
t.Fatalf("serial: got %q", gpu.Serial)
|
||||
}
|
||||
if gpu.VBIOS != "96.00.1F.00.02" {
|
||||
t.Fatalf("vbios: got %q", gpu.VBIOS)
|
||||
}
|
||||
if gpu.ECCUncorrected == nil || *gpu.ECCUncorrected != 0 {
|
||||
t.Fatalf("ecc uncorrected: got %v", gpu.ECCUncorrected)
|
||||
}
|
||||
if gpu.HWSlowdown == nil || *gpu.HWSlowdown {
|
||||
t.Fatalf("hw slowdown: got %v, want false", gpu.HWSlowdown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizePCIeBDF(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"00000000:17:00.0", "0000:17:00.0"},
|
||||
{"0000:17:00.0", "0000:17:00.0"},
|
||||
{"17:00.0", "0000:17:00.0"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := normalizePCIeBDF(tt.in)
|
||||
if got != tt.want {
|
||||
t.Fatalf("normalizePCIeBDF(%q)=%q want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
|
||||
vendorID := nvidiaVendorID
|
||||
bdf := "0000:65:00.0"
|
||||
manufacturer := "NVIDIA Corporation"
|
||||
status := "OK"
|
||||
devices := []schema.HardwarePCIeDevice{
|
||||
{
|
||||
VendorID: &vendorID,
|
||||
BDF: &bdf,
|
||||
Manufacturer: &manufacturer,
|
||||
Status: &status,
|
||||
},
|
||||
}
|
||||
|
||||
byBDF := map[string]nvidiaGPUInfo{
|
||||
"0000:65:00.0": {
|
||||
BDF: "0000:65:00.0",
|
||||
Serial: "GPU-ABC",
|
||||
VBIOS: "96.00.1F.00.02",
|
||||
ECCUncorrected: ptrInt64(2),
|
||||
ECCCorrected: ptrInt64(10),
|
||||
TemperatureC: ptrFloat(55.5),
|
||||
PowerW: ptrFloat(230.2),
|
||||
},
|
||||
}
|
||||
|
||||
out := enrichPCIeWithNVIDIAData(devices, byBDF, "BOARD-001", true)
|
||||
if out[0].SerialNumber == nil || *out[0].SerialNumber != "GPU-ABC" {
|
||||
t.Fatalf("serial: got %v", out[0].SerialNumber)
|
||||
}
|
||||
if out[0].Firmware == nil || *out[0].Firmware != "96.00.1F.00.02" {
|
||||
t.Fatalf("firmware: got %v", out[0].Firmware)
|
||||
}
|
||||
if out[0].Status == nil || *out[0].Status != "WARNING" {
|
||||
t.Fatalf("status: got %v", out[0].Status)
|
||||
}
|
||||
if out[0].Telemetry == nil {
|
||||
t.Fatal("expected telemetry")
|
||||
}
|
||||
if got, ok := out[0].Telemetry["ecc_uncorrected_total"].(int64); !ok || got != 2 {
|
||||
t.Fatalf("ecc_uncorrected_total: got %#v", out[0].Telemetry["ecc_uncorrected_total"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichPCIeWithNVIDIAData_driverMissingFallback(t *testing.T) {
|
||||
vendorID := nvidiaVendorID
|
||||
bdf := "0000:17:00.0"
|
||||
manufacturer := "NVIDIA Corporation"
|
||||
devices := []schema.HardwarePCIeDevice{
|
||||
{
|
||||
VendorID: &vendorID,
|
||||
BDF: &bdf,
|
||||
Manufacturer: &manufacturer,
|
||||
},
|
||||
}
|
||||
|
||||
out := enrichPCIeWithNVIDIAData(devices, nil, "BOARD-123", false)
|
||||
if out[0].SerialNumber == nil || *out[0].SerialNumber != "BOARD-123-PCIE-0000:17:00.0" {
|
||||
t.Fatalf("fallback serial: got %v", out[0].SerialNumber)
|
||||
}
|
||||
if out[0].Status == nil || *out[0].Status != "UNKNOWN" {
|
||||
t.Fatalf("fallback status: got %v", out[0].Status)
|
||||
}
|
||||
}
|
||||
|
||||
// ptrInt64 returns a pointer to a fresh int64 holding v.
func ptrInt64(v int64) *int64 {
	p := new(int64)
	*p = v
	return p
}
|
||||
// ptrFloat returns a pointer to a fresh float64 holding v.
func ptrFloat(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
|
||||
@@ -37,12 +37,44 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
|
||||
val := strings.TrimSpace(line[idx+2:])
|
||||
fields[key] = val
|
||||
}
|
||||
if !shouldIncludePCIeDevice(fields["Class"]) {
|
||||
continue
|
||||
}
|
||||
dev := parseLspciDevice(fields)
|
||||
devs = append(devs, dev)
|
||||
}
|
||||
return devs
|
||||
}
|
||||
|
||||
// shouldIncludePCIeDevice reports whether a device with the given class string
// should be kept in the inventory.
//
// The class is trimmed and lower-cased, then rejected if it contains any of
// the excluded class names as a substring. An empty class is kept, since
// there is nothing to filter on.
func shouldIncludePCIeDevice(class string) bool {
	normalized := strings.ToLower(strings.TrimSpace(class))
	if normalized == "" {
		return true
	}

	// Keep inventory focused on useful replaceable components, not chipset/virtual noise.
	for _, noise := range [...]string{
		"host bridge",
		"isa bridge",
		"pci bridge",
		"ram memory",
		"system peripheral",
		"communication controller",
		"signal processing controller",
		"usb controller",
		"smbus",
		"audio device",
		"serial bus controller",
		"unassigned class",
	} {
		if strings.Contains(normalized, noise) {
			return false
		}
	}
	return true
}
|
||||
|
||||
func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
|
||||
dev := schema.HardwarePCIeDevice{}
|
||||
present := true
|
||||
|
||||
41
audit/internal/collector/pcie_filter_test.go
Normal file
41
audit/internal/collector/pcie_filter_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package collector
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestShouldIncludePCIeDevice(t *testing.T) {
|
||||
tests := []struct {
|
||||
class string
|
||||
want bool
|
||||
}{
|
||||
{"USB controller", false},
|
||||
{"System peripheral", false},
|
||||
{"Audio device", false},
|
||||
{"Host bridge", false},
|
||||
{"PCI bridge", false},
|
||||
{"SMBus", false},
|
||||
{"Ethernet controller", true},
|
||||
{"RAID bus controller", true},
|
||||
{"Non-Volatile memory controller", true},
|
||||
{"VGA compatible controller", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
got := shouldIncludePCIeDevice(tt.class)
|
||||
if got != tt.want {
|
||||
t.Fatalf("class %q include=%v want %v", tt.class, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLspci_filtersExcludedClasses(t *testing.T) {
|
||||
input := "Slot:\t0000:00:14.0\nClass:\tUSB controller\nVendor:\tIntel Corporation\nDevice:\tUSB 3.0\n\n" +
|
||||
"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
||||
|
||||
devs := parseLspci(input)
|
||||
if len(devs) != 1 {
|
||||
t.Fatalf("expected 1 filtered device, got %d", len(devs))
|
||||
}
|
||||
if devs[0].DeviceClass == nil || *devs[0].DeviceClass != "VGA compatible controller" {
|
||||
t.Fatalf("unexpected remaining class: %v", devs[0].DeviceClass)
|
||||
}
|
||||
}
|
||||
748
audit/internal/collector/raid.go
Normal file
748
audit/internal/collector/raid.go
Normal file
@@ -0,0 +1,748 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// PCI vendor IDs used to decide which vendor-specific storage tools to run.
const (
	vendorBroadcomLSI = 0x1000 // selects storcli64 / sas3ircu / sas2ircu
	vendorAdaptec     = 0x9005 // selects arcconf
	vendorHPE         = 0x103c // selects ssacli
	vendorIntel       = 0x8086 // required for VROC detection
)
|
||||
|
||||
// raidToolQuery runs an external tool with the given arguments and returns
// its standard output. It is a package-level variable, so it can be
// reassigned.
var raidToolQuery = func(name string, args ...string) ([]byte, error) {
	cmd := exec.Command(name, args...)
	return cmd.Output()
}
|
||||
|
||||
// readMDStat returns the raw contents of /proc/mdstat. It is a package-level
// variable, so it can be reassigned.
var readMDStat = func() ([]byte, error) {
	const mdstatPath = "/proc/mdstat"
	return os.ReadFile(mdstatPath)
}
|
||||
|
||||
// collectRAIDStorage collects physical disks behind RAID controllers that may
|
||||
// not be exposed as regular block devices.
|
||||
func collectRAIDStorage(pcie []schema.HardwarePCIeDevice) []schema.HardwareStorage {
|
||||
vendors := detectRAIDVendors(pcie)
|
||||
if len(vendors) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var out []schema.HardwareStorage
|
||||
|
||||
if vendors[vendorBroadcomLSI] {
|
||||
if drives := collectStorcliDrives(); len(drives) > 0 {
|
||||
out = append(out, drives...)
|
||||
}
|
||||
if drives := collectSASIrcuDrives("sas3ircu"); len(drives) > 0 {
|
||||
out = append(out, drives...)
|
||||
}
|
||||
if drives := collectSASIrcuDrives("sas2ircu"); len(drives) > 0 {
|
||||
out = append(out, drives...)
|
||||
}
|
||||
}
|
||||
|
||||
if vendors[vendorAdaptec] {
|
||||
if drives := collectArcconfDrives(); len(drives) > 0 {
|
||||
out = append(out, drives...)
|
||||
}
|
||||
}
|
||||
if vendors[vendorHPE] {
|
||||
if drives := collectSSACLIDrives(); len(drives) > 0 {
|
||||
out = append(out, drives...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(out) > 0 {
|
||||
slog.Info("raid: collected physical drives", "count", len(out))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func detectRAIDVendors(pcie []schema.HardwarePCIeDevice) map[int]bool {
|
||||
out := map[int]bool{}
|
||||
for _, dev := range pcie {
|
||||
if dev.VendorID == nil {
|
||||
continue
|
||||
}
|
||||
if isLikelyRAIDController(dev) {
|
||||
out[*dev.VendorID] = true
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func isLikelyRAIDController(dev schema.HardwarePCIeDevice) bool {
|
||||
if dev.DeviceClass == nil {
|
||||
return false
|
||||
}
|
||||
c := strings.ToLower(*dev.DeviceClass)
|
||||
return strings.Contains(c, "raid") ||
|
||||
strings.Contains(c, "sas") ||
|
||||
strings.Contains(c, "mass storage") ||
|
||||
strings.Contains(c, "serial attached scsi")
|
||||
}
|
||||
|
||||
func collectStorcliDrives() []schema.HardwareStorage {
|
||||
out, err := raidToolQuery("storcli64", "/call/eall/sall", "show", "all", "J")
|
||||
if err != nil {
|
||||
slog.Info("raid: storcli unavailable", "err", err)
|
||||
return nil
|
||||
}
|
||||
drives := parseStorcliDrivesJSON(out)
|
||||
if len(drives) == 0 {
|
||||
slog.Info("raid: storcli returned no drives")
|
||||
}
|
||||
return drives
|
||||
}
|
||||
|
||||
func collectSASIrcuDrives(tool string) []schema.HardwareStorage {
|
||||
out, err := raidToolQuery(tool, "list")
|
||||
if err != nil {
|
||||
slog.Info("raid: "+tool+" unavailable", "err", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var drives []schema.HardwareStorage
|
||||
for _, ctlID := range parseSASIrcuControllerIDs(string(out)) {
|
||||
raw, err := raidToolQuery(tool, strconv.Itoa(ctlID), "display")
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
drives = append(drives, parseSASIrcuDisplay(string(raw))...)
|
||||
}
|
||||
return drives
|
||||
}
|
||||
|
||||
// parseSASIrcuControllerIDs extracts controller indexes from the output of a
// sas2ircu/sas3ircu "list" command: every line whose first
// whitespace-separated field parses as an integer contributes that integer.
// The result is sorted ascending without duplicates, and nil when no line
// qualifies.
func parseSASIrcuControllerIDs(raw string) []int {
	var ids []int
	for _, line := range strings.Split(raw, "\n") {
		cols := strings.Fields(line)
		if len(cols) == 0 {
			continue
		}
		if id, err := strconv.Atoi(cols[0]); err == nil {
			ids = append(ids, id)
		}
	}
	if len(ids) == 0 {
		return ids
	}

	// Sort first so duplicates become adjacent, then compact in place.
	sort.Ints(ids)
	uniq := ids[:1]
	for _, id := range ids[1:] {
		if id != uniq[len(uniq)-1] {
			uniq = append(uniq, id)
		}
	}
	return uniq
}
|
||||
|
||||
func parseSASIrcuDisplay(raw string) []schema.HardwareStorage {
|
||||
var blocks []map[string]string
|
||||
var cur map[string]string
|
||||
var currentType string
|
||||
|
||||
for _, line := range strings.Split(raw, "\n") {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if strings.HasPrefix(trimmed, "Device is a ") {
|
||||
if cur != nil {
|
||||
cur["__device_type"] = currentType
|
||||
blocks = append(blocks, cur)
|
||||
}
|
||||
cur = map[string]string{}
|
||||
currentType = strings.TrimSpace(strings.TrimPrefix(trimmed, "Device is a "))
|
||||
continue
|
||||
}
|
||||
if cur == nil {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(trimmed, ":"); idx > 0 {
|
||||
key := strings.TrimSpace(trimmed[:idx])
|
||||
val := strings.TrimSpace(trimmed[idx+1:])
|
||||
cur[key] = val
|
||||
}
|
||||
}
|
||||
if cur != nil {
|
||||
cur["__device_type"] = currentType
|
||||
blocks = append(blocks, cur)
|
||||
}
|
||||
|
||||
var out []schema.HardwareStorage
|
||||
for _, b := range blocks {
|
||||
dt := strings.ToLower(b["__device_type"])
|
||||
if !strings.Contains(dt, "hard disk") && !strings.Contains(dt, "ssd") && !strings.Contains(dt, "nvme") {
|
||||
continue
|
||||
}
|
||||
|
||||
present := true
|
||||
status := mapRAIDDriveStatus(b["State"])
|
||||
s := schema.HardwareStorage{Present: &present, Status: &status}
|
||||
|
||||
enclosure := strings.TrimSpace(b["Enclosure #"])
|
||||
slot := strings.TrimSpace(b["Slot #"])
|
||||
if enclosure != "" || slot != "" {
|
||||
v := enclosure + ":" + slot
|
||||
v = strings.Trim(v, ":")
|
||||
s.Slot = &v
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(b["Model Number"]); v != "" {
|
||||
s.Model = &v
|
||||
}
|
||||
if v := strings.TrimSpace(b["Serial No"]); v != "" {
|
||||
s.SerialNumber = &v
|
||||
}
|
||||
if v := strings.ToUpper(strings.TrimSpace(b["Protocol"])); v != "" {
|
||||
s.Interface = &v
|
||||
}
|
||||
|
||||
media := strings.ToUpper(strings.TrimSpace(b["Drive Type"]))
|
||||
if media == "" {
|
||||
media = strings.ToUpper(dt)
|
||||
}
|
||||
intf := ""
|
||||
if s.Interface != nil {
|
||||
intf = *s.Interface
|
||||
}
|
||||
devType := inferDriveType(media, intf)
|
||||
s.Type = &devType
|
||||
|
||||
if mb := parseSASIrcuMB(b["Size (in MB)/(in sectors)"]); mb > 0 {
|
||||
gb := mb / 1000
|
||||
if gb == 0 {
|
||||
gb = 1
|
||||
}
|
||||
s.SizeGB = &gb
|
||||
}
|
||||
|
||||
if s.Slot != nil || s.SerialNumber != nil || s.Model != nil {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// parseSASIrcuMB returns the integer before the first "/" of a
// "<MB>/<sectors>" value, or the whole value if there is no "/". It returns
// 0 when that part is empty or not an integer.
func parseSASIrcuMB(raw string) int {
	mbPart, _, _ := strings.Cut(strings.TrimSpace(raw), "/")
	mb, err := strconv.Atoi(strings.TrimSpace(mbPart))
	if err != nil {
		return 0
	}
	return mb
}
|
||||
|
||||
func collectArcconfDrives() []schema.HardwareStorage {
|
||||
raw, err := raidToolQuery("arcconf", "getconfig", "1", "pd")
|
||||
if err != nil {
|
||||
slog.Info("raid: arcconf unavailable", "err", err)
|
||||
return nil
|
||||
}
|
||||
return parseArcconfPhysicalDrives(string(raw))
|
||||
}
|
||||
|
||||
func parseArcconfPhysicalDrives(raw string) []schema.HardwareStorage {
|
||||
lines := strings.Split(raw, "\n")
|
||||
var blocks []map[string]string
|
||||
var cur map[string]string
|
||||
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if strings.HasPrefix(strings.ToLower(trimmed), "device #") {
|
||||
if cur != nil {
|
||||
blocks = append(blocks, cur)
|
||||
}
|
||||
cur = map[string]string{}
|
||||
continue
|
||||
}
|
||||
if cur == nil {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(trimmed, ":"); idx > 0 {
|
||||
key := strings.TrimSpace(trimmed[:idx])
|
||||
val := strings.TrimSpace(trimmed[idx+1:])
|
||||
cur[key] = val
|
||||
}
|
||||
}
|
||||
if cur != nil {
|
||||
blocks = append(blocks, cur)
|
||||
}
|
||||
|
||||
var out []schema.HardwareStorage
|
||||
for _, b := range blocks {
|
||||
present := true
|
||||
status := mapRAIDDriveStatus(b["State"])
|
||||
s := schema.HardwareStorage{Present: &present, Status: &status}
|
||||
|
||||
if v := strings.TrimSpace(b["Reported Location"]); v != "" {
|
||||
s.Slot = &v
|
||||
}
|
||||
if v := strings.TrimSpace(b["Model"]); v != "" {
|
||||
s.Model = &v
|
||||
}
|
||||
if v := strings.TrimSpace(b["Serial number"]); v != "" {
|
||||
s.SerialNumber = &v
|
||||
}
|
||||
if gb := parseHumanSizeToGB(b["Total Size"]); gb > 0 {
|
||||
s.SizeGB = &gb
|
||||
}
|
||||
|
||||
intf := parseArcconfInterface(b["Transfer Speed"])
|
||||
if intf != "" {
|
||||
s.Interface = &intf
|
||||
}
|
||||
media := strings.ToUpper(strings.TrimSpace(b["SSD"]))
|
||||
if media == "YES" || media == "TRUE" {
|
||||
media = "SSD"
|
||||
}
|
||||
devType := inferDriveType(media, intf)
|
||||
s.Type = &devType
|
||||
|
||||
if s.Slot != nil || s.SerialNumber != nil || s.Model != nil {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// parseArcconfInterface returns "SAS", "SATA" or "NVME" depending on which of
// those names first matches, in that order, as a case-insensitive substring
// of raw. It returns "" when none matches.
func parseArcconfInterface(raw string) string {
	upper := strings.ToUpper(raw)
	for _, proto := range []string{"SAS", "SATA", "NVME"} {
		if strings.Contains(upper, proto) {
			return proto
		}
	}
	return ""
}
|
||||
|
||||
// ssacliPhysicalDriveLine matches, case-insensitively, a whole line of the
// form "physicaldrive <id> (<details>)". Capture group 1 is the drive id and
// group 2 is the text between the parentheses.
var ssacliPhysicalDriveLine = regexp.MustCompile(`(?i)^physicaldrive\s+(\S+)\s+\(([^)]*)\)$`)
|
||||
|
||||
func collectSSACLIDrives() []schema.HardwareStorage {
|
||||
raw, err := raidToolQuery("ssacli", "ctrl", "all", "show", "config", "detail")
|
||||
if err != nil {
|
||||
slog.Info("raid: ssacli unavailable", "err", err)
|
||||
return nil
|
||||
}
|
||||
return parseSSACLIPhysicalDrives(string(raw))
|
||||
}
|
||||
|
||||
func parseSSACLIPhysicalDrives(raw string) []schema.HardwareStorage {
|
||||
lines := strings.Split(raw, "\n")
|
||||
var out []schema.HardwareStorage
|
||||
var cur *schema.HardwareStorage
|
||||
|
||||
flush := func() {
|
||||
if cur == nil {
|
||||
return
|
||||
}
|
||||
if cur.Slot != nil || cur.SerialNumber != nil || cur.Model != nil {
|
||||
out = append(out, *cur)
|
||||
}
|
||||
cur = nil
|
||||
}
|
||||
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
if m := ssacliPhysicalDriveLine.FindStringSubmatch(trimmed); len(m) == 3 {
|
||||
flush()
|
||||
present := true
|
||||
status := "UNKNOWN"
|
||||
s := schema.HardwareStorage{Present: &present, Status: &status}
|
||||
slot := m[1]
|
||||
s.Slot = &slot
|
||||
|
||||
meta := strings.Split(m[2], ",")
|
||||
if len(meta) > 0 {
|
||||
if gb := parseHumanSizeToGB(strings.TrimSpace(meta[0])); gb > 0 {
|
||||
s.SizeGB = &gb
|
||||
}
|
||||
}
|
||||
if len(meta) > 1 {
|
||||
intf := parseSSACLIInterface(meta[1])
|
||||
if intf != "" {
|
||||
s.Interface = &intf
|
||||
}
|
||||
devType := inferDriveType(strings.ToUpper(meta[1]), intf)
|
||||
s.Type = &devType
|
||||
}
|
||||
if len(meta) > 2 {
|
||||
st := mapRAIDDriveStatus(meta[len(meta)-1])
|
||||
s.Status = &st
|
||||
}
|
||||
cur = &s
|
||||
continue
|
||||
}
|
||||
if cur == nil {
|
||||
continue
|
||||
}
|
||||
if idx := strings.Index(trimmed, ":"); idx > 0 {
|
||||
key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
|
||||
val := strings.TrimSpace(trimmed[idx+1:])
|
||||
switch key {
|
||||
case "serial number":
|
||||
if val != "" {
|
||||
cur.SerialNumber = &val
|
||||
}
|
||||
case "model":
|
||||
if val != "" {
|
||||
cur.Model = &val
|
||||
}
|
||||
case "status":
|
||||
st := mapRAIDDriveStatus(val)
|
||||
cur.Status = &st
|
||||
}
|
||||
}
|
||||
}
|
||||
flush()
|
||||
return out
|
||||
}
|
||||
|
||||
// parseSSACLIInterface maps an ssacli drive description to "SAS", "SATA" or
// "NVME" by case-insensitive substring match, checked in that order. It
// returns "" when no interface name is present.
func parseSSACLIInterface(raw string) string {
	upper := strings.ToUpper(raw)
	if strings.Contains(upper, "SAS") {
		return "SAS"
	}
	if strings.Contains(upper, "SATA") {
		return "SATA"
	}
	if strings.Contains(upper, "NVME") {
		return "NVME"
	}
	return ""
}
|
||||
|
||||
func parseStorcliDrivesJSON(raw []byte) []schema.HardwareStorage {
|
||||
var doc struct {
|
||||
Controllers []struct {
|
||||
ResponseData struct {
|
||||
DriveInformation []struct {
|
||||
EIDSlt string `json:"EID:Slt"`
|
||||
State string `json:"State"`
|
||||
Size string `json:"Size"`
|
||||
Intf string `json:"Intf"`
|
||||
Med string `json:"Med"`
|
||||
Model string `json:"Model"`
|
||||
SN string `json:"SN"`
|
||||
Sp string `json:"Sp"`
|
||||
Type string `json:"Type"`
|
||||
} `json:"Drive Information"`
|
||||
} `json:"Response Data"`
|
||||
} `json:"Controllers"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
slog.Warn("raid: parse storcli json failed", "err", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var drives []schema.HardwareStorage
|
||||
for _, ctl := range doc.Controllers {
|
||||
for _, d := range ctl.ResponseData.DriveInformation {
|
||||
if s := storcliDriveToStorage(d); s != nil {
|
||||
drives = append(drives, *s)
|
||||
}
|
||||
}
|
||||
}
|
||||
return drives
|
||||
}
|
||||
|
||||
func storcliDriveToStorage(d struct {
|
||||
EIDSlt string `json:"EID:Slt"`
|
||||
State string `json:"State"`
|
||||
Size string `json:"Size"`
|
||||
Intf string `json:"Intf"`
|
||||
Med string `json:"Med"`
|
||||
Model string `json:"Model"`
|
||||
SN string `json:"SN"`
|
||||
Sp string `json:"Sp"`
|
||||
Type string `json:"Type"`
|
||||
}) *schema.HardwareStorage {
|
||||
present := true
|
||||
status := mapRAIDDriveStatus(d.State)
|
||||
s := schema.HardwareStorage{
|
||||
Present: &present,
|
||||
Status: &status,
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(d.EIDSlt); v != "" {
|
||||
s.Slot = &v
|
||||
}
|
||||
if v := strings.TrimSpace(d.Model); v != "" {
|
||||
s.Model = &v
|
||||
}
|
||||
if v := strings.TrimSpace(d.SN); v != "" {
|
||||
s.SerialNumber = &v
|
||||
}
|
||||
if v := strings.TrimSpace(strings.ToUpper(d.Intf)); v != "" {
|
||||
s.Interface = &v
|
||||
}
|
||||
|
||||
devType := inferDriveType(strings.TrimSpace(strings.ToUpper(d.Med)), strings.TrimSpace(strings.ToUpper(d.Intf)))
|
||||
if devType != "" {
|
||||
s.Type = &devType
|
||||
}
|
||||
|
||||
if gb := parseHumanSizeToGB(d.Size); gb > 0 {
|
||||
s.SizeGB = &gb
|
||||
}
|
||||
|
||||
// return only meaningful records
|
||||
if s.Model == nil && s.SerialNumber == nil && s.Slot == nil {
|
||||
return nil
|
||||
}
|
||||
return &s
|
||||
}
|
||||
|
||||
// inferDriveType derives a drive type from a media string and an interface
// string. Matching is case-sensitive, so both are expected to be upper-cased
// already.
//
// Precedence: "SSD" in med gives "SSD"; otherwise "NVME" in intf gives
// "NVMe"; otherwise "HDD" in med, or a SAS/SATA interface, gives "HDD";
// anything else gives "Unknown".
func inferDriveType(med, intf string) string {
	if strings.Contains(med, "SSD") {
		return "SSD"
	}
	if strings.Contains(intf, "NVME") {
		return "NVMe"
	}
	if strings.Contains(med, "HDD") {
		return "HDD"
	}
	// With no media hint, a SAS or SATA drive is assumed to be a hard disk.
	if strings.Contains(intf, "SAS") || strings.Contains(intf, "SATA") {
		return "HDD"
	}
	return "Unknown"
}
|
||||
|
||||
// mapRAIDDriveStatus normalizes a vendor-specific drive state string to one of
// "OK", "WARNING", "CRITICAL" or "UNKNOWN".
//
// Matching is a case-insensitive substring search, so both long forms
// ("Online", "Rebuilding", "Failed", "Offline") and the abbreviated forms
// ("Onln", "Rbld", "Offln") are recognized. Cases are evaluated in order and
// the first match wins; anything unrecognized, including an empty string,
// yields "UNKNOWN".
func mapRAIDDriveStatus(raw string) string {
	u := strings.ToUpper(strings.TrimSpace(raw))
	switch {
	case strings.Contains(u, "OK"), strings.Contains(u, "OPTIMAL"), strings.Contains(u, "READY"):
		return "OK"
	case strings.Contains(u, "ONLN"), strings.Contains(u, "ONLINE"):
		return "OK"
	case strings.Contains(u, "RBLD"), strings.Contains(u, "REBUILD"):
		return "WARNING"
	// "OFFLN" is the abbreviated counterpart of "ONLN"; without it an offline
	// drive reported in abbreviated form would fall through to "UNKNOWN".
	case strings.Contains(u, "FAIL"), strings.Contains(u, "OFFLN"), strings.Contains(u, "OFFLINE"):
		return "CRITICAL"
	default:
		return "UNKNOWN"
	}
}
|
||||
|
||||
// parseHumanSizeToGB converts a "<number> <unit>" string such as "894 GB" or
// "1.5 TB" into whole gigabytes, truncating any fraction.
//
// Units are matched case-insensitively by prefix: TB is multiplied by 1000,
// GB is taken as is, and MB is divided by 1000. It returns 0 when there are
// fewer than two whitespace-separated fields, the number does not parse, or
// the unit is not recognized.
func parseHumanSizeToGB(raw string) int {
	tokens := strings.Fields(raw)
	if len(tokens) < 2 {
		return 0
	}
	amount, err := strconv.ParseFloat(tokens[0], 64)
	if err != nil {
		return 0
	}

	unit := strings.ToUpper(tokens[1])
	if strings.HasPrefix(unit, "TB") {
		return int(amount * 1000)
	}
	if strings.HasPrefix(unit, "GB") {
		return int(amount)
	}
	if strings.HasPrefix(unit, "MB") {
		return int(amount / 1000)
	}
	return 0
}
|
||||
|
||||
func appendUniqueStorage(base, extra []schema.HardwareStorage) []schema.HardwareStorage {
|
||||
if len(extra) == 0 {
|
||||
return base
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
for _, d := range base {
|
||||
seen[storageIdentityKey(d)] = true
|
||||
}
|
||||
for _, d := range extra {
|
||||
key := storageIdentityKey(d)
|
||||
if key == "" || seen[key] {
|
||||
continue
|
||||
}
|
||||
base = append(base, d)
|
||||
seen[key] = true
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
func storageIdentityKey(d schema.HardwareStorage) string {
|
||||
if d.SerialNumber != nil && strings.TrimSpace(*d.SerialNumber) != "" {
|
||||
return "sn:" + strings.ToLower(strings.TrimSpace(*d.SerialNumber))
|
||||
}
|
||||
if d.Model != nil && d.Slot != nil {
|
||||
return "modelslot:" + strings.ToLower(strings.TrimSpace(*d.Model)) + ":" + strings.ToLower(strings.TrimSpace(*d.Slot))
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// mdArray describes one array parsed from /proc/mdstat.
type mdArray struct {
	// Name is the array name as it appears before " : ", e.g. "md126".
	Name string
	// Degraded is set when the array's health marker contains an underscore, e.g. "[U_]".
	Degraded bool
	// Members lists member device names with the "[n]" suffix removed, e.g. "nvme0n1".
	Members []string
}
|
||||
|
||||
func enrichStorageWithVROC(storage []schema.HardwareStorage, pcie []schema.HardwarePCIeDevice) []schema.HardwareStorage {
|
||||
if !hasVROCController(pcie) {
|
||||
return storage
|
||||
}
|
||||
|
||||
raw, err := readMDStat()
|
||||
if err != nil {
|
||||
slog.Info("vroc: cannot read /proc/mdstat", "err", err)
|
||||
return storage
|
||||
}
|
||||
arrays := parseMDStatArrays(string(raw))
|
||||
if len(arrays) == 0 {
|
||||
slog.Info("vroc: no md arrays found")
|
||||
return storage
|
||||
}
|
||||
|
||||
serialToArray := map[string]mdArray{}
|
||||
for _, arr := range arrays {
|
||||
for _, member := range arr.Members {
|
||||
serial := queryDeviceSerial("/dev/" + member)
|
||||
if serial == "" {
|
||||
continue
|
||||
}
|
||||
serialToArray[strings.ToLower(serial)] = arr
|
||||
}
|
||||
}
|
||||
if len(serialToArray) == 0 {
|
||||
return storage
|
||||
}
|
||||
|
||||
updated := 0
|
||||
for i := range storage {
|
||||
if storage[i].SerialNumber == nil || strings.TrimSpace(*storage[i].SerialNumber) == "" {
|
||||
continue
|
||||
}
|
||||
arr, ok := serialToArray[strings.ToLower(strings.TrimSpace(*storage[i].SerialNumber))]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if storage[i].Telemetry == nil {
|
||||
storage[i].Telemetry = map[string]any{}
|
||||
}
|
||||
storage[i].Telemetry["vroc_array"] = arr.Name
|
||||
storage[i].Telemetry["vroc_degraded"] = arr.Degraded
|
||||
if arr.Degraded {
|
||||
status := "WARNING"
|
||||
storage[i].Status = &status
|
||||
}
|
||||
updated++
|
||||
}
|
||||
|
||||
slog.Info("vroc: enriched storage members", "count", updated)
|
||||
return storage
|
||||
}
|
||||
|
||||
func hasVROCController(pcie []schema.HardwarePCIeDevice) bool {
|
||||
for _, dev := range pcie {
|
||||
if dev.VendorID == nil || *dev.VendorID != vendorIntel {
|
||||
continue
|
||||
}
|
||||
|
||||
class := ""
|
||||
if dev.DeviceClass != nil {
|
||||
class = strings.ToLower(*dev.DeviceClass)
|
||||
}
|
||||
model := ""
|
||||
if dev.Model != nil {
|
||||
model = strings.ToLower(*dev.Model)
|
||||
}
|
||||
|
||||
if strings.Contains(class, "raid") ||
|
||||
strings.Contains(model, "vroc") ||
|
||||
strings.Contains(model, "volume management device") ||
|
||||
strings.Contains(model, "vmd") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// mdHealthPattern matches a bracketed run of 'U' and '_' characters, such as
// "[UU]" or "[U_]".
var mdHealthPattern = regexp.MustCompile(`\[[U_]+\]`)
|
||||
|
||||
func parseMDStatArrays(raw string) []mdArray {
|
||||
lines := strings.Split(raw, "\n")
|
||||
var arrays []mdArray
|
||||
var current *mdArray
|
||||
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(line, " : ") && !strings.HasPrefix(strings.TrimLeft(line, " \t"), "[") {
|
||||
left := strings.TrimSpace(strings.SplitN(line, " : ", 2)[0])
|
||||
if strings.EqualFold(left, "Personalities") || strings.EqualFold(left, "unused devices") {
|
||||
continue
|
||||
}
|
||||
if current != nil {
|
||||
arrays = append(arrays, *current)
|
||||
}
|
||||
|
||||
name := left
|
||||
fields := strings.Fields(strings.SplitN(line, " : ", 2)[1])
|
||||
|
||||
arr := mdArray{Name: name}
|
||||
for _, f := range fields {
|
||||
if i := strings.IndexByte(f, '['); i > 0 {
|
||||
member := strings.TrimSpace(f[:i])
|
||||
if member != "" {
|
||||
arr.Members = append(arr.Members, member)
|
||||
}
|
||||
}
|
||||
}
|
||||
current = &arr
|
||||
continue
|
||||
}
|
||||
|
||||
if current == nil {
|
||||
continue
|
||||
}
|
||||
if m := mdHealthPattern.FindString(trimmed); m != "" && strings.Contains(m, "_") {
|
||||
current.Degraded = true
|
||||
}
|
||||
}
|
||||
if current != nil {
|
||||
arrays = append(arrays, *current)
|
||||
}
|
||||
return arrays
|
||||
}
|
||||
|
||||
func queryDeviceSerial(devPath string) string {
|
||||
if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
|
||||
var ctrl nvmeIDCtrl
|
||||
if json.Unmarshal(out, &ctrl) == nil {
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" {
|
||||
return v
|
||||
}
|
||||
}
|
||||
}
|
||||
if out, err := exec.Command("smartctl", "-j", "-i", devPath).Output(); err == nil {
|
||||
var info smartctlInfo
|
||||
if json.Unmarshal(out, &info) == nil {
|
||||
if v := cleanDMIValue(strings.TrimSpace(info.SerialNumber)); v != "" {
|
||||
return v
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
96
audit/internal/collector/raid_parsers_test.go
Normal file
96
audit/internal/collector/raid_parsers_test.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package collector
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseSASIrcuControllerIDs(t *testing.T) {
|
||||
raw := `LSI Corporation SAS2 IR Configuration Utility.
|
||||
Adapter List
|
||||
==============
|
||||
0 SAS2008(B2)
|
||||
1 SAS2308_2(D1)
|
||||
`
|
||||
ids := parseSASIrcuControllerIDs(raw)
|
||||
if len(ids) != 2 || ids[0] != 0 || ids[1] != 1 {
|
||||
t.Fatalf("unexpected ids: %#v", ids)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSASIrcuDisplay(t *testing.T) {
|
||||
raw := `Device is a Hard disk
|
||||
Enclosure # : 32
|
||||
Slot # : 7
|
||||
State : Onln
|
||||
Size (in MB)/(in sectors) : 953869/1953525168
|
||||
Model Number : ST1000NM0033
|
||||
Serial No : Z1D12345
|
||||
Protocol : SAS
|
||||
Drive Type : HDD
|
||||
|
||||
Device is a Enclosure services device
|
||||
Enclosure # : 32
|
||||
`
|
||||
drives := parseSASIrcuDisplay(raw)
|
||||
if len(drives) != 1 {
|
||||
t.Fatalf("expected 1 drive, got %d", len(drives))
|
||||
}
|
||||
d := drives[0]
|
||||
if d.Slot == nil || *d.Slot != "32:7" {
|
||||
t.Fatalf("slot: %v", d.Slot)
|
||||
}
|
||||
if d.SerialNumber == nil || *d.SerialNumber != "Z1D12345" {
|
||||
t.Fatalf("serial: %v", d.SerialNumber)
|
||||
}
|
||||
if d.Interface == nil || *d.Interface != "SAS" {
|
||||
t.Fatalf("interface: %v", d.Interface)
|
||||
}
|
||||
if d.Status == nil || *d.Status != "OK" {
|
||||
t.Fatalf("status: %v", d.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseArcconfPhysicalDrives(t *testing.T) {
|
||||
raw := `Device #0
|
||||
Reported Location : Channel 0, Device 3
|
||||
Model : Micron_5300
|
||||
Serial number : ARC12345
|
||||
State : Online
|
||||
Total Size : 894 GB
|
||||
Transfer Speed : SATA 6.0Gb/s
|
||||
SSD : Yes
|
||||
`
|
||||
drives := parseArcconfPhysicalDrives(raw)
|
||||
if len(drives) != 1 {
|
||||
t.Fatalf("expected 1 drive, got %d", len(drives))
|
||||
}
|
||||
d := drives[0]
|
||||
if d.Type == nil || *d.Type != "SSD" {
|
||||
t.Fatalf("type: %v", d.Type)
|
||||
}
|
||||
if d.Interface == nil || *d.Interface != "SATA" {
|
||||
t.Fatalf("interface: %v", d.Interface)
|
||||
}
|
||||
if d.Status == nil || *d.Status != "OK" {
|
||||
t.Fatalf("status: %v", d.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSSACLIPhysicalDrives(t *testing.T) {
|
||||
raw := `physicaldrive 1I:1:1 (894 GB, SAS HDD, OK)
|
||||
Serial Number: SSACLI001
|
||||
Model: MB8000JVYZQ
|
||||
|
||||
physicaldrive 1I:1:2 (894 GB, SAS HDD, Failed)
|
||||
Serial Number: SSACLI002
|
||||
Model: MB8000JVYZQ
|
||||
`
|
||||
drives := parseSSACLIPhysicalDrives(raw)
|
||||
if len(drives) != 2 {
|
||||
t.Fatalf("expected 2 drives, got %d", len(drives))
|
||||
}
|
||||
if drives[0].Status == nil || *drives[0].Status != "OK" {
|
||||
t.Fatalf("drive0 status: %v", drives[0].Status)
|
||||
}
|
||||
if drives[1].Status == nil || *drives[1].Status != "CRITICAL" {
|
||||
t.Fatalf("drive1 status: %v", drives[1].Status)
|
||||
}
|
||||
}
|
||||
57
audit/internal/collector/vroc_test.go
Normal file
57
audit/internal/collector/vroc_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"bee/audit/internal/schema"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseMDStatArrays(t *testing.T) {
|
||||
raw := `Personalities : [raid1]
|
||||
md126 : active raid1 nvme0n1[0] nvme1n1[1]
|
||||
976630464 blocks super external:/md127/0 [2/2] [UU]
|
||||
|
||||
md125 : active raid1 nvme2n1[0] nvme3n1[1]
|
||||
976630464 blocks super external:/md127/1 [2/1] [U_]
|
||||
`
|
||||
arrays := parseMDStatArrays(raw)
|
||||
if len(arrays) != 2 {
|
||||
t.Fatalf("expected 2 arrays, got %d", len(arrays))
|
||||
}
|
||||
if arrays[0].Name != "md126" || arrays[0].Degraded {
|
||||
t.Fatalf("unexpected array0: %+v", arrays[0])
|
||||
}
|
||||
if len(arrays[0].Members) != 2 || arrays[0].Members[0] != "nvme0n1" {
|
||||
t.Fatalf("unexpected members array0: %+v", arrays[0].Members)
|
||||
}
|
||||
if arrays[1].Name != "md125" || !arrays[1].Degraded {
|
||||
t.Fatalf("unexpected array1: %+v", arrays[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasVROCController(t *testing.T) {
|
||||
intel := vendorIntel
|
||||
model := "Volume Management Device NVMe RAID Controller"
|
||||
class := "RAID bus controller"
|
||||
tests := []struct {
|
||||
name string
|
||||
pcie []schema.HardwarePCIeDevice
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "intel vroc",
|
||||
pcie: []schema.HardwarePCIeDevice{{VendorID: &intel, Model: &model, DeviceClass: &class}},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "non-intel raid",
|
||||
pcie: []schema.HardwarePCIeDevice{{}},
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := hasVROCController(tt.pcie)
|
||||
if got != tt.want {
|
||||
t.Fatalf("%s: got %v want %v", tt.name, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
97
iso/builder/build.sh
Executable file
97
iso/builder/build.sh
Executable file
@@ -0,0 +1,97 @@
|
||||
#!/bin/sh
# build.sh — production ISO build (unattended mode)
#
# Builds the audit binary, stages it together with optional vendor tools,
# the NVIDIA modules/userspace and build metadata into the overlay directory,
# then runs Alpine's mkimage.sh with the "bee" profile.

set -e

REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
BUILDER_DIR="${REPO_ROOT}/iso/builder"
OVERLAY_DIR="${REPO_ROOT}/iso/overlay"
DIST_DIR="${REPO_ROOT}/dist"
VENDOR_DIR="${REPO_ROOT}/iso/vendor"

. "${BUILDER_DIR}/VERSIONS"
export PATH="$PATH:/usr/local/go/bin"

# Both values are interpolated into paths, tags and repository URLs below;
# fail early with a clear message instead of building with empty strings.
: "${ALPINE_VERSION:?ALPINE_VERSION is not set (expected from ${BUILDER_DIR}/VERSIONS)}"
: "${NVIDIA_DRIVER_VERSION:?NVIDIA_DRIVER_VERSION is not set (expected from ${BUILDER_DIR}/VERSIONS)}"

echo "=== bee production ISO build ==="
echo "Alpine: ${ALPINE_VERSION}, Go: ${GO_VERSION}, NVIDIA: ${NVIDIA_DRIVER_VERSION}"

AUDIT_BIN="${DIST_DIR}/bee-audit-linux-amd64"
mkdir -p "$DIST_DIR"

cd "${REPO_ROOT}/audit"
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
    go build \
    -ldflags "-s -w -X main.Version=${AUDIT_VERSION}" \
    -o "$AUDIT_BIN" \
    ./cmd/audit

mkdir -p "${OVERLAY_DIR}/usr/local/bin"
cp "$AUDIT_BIN" "${OVERLAY_DIR}/usr/local/bin/audit"
chmod +x "${OVERLAY_DIR}/usr/local/bin/audit"

# Copy optional vendor utilities if already fetched.
for tool in storcli64 sas2ircu sas3ircu mstflint; do
    if [ -f "${VENDOR_DIR}/${tool}" ]; then
        cp "${VENDOR_DIR}/${tool}" "${OVERLAY_DIR}/usr/local/bin/${tool}"
        chmod +x "${OVERLAY_DIR}/usr/local/bin/${tool}" || true
        echo "vendor tool: ${tool} (included)"
    else
        echo "vendor tool: ${tool} (not found, skipped)"
    fi
done

# Build and inject NVIDIA proprietary modules + userspace tools.
echo "=== building NVIDIA modules ==="
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"

# Kernel version is taken from the first linux-headers-* entry in /usr/src.
KVER="$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | head -n 1)"
if [ -z "$KVER" ]; then
    echo "ERROR: no linux-headers-* entry found in /usr/src; cannot locate NVIDIA module cache" >&2
    exit 1
fi
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"

mkdir -p "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia"
cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia/"

mkdir -p "${OVERLAY_DIR}/usr/local/bin" "${OVERLAY_DIR}/usr/lib"
cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_DIR}/usr/local/bin/"
chmod +x "${OVERLAY_DIR}/usr/local/bin/nvidia-smi"
# Userspace libraries are optional: a missing lib directory is tolerated.
cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_DIR}/usr/lib/" 2>/dev/null || true

# Embed build metadata used at runtime.
mkdir -p "${OVERLAY_DIR}/etc"
BUILD_DATE="$(date +%Y-%m-%d)"
GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
cat > "${OVERLAY_DIR}/etc/bee-release" <<EOF
BEE_ISO_VERSION=${AUDIT_VERSION}
BEE_AUDIT_VERSION=${AUDIT_VERSION}
BUILD_DATE=${BUILD_DATE}
GIT_COMMIT=${GIT_COMMIT}
ALPINE_VERSION=${ALPINE_VERSION}
NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
EOF

mkdir -p "${HOME}/.mkimage"
cp "${BUILDER_DIR}/mkimg.bee.sh" "${HOME}/.mkimage/"
cp "${BUILDER_DIR}/genapkovl-bee.sh" "${HOME}/.mkimage/"

export BEE_OVERLAY_DIR="${OVERLAY_DIR}"

# Clear the previous work directory but keep the apks/kernel/syslinux/grub
# entries so they are not rebuilt on every run.
if [ -d /var/tmp/bee-iso-work ]; then
    find /var/tmp/bee-iso-work -maxdepth 1 -mindepth 1 \
        -not -name 'apks_*' -not -name 'kernel_*' \
        -not -name 'syslinux_*' -not -name 'grub_*' \
        -exec rm -rf {} + 2>/dev/null || true
fi

export TMPDIR=/var/tmp
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
cd /var/tmp
sh /usr/share/aports/scripts/mkimage.sh \
    --tag "v${ALPINE_VERSION}" \
    --outdir "${DIST_DIR}" \
    --arch x86_64 \
    --repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
    --repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/community" \
    --workdir /var/tmp/bee-iso-work \
    --profile bee

ISO="${DIST_DIR}/alpine-bee-${ALPINE_VERSION}-x86_64.iso"
if [ ! -f "$ISO" ]; then
    # NOTE(review): mkimage appears to name the image after the --tag value,
    # which carries a "v" prefix here — confirm and drop whichever name is unused.
    ISO_TAGGED="${DIST_DIR}/alpine-bee-v${ALPINE_VERSION}-x86_64.iso"
    if [ -f "$ISO_TAGGED" ]; then
        ISO="$ISO_TAGGED"
    else
        echo "ERROR: mkimage finished but no ISO was found at $ISO or $ISO_TAGGED" >&2
        exit 1
    fi
fi
echo "=== done ==="
echo "ISO: $ISO"
|
||||
82
iso/builder/genapkovl-bee.sh
Executable file
82
iso/builder/genapkovl-bee.sh
Executable file
@@ -0,0 +1,82 @@
|
||||
#!/bin/sh -e
# genapkovl-bee.sh — generate the apkovl tarball for the "bee" profile.
#
# Usage: genapkovl-bee.sh <hostname>
# Reads the overlay tree from $BEE_OVERLAY_DIR and writes
# <hostname>.apkovl.tar.gz into the current directory.

HOSTNAME="$1"
[ -n "$HOSTNAME" ] || { echo "usage: $0 hostname" >&2; exit 1; }
OVERLAY="${BEE_OVERLAY_DIR}"
[ -n "$OVERLAY" ] || { echo "ERROR: BEE_OVERLAY_DIR not set" >&2; exit 1; }

cleanup() { rm -rf "$tmp"; }
tmp="$(mktemp -d)"
trap cleanup EXIT

# makefile OWNER PERMS FILENAME: write stdin to FILENAME, then set owner and mode.
makefile() { OWNER="$1" PERMS="$2" FILENAME="$3"; cat > "$FILENAME"; chown "$OWNER" "$FILENAME"; chmod "$PERMS" "$FILENAME"; }
# rc_add SERVICE RUNLEVEL: symlink an init script into the given runlevel.
rc_add() { mkdir -p "$tmp/etc/runlevels/$2"; ln -sf /etc/init.d/"$1" "$tmp/etc/runlevels/$2/$1"; }

mkdir -p "$tmp/etc"
makefile root:root 0644 "$tmp/etc/hostname" <<EOT
$HOSTNAME
EOT

mkdir -p "$tmp/etc/network"
makefile root:root 0644 "$tmp/etc/network/interfaces" <<EOT
auto lo
iface lo inet loopback
EOT

mkdir -p "$tmp/etc/apk"
makefile root:root 0644 "$tmp/etc/apk/world" <<EOT
alpine-base
dmidecode
smartmontools
nvme-cli
pciutils
ipmitool
util-linux
lsblk
e2fsprogs
lshw
openrc
ca-certificates
tzdata
jq
wget
EOT

rc_add devfs sysinit
rc_add dmesg sysinit
rc_add mdev sysinit
rc_add hwdrivers sysinit
rc_add modloop sysinit

rc_add hwclock boot
rc_add modules boot
rc_add sysctl boot
rc_add hostname boot
rc_add bootmisc boot
rc_add syslog boot

rc_add mount-ro shutdown
rc_add killprocs shutdown
rc_add savecache shutdown

rc_add bee-network default
rc_add bee-update default
rc_add bee-nvidia default
rc_add bee-audit default

if [ -d "$OVERLAY/etc" ]; then
    cp -r "$OVERLAY/etc/." "$tmp/etc/"
    chmod +x "$tmp/etc/init.d/"* 2>/dev/null || true
fi

mkdir -p "$tmp/usr"
if [ -d "$OVERLAY/usr" ]; then
    cp -r "$OVERLAY/usr/." "$tmp/usr/"
    chmod +x "$tmp/usr/local/bin/"* 2>/dev/null || true
fi

if [ -d "$OVERLAY/lib" ]; then
    mkdir -p "$tmp/lib"
    cp -r "$OVERLAY/lib/." "$tmp/lib/"
fi

# Archive only the top-level directories that were actually staged, so tar is
# never handed a missing member and its diagnostics no longer need hiding.
members="etc usr"
if [ -d "$tmp/lib" ]; then
    members="$members lib"
fi
# shellcheck disable=SC2086 # intentional word splitting of the member list
tar -c -C "$tmp" $members | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
|
||||
@@ -89,6 +89,11 @@ if [ -d "$OVERLAY/root" ]; then
|
||||
chmod 600 "$tmp/root/.ssh/authorized_keys" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
if [ -d "$OVERLAY/lib" ]; then
|
||||
mkdir -p "$tmp/lib"
|
||||
cp -r "$OVERLAY/lib/." "$tmp/lib/"
|
||||
fi
|
||||
|
||||
mkdir -p "$tmp/etc/dropbear" "$tmp/etc/conf.d"
|
||||
# -R: auto-generate host keys if missing
|
||||
# no dependency on networking service — bee-network handles DHCP independently
|
||||
@@ -97,4 +102,4 @@ DROPBEAR_OPTS="-R -B"
|
||||
EOF
|
||||
|
||||
|
||||
tar -c -C "$tmp" etc usr root 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
|
||||
tar -c -C "$tmp" etc usr root lib 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
|
||||
|
||||
47
iso/builder/mkimg.bee.sh
Executable file
47
iso/builder/mkimg.bee.sh
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/bin/sh
|
||||
# Alpine mkimage profile: bee (production)
|
||||
|
||||
profile_bee() {
|
||||
title="Bee Hardware Audit"
|
||||
desc="Hardware audit LiveCD (production unattended mode)"
|
||||
arch="x86_64"
|
||||
hostname="alpine-bee"
|
||||
apkovl="genapkovl-bee.sh"
|
||||
image_ext="iso"
|
||||
output_format="iso"
|
||||
kernel_flavors="lts"
|
||||
kernel_addons=""
|
||||
initfs_cmdline="modules=loop,squashfs,sd-mod,usb-storage modloop=/boot/modloop-lts quiet"
|
||||
initfs_features="ata base cdrom ext4 mmc nvme raid scsi squashfs usb virtio nfit"
|
||||
|
||||
apks="
|
||||
alpine-base
|
||||
linux-lts
|
||||
linux-firmware-none
|
||||
linux-firmware-rtl_nic
|
||||
linux-firmware-bnx2
|
||||
linux-firmware-bnx2x
|
||||
linux-firmware-tigon
|
||||
linux-firmware-qlogic
|
||||
linux-firmware-netronome
|
||||
linux-firmware-mellanox
|
||||
linux-firmware-intel
|
||||
linux-firmware-other
|
||||
|
||||
dmidecode
|
||||
smartmontools
|
||||
nvme-cli
|
||||
pciutils
|
||||
ipmitool
|
||||
util-linux
|
||||
lsblk
|
||||
e2fsprogs
|
||||
lshw
|
||||
|
||||
openrc
|
||||
ca-certificates
|
||||
tzdata
|
||||
jq
|
||||
wget
|
||||
"
|
||||
}
|
||||
@@ -11,9 +11,6 @@
|
||||
|
||||
Logs: /var/log/bee-audit.json /var/log/bee-network.log
|
||||
|
||||
Re-run audit: audit --output stdout | less
|
||||
Restart net: bee-net-restart
|
||||
Check tools: which dmidecode smartctl nvme ipmitool lspci
|
||||
Open TUI: bee-tui
|
||||
|
||||
SSH access: key auth (developers) or bee/eeb (password fallback)
|
||||
|
||||
|
||||
@@ -1 +1,12 @@
|
||||
export PATH="$PATH:/usr/local/bin"

# Start the TUI automatically on the local console (tty1) after boot.
# It is skipped when already launched once in this environment or when the
# shell belongs to an SSH session. Leaving the TUI drops back to this shell.
if [ -n "${BEE_TUI_AUTO_LAUNCHED:-}" ] || [ -n "${SSH_CONNECTION:-}" ] || [ -n "${SSH_TTY:-}" ]; then
    :
elif [ "$(tty 2>/dev/null)" = "/dev/tty1" ] && [ -x /usr/local/bin/bee-tui ]; then
    export BEE_TUI_AUTO_LAUNCHED=1
    /usr/local/bin/bee-tui
fi
|
||||
|
||||
620
iso/overlay-debug/usr/local/bin/bee-tui
Executable file
620
iso/overlay-debug/usr/local/bin/bee-tui
Executable file
@@ -0,0 +1,620 @@
|
||||
#!/bin/sh
|
||||
# bee-tui: interactive text menu for debug LiveCD operations.
|
||||
|
||||
set -u
|
||||
|
||||
# pause: print a blank line and a prompt, then wait for one line of input.
pause() {
    printf '\nPress Enter to continue... '
    read -r _
}
|
||||
|
||||
# header: clear the screen and print the TUI banner followed by a blank line.
header() {
    clear
    printf '%s\n' \
        "==============================================" \
        " bee TUI (debug)" \
        "==============================================" \
        ""
}
|
||||
|
||||
# list_ifaces: print the sorted names of candidate interfaces, one per line.
# Loopback and names starting with docker/virbr/veth/tun/tap/br-/bond/dummy
# are filtered out.
list_ifaces() {
    # "ip -o link" prints stacked devices as "name@parent"; strip the "@parent"
    # suffix so the result is a name that other ip commands accept.
    ip -o link show \
        | awk -F': ' '{ name = $2; sub(/@.*/, "", name); print name }' \
        | grep -v '^lo$' \
        | grep -vE '^(docker|virbr|veth|tun|tap|br-|bond|dummy)' \
        | sort
}
|
||||
|
||||
# show_network_status: list each interface from list_ifaces with its link
# state and IPv4 addresses, then print the routing table.
show_network_status() {
    header
    echo "Network interfaces"
    echo
    for iface in $(list_ifaces); do
        # Find the word after the "state" keyword instead of a fixed column:
        # optional fields such as "master <dev>" shift the column positions.
        state=$(ip -o link show "$iface" \
            | awk '{ for (i = 1; i < NF; i++) if ($i == "state") { print $(i + 1); exit } }')
        [ -n "$state" ] || state="unknown"
        ipv4=$(ip -o -4 addr show dev "$iface" | awk '{print $4}' | paste -sd ',')
        [ -n "$ipv4" ] || ipv4="(no IPv4)"
        echo "- $iface: state=$state ip=$ipv4"
    done
    echo
    ip route | sed 's/^/ route: /'
    pause
}
|
||||
|
||||
# choose_interface: show a numbered list of interfaces and read a selection.
# On success sets CHOSEN_IFACE and returns 0; returns 1 when no interfaces
# exist or the input is not a valid list index.
choose_interface() {
    ifaces="$(list_ifaces)"
    if [ -z "$ifaces" ]; then
        echo "No physical interfaces found"
        return 1
    fi

    echo "$ifaces" | nl -w2 -s'. '
    echo
    printf 'Select interface number: '
    read -r idx

    # The index is spliced into a sed script, so accept plain positive integers
    # only. An empty value would otherwise print every line and "select" all
    # interfaces at once.
    case "$idx" in
        ''|*[!0-9]*)
            echo "Invalid interface selection"
            return 1
            ;;
    esac
    if [ "$idx" -lt 1 ]; then
        echo "Invalid interface selection"
        return 1
    fi

    iface=$(echo "$ifaces" | sed -n "${idx}p")
    if [ -z "$iface" ]; then
        echo "Invalid interface selection"
        return 1
    fi
    CHOSEN_IFACE="$iface"
    return 0
}
|
||||
|
||||
# network_dhcp_one: let the operator pick one interface and request a DHCP
# lease on it, reporting whether a lease was obtained.
network_dhcp_one() {
    header
    echo "DHCP on one interface"
    echo
    choose_interface || { pause; return; }

    iface="$CHOSEN_IFACE"
    echo
    echo "Starting DHCP on $iface..."
    ip link set "$iface" up 2>/dev/null || true
    # -n makes udhcpc exit when no lease is obtained after the -t attempts;
    # without it the client keeps retrying and the menu never comes back.
    if udhcpc -i "$iface" -n -t 5 -T 3; then
        echo "DHCP lease obtained on $iface"
    else
        echo "DHCP failed on $iface (no lease obtained)"
    fi
    pause
}
|
||||
|
||||
# network_dhcp_all: run bee-net-restart, then wait for the operator.
network_dhcp_all() {
    header
    printf '%s\n\n' "Restarting DHCP on all physical interfaces..."
    /usr/local/bin/bee-net-restart
    pause
}
|
||||
|
||||
# network_static_one: interactively assign a static IPv4 address, default
# gateway and optional DNS server to one interface.
# All input is validated before the interface is touched, so a typo does not
# leave the interface flushed and unconfigured.
network_static_one() {
    header
    echo "Static IPv4 setup"
    echo
    choose_interface || { pause; return; }

    iface="$CHOSEN_IFACE"
    echo
    printf 'IPv4 address (example 192.168.1.10): '
    read -r ip
    if [ -z "$ip" ]; then
        echo "IP address is required"
        pause
        return
    fi
    # Shape check only (digits and dots, at least four fields); "ip addr add"
    # still performs the real validation. This also rejects "a.b.c.d/nn" input,
    # which would otherwise get a second prefix appended below.
    case "$ip" in
        *[!0-9.]*|.*|*.|*..*) ip_ok=0 ;;
        *.*.*.*) ip_ok=1 ;;
        *) ip_ok=0 ;;
    esac
    if [ "$ip_ok" -ne 1 ]; then
        echo "Invalid IPv4 address: $ip"
        pause
        return
    fi

    printf 'Netmask (example 24 or 255.255.255.0): '
    read -r mask
    if [ -z "$mask" ]; then
        echo "Netmask is required"
        pause
        return
    fi
    prefix=$(mask_to_prefix "$mask")
    if [ -z "$prefix" ]; then
        echo "Invalid netmask: $mask"
        pause
        return
    fi
    cidr="$ip/$prefix"

    printf 'Default gateway: '
    read -r gw
    if [ -z "$gw" ]; then
        echo "Default gateway is required"
        pause
        return
    fi
    printf 'DNS server (optional): '
    read -r dns

    ip link set "$iface" up 2>/dev/null || true
    ip addr flush dev "$iface"
    if ! ip addr add "$cidr" dev "$iface"; then
        echo "Failed to set IP"
        pause
        return
    fi

    # Replace any existing default route with the requested gateway.
    route_ok=1
    ip route del default >/dev/null 2>&1 || true
    if ! ip route add default via "$gw" dev "$iface"; then
        route_ok=0
    fi

    if [ -n "$dns" ]; then
        printf 'nameserver %s\n' "$dns" > /etc/resolv.conf
    fi

    echo
    if [ "$route_ok" -eq 1 ]; then
        echo "Static config applied to $iface"
    else
        echo "Address applied to $iface, but default route via $gw could not be set"
    fi
    pause
}
|
||||
|
||||
# mask_to_prefix MASK: print the CIDR prefix length for MASK.
# MASK is either a prefix length 0-32 or a dotted-quad netmask; whitespace is
# ignored. Prints nothing and returns 1 when MASK is not recognised.
mask_to_prefix() {
    mask="$(echo "$1" | tr -d '[:space:]')"
    case "$mask" in
        0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32)
            echo "$mask" ;;
        # Dotted forms cover the same 0-32 range as the numeric forms above.
        0.0.0.0) echo 0 ;;
        128.0.0.0) echo 1 ;;
        192.0.0.0) echo 2 ;;
        224.0.0.0) echo 3 ;;
        240.0.0.0) echo 4 ;;
        248.0.0.0) echo 5 ;;
        252.0.0.0) echo 6 ;;
        254.0.0.0) echo 7 ;;
        255.0.0.0) echo 8 ;;
        255.128.0.0) echo 9 ;;
        255.192.0.0) echo 10 ;;
        255.224.0.0) echo 11 ;;
        255.240.0.0) echo 12 ;;
        255.248.0.0) echo 13 ;;
        255.252.0.0) echo 14 ;;
        255.254.0.0) echo 15 ;;
        255.255.0.0) echo 16 ;;
        255.255.128.0) echo 17 ;;
        255.255.192.0) echo 18 ;;
        255.255.224.0) echo 19 ;;
        255.255.240.0) echo 20 ;;
        255.255.248.0) echo 21 ;;
        255.255.252.0) echo 22 ;;
        255.255.254.0) echo 23 ;;
        255.255.255.0) echo 24 ;;
        255.255.255.128) echo 25 ;;
        255.255.255.192) echo 26 ;;
        255.255.255.224) echo 27 ;;
        255.255.255.240) echo 28 ;;
        255.255.255.248) echo 29 ;;
        255.255.255.252) echo 30 ;;
        255.255.255.254) echo 31 ;;
        255.255.255.255) echo 32 ;;
        *) return 1 ;;
    esac
}
|
||||
|
||||
# network_menu: loop over the network submenu until "Back" is chosen.
network_menu() {
    while :; do
        header
        printf '%s\n' \
            "Network" \
            "1. Show network status" \
            "2. DHCP on all interfaces" \
            "3. DHCP on one interface" \
            "4. Set static IPv4 on one interface" \
            "5. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) show_network_status ;;
            2) network_dhcp_all ;;
            3) network_dhcp_one ;;
            4) network_static_one ;;
            5) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# bee_services_list: print the file name of every /etc/init.d/bee-* entry.
bee_services_list() {
    for path in /etc/init.d/bee-*; do
        # An unmatched glob stays literal; skip it.
        if [ -e "$path" ]; then
            basename "$path"
        fi
    done
}
|
||||
|
||||
# services_status_all: print running/stopped for each bee-* service, based on
# the exit status of "rc-service <name> status".
services_status_all() {
    header
    printf '%s\n\n' "bee service status"
    for svc in $(bee_services_list); do
        svc_state="stopped"
        if rc-service "$svc" status >/dev/null 2>&1; then
            svc_state="running"
        fi
        echo "- $svc: $svc_state"
    done
    pause
}
|
||||
|
||||
# choose_service: show a numbered list of bee-* services and read a selection.
# On success sets CHOSEN_SERVICE and returns 0; returns 1 when no services
# exist or the input is not a valid list index.
choose_service() {
    svcs="$(bee_services_list)"
    if [ -z "$svcs" ]; then
        echo "No bee-* services found"
        return 1
    fi

    echo "$svcs" | nl -w2 -s'. '
    echo
    printf 'Select service number: '
    read -r idx

    # The index is spliced into a sed script, so accept plain positive integers
    # only. An empty value would otherwise print every line and "select" all
    # services at once.
    case "$idx" in
        ''|*[!0-9]*)
            echo "Invalid service selection"
            return 1
            ;;
    esac
    if [ "$idx" -lt 1 ]; then
        echo "Invalid service selection"
        return 1
    fi

    svc=$(echo "$svcs" | sed -n "${idx}p")
    if [ -z "$svc" ]; then
        echo "Invalid service selection"
        return 1
    fi
    CHOSEN_SERVICE="$svc"
    return 0
}
|
||||
|
||||
# service_action_menu: pick one bee-* service and run a single rc-service
# action on it (status, restart, start, stop, or toggle based on status).
service_action_menu() {
    header
    printf '%s\n\n' "Service action"
    choose_service || { pause; return; }
    svc="$CHOSEN_SERVICE"

    echo
    printf '%s\n' \
        "Selected: $svc" \
        "1. status" \
        "2. restart" \
        "3. start" \
        "4. stop" \
        "5. toggle start/stop" \
        ""
    printf 'Choice: '
    read -r act

    # Translate the menu number into the rc-service verb to run.
    verb=""
    case "$act" in
        1) verb="status" ;;
        2) verb="restart" ;;
        3) verb="start" ;;
        4) verb="stop" ;;
        5)
            if rc-service "$svc" status >/dev/null 2>&1; then
                verb="stop"
            else
                verb="start"
            fi
            ;;
    esac

    if [ -n "$verb" ]; then
        rc-service "$svc" "$verb" || true
    else
        echo "Invalid action"
    fi
    pause
}
|
||||
|
||||
# services_menu: loop over the service submenu until "Back" is chosen.
services_menu() {
    while :; do
        header
        printf '%s\n' \
            "bee Services" \
            "1. Status of all bee-* services" \
            "2. Manage one service (status/restart/start/stop/toggle)" \
            "3. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) services_status_all ;;
            2) service_action_menu ;;
            3) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# confirm_phrase PHRASE PROMPT: show "PROMPT (PHRASE): " and succeed only when
# the line typed by the operator equals PHRASE exactly.
confirm_phrase() {
    phrase="$1"
    prompt="$2"
    printf '\n%s (%s): ' "$prompt" "$phrase"
    read -r value
    test "$value" = "$phrase"
}
|
||||
|
||||
# shutdown_menu: loop over the shutdown/reboot test submenu. Destructive
# actions run only after the operator types the matching confirmation phrase.
shutdown_menu() {
    while :; do
        header
        printf '%s\n' \
            "Shutdown/Reboot Tests" \
            "1. Reboot now" \
            "2. Power off now" \
            "3. Schedule poweroff in 60s" \
            "4. Cancel scheduled shutdown" \
            "5. IPMI chassis power status" \
            "6. IPMI chassis power soft" \
            "7. IPMI chassis power cycle" \
            "8. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1)
                if confirm_phrase "REBOOT" "Type confirmation"; then
                    reboot
                else
                    echo "Canceled"
                    pause
                fi
                ;;
            2)
                if confirm_phrase "POWEROFF" "Type confirmation"; then
                    poweroff
                else
                    echo "Canceled"
                    pause
                fi
                ;;
            3)
                if confirm_phrase "SCHEDULE" "Type confirmation"; then
                    shutdown -P +1 "bee test: scheduled poweroff in 60 seconds"
                    echo "Scheduled"
                else
                    echo "Canceled"
                fi
                pause
                ;;
            4)
                shutdown -c || true
                echo "Canceled (if any schedule existed)"
                pause
                ;;
            5)
                ipmitool chassis power status || echo "ipmitool power status failed"
                pause
                ;;
            6)
                if confirm_phrase "IPMI-SOFT" "Type confirmation"; then
                    ipmitool chassis power soft || echo "ipmitool soft power failed"
                else
                    echo "Canceled"
                fi
                pause
                ;;
            7)
                if confirm_phrase "IPMI-CYCLE" "Type confirmation"; then
                    ipmitool chassis power cycle || echo "ipmitool power cycle failed"
                else
                    echo "Canceled"
                fi
                pause
                ;;
            8)
                return
                ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# gpu_burn_10m: run "gpu_burn 600" after checking that gpu_burn and a working
# nvidia-smi are available and the operator confirmed. Output is shown on the
# console and written to /var/log/bee-gpuburn.log.
gpu_burn_10m() {
    header
    printf '%s\n\n' "GPU Burn (10 minutes)"

    if ! command -v gpu_burn >/dev/null 2>&1; then
        printf '%s\n' \
            "gpu_burn binary not found in PATH" \
            "Expected command: gpu_burn"
        pause
        return
    fi

    # nvidia-smi must both exist and be able to list GPUs.
    gpu_ready=0
    if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; then
        gpu_ready=1
    fi
    if [ "$gpu_ready" -eq 0 ]; then
        echo "NVIDIA driver/GPU not ready (nvidia-smi failed)"
        pause
        return
    fi

    if ! confirm_phrase "GPU-BURN" "Type confirmation to start benchmark"; then
        echo "Canceled"
        pause
        return
    fi

    printf '%s\n' \
        "Running: gpu_burn 600" \
        "Log: /var/log/bee-gpuburn.log"
    gpu_burn 600 2>&1 | tee /var/log/bee-gpuburn.log
    printf '\n%s\n' "GPU Burn finished"
    pause
}
|
||||
|
||||
# gpu_benchmarks_menu: loop over the GPU benchmark submenu until "Back".
gpu_benchmarks_menu() {
    while :; do
        header
        printf '%s\n' \
            "Benchmarks -> GPU" \
            "1. GPU Burn (10 minutes)" \
            "2. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) gpu_burn_10m ;;
            2) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# benchmarks_menu: loop over the benchmarks menu until "Back".
benchmarks_menu() {
    while :; do
        header
        printf '%s\n' \
            "Benchmarks" \
            "1. GPU" \
            "2. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) gpu_benchmarks_menu ;;
            2) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# run_cmd_log LABEL CMD LOG_FILE: run CMD through "sh -c" with stdout and
# stderr captured in LOG_FILE, preceded by a header holding the label, a UTC
# timestamp and the command text. Returns CMD's exit status.
run_cmd_log() {
    label="$1"
    cmd="$2"
    log_file="$3"

    {
        echo "=== $label ==="
        echo "time: $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
        echo "cmd: $cmd"
        echo
        sh -c "$cmd"
    } >"$log_file" 2>&1
}
|
||||
|
||||
# run_gpu_nvidia_acceptance_test: after confirmation, run a fixed pack of
# diagnostic commands, store each command's output and exit code under
# /var/log/bee-sat/gpu-nvidia-<timestamp>/ and pack that directory into a
# .tar.gz archive next to it.
run_gpu_nvidia_acceptance_test() {
    header
    echo "System acceptance tests -> GPU NVIDIA"
    echo
    confirm_phrase "SAT-GPU" "Type confirmation to start tests" || { echo "Canceled"; pause; return; }

    ts="$(date -u '+%Y%m%d-%H%M%S')"
    base_dir="/var/log/bee-sat"
    run_dir="$base_dir/gpu-nvidia-$ts"
    archive="$base_dir/gpu-nvidia-$ts.tar.gz"
    if ! mkdir -p "$run_dir"; then
        echo "Cannot create log directory: $run_dir"
        pause
        return
    fi

    # The summary file is created before any command runs; its mtime also
    # serves as the "run started" marker when collecting artifacts below.
    summary="$run_dir/summary.txt"
    : >"$summary"

    echo "Running acceptance commands..."
    echo "Logs directory: $run_dir"
    echo "Archive target: $archive"
    echo

    c1="nvidia-smi -q"
    c2="dmidecode -t baseboard"
    c3="dmidecode -t system"
    c4="nvidia-bug-report.sh"

    run_cmd_log "nvidia_smi_q" "$c1" "$run_dir/01-nvidia-smi-q.log"; rc1=$?
    run_cmd_log "dmidecode_baseboard" "$c2" "$run_dir/02-dmidecode-baseboard.log"; rc2=$?
    run_cmd_log "dmidecode_system" "$c3" "$run_dir/03-dmidecode-system.log"; rc3=$?
    run_cmd_log "nvidia_bug_report" "$c4" "$run_dir/04-nvidia-bug-report.log"; rc4=$?

    # Collect a bug report artifact from cwd only if it is newer than the
    # summary file, so a leftover report from an earlier run is not archived
    # as if it belonged to this one.
    bug_report="$(find . -maxdepth 1 -type f -name 'nvidia-bug-report.log.gz' -newer "$summary" 2>/dev/null | head -n1)"
    if [ -n "$bug_report" ] && [ -f "$bug_report" ]; then
        cp -f "$bug_report" "$run_dir/"
    fi

    {
        echo "run_at_utc=$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
        echo "cmd_nvidia_smi_q_rc=$rc1"
        echo "cmd_dmidecode_baseboard_rc=$rc2"
        echo "cmd_dmidecode_system_rc=$rc3"
        echo "cmd_nvidia_bug_report_rc=$rc4"
    } >>"$summary"

    tar -czf "$archive" -C "$base_dir" "gpu-nvidia-$ts"
    tar_rc=$?
    # Written after archiving, so this line exists only in the on-disk summary.
    echo "archive_rc=$tar_rc" >>"$summary"

    echo
    echo "Done."
    echo "- Logs: $run_dir"
    echo "- Archive: $archive (rc=$tar_rc)"
    pause
}
|
||||
|
||||
# gpu_nvidia_sat_menu: loop over the NVIDIA acceptance-test submenu until "Back".
gpu_nvidia_sat_menu() {
    while :; do
        header
        printf '%s\n' \
            "System acceptance tests -> GPU NVIDIA" \
            "1. Run command pack" \
            "2. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) run_gpu_nvidia_acceptance_test ;;
            2) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# system_acceptance_tests_menu: loop over the acceptance-test menu until "Back".
system_acceptance_tests_menu() {
    while :; do
        header
        printf '%s\n' \
            "System acceptance tests" \
            "1. GPU NVIDIA" \
            "2. Back" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) gpu_nvidia_sat_menu ;;
            2) return ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
# run_audit_now: run the audit binary with JSON on stdout redirected to
# /var/log/bee-audit.json and stderr to /var/log/bee-audit.log, then report
# the outcome based on its exit code.
run_audit_now() {
    header
    printf '%s\n\n' "Run audit now"

    rc=0
    /usr/local/bin/audit --output stdout > /var/log/bee-audit.json 2>/var/log/bee-audit.log || rc=$?

    case "$rc" in
        0) echo "Audit completed successfully" ;;
        *) echo "Audit finished with errors (rc=$rc)" ;;
    esac
    echo "Logs: /var/log/bee-audit.log, /var/log/bee-audit.json"
    pause
}
|
||||
|
||||
# check_required_tools: report, for a fixed list of commands, whether each one
# resolves via "command -v" and where.
check_required_tools() {
    header
    printf '%s\n\n' "Required tools check"
    for tool in dmidecode smartctl nvme ipmitool lspci audit nvidia-smi gpu_burn; do
        if tool_path=$(command -v "$tool" 2>/dev/null); then
            echo "- $tool: OK ($tool_path)"
        else
            echo "- $tool: MISSING"
        fi
    done
    pause
}
|
||||
|
||||
# main_menu: top-level menu loop; option 9 exits the TUI with status 0.
main_menu() {
    while :; do
        header
        printf '%s\n' \
            "Main Menu" \
            "1. Network setup" \
            "2. bee service management" \
            "3. Shutdown/reboot tests" \
            "4. Benchmarks" \
            "5. System acceptance tests" \
            "6. Run audit now" \
            "7. Check required tools" \
            "8. Show last audit log tail" \
            "9. Exit to console" \
            ""
        printf 'Choice: '
        read -r choice

        case "$choice" in
            1) network_menu ;;
            2) services_menu ;;
            3) shutdown_menu ;;
            4) benchmarks_menu ;;
            5) system_acceptance_tests_menu ;;
            6) run_audit_now ;;
            7) check_required_tools ;;
            8)
                # Last 40 lines of the audit log, then last 20 lines of the JSON.
                header
                tail -n 40 /var/log/bee-audit.log 2>/dev/null || echo "No /var/log/bee-audit.log"
                echo
                tail -n 20 /var/log/bee-audit.json 2>/dev/null || true
                pause
                ;;
            9) exit 0 ;;
            *)
                echo "Invalid choice"
                pause
                ;;
        esac
    done
}
|
||||
|
||||
main_menu
|
||||
20
iso/overlay/etc/init.d/bee-audit
Executable file
20
iso/overlay/etc/init.d/bee-audit
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/sbin/openrc-run
|
||||
|
||||
description="Bee: run hardware audit (production unattended mode)"
|
||||
|
||||
depend() {
|
||||
need localmount
|
||||
after bee-update bee-nvidia
|
||||
}
|
||||
|
||||
# start: run the audit once, writing stdout to /var/log/bee-audit.json and
# stderr to /var/log/bee-audit.log. The service always reports success via
# "eend 0"; an audit failure is surfaced as a warning only.
start() {
    ebegin "Running hardware audit"
    if /usr/local/bin/audit --output usb > /var/log/bee-audit.json 2>/var/log/bee-audit.log; then
        einfo "Audit complete"
    else
        ewarn "Audit finished with errors"
    fi
    eend 0
}
|
||||
14
iso/overlay/etc/init.d/bee-network
Executable file
14
iso/overlay/etc/init.d/bee-network
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/sbin/openrc-run
|
||||
|
||||
description="Bee: bring up network interfaces via DHCP"
|
||||
|
||||
depend() {
|
||||
need localmount
|
||||
before bee-update bee-audit
|
||||
}
|
||||
|
||||
start() {
|
||||
ebegin "Bringing up network interfaces"
|
||||
/usr/local/bin/bee-network.sh >> /var/log/bee-network.log 2>&1
|
||||
eend 0
|
||||
}
|
||||
23
iso/overlay/etc/init.d/bee-nvidia
Executable file
23
iso/overlay/etc/init.d/bee-nvidia
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/sbin/openrc-run
|
||||
|
||||
description="Bee: load NVIDIA kernel modules"
|
||||
|
||||
depend() {
|
||||
need localmount
|
||||
before bee-audit
|
||||
}
|
||||
|
||||
start() {
|
||||
ebegin "Loading NVIDIA modules"
|
||||
depmod -a 2>/dev/null || true
|
||||
|
||||
for mod in nvidia nvidia-modeset nvidia-uvm; do
|
||||
if modprobe "$mod" 2>/dev/null; then
|
||||
einfo "loaded: $mod"
|
||||
else
|
||||
ewarn "failed to load: $mod"
|
||||
fi
|
||||
done
|
||||
|
||||
eend 0
|
||||
}
|
||||
15
iso/overlay/etc/init.d/bee-update
Executable file
15
iso/overlay/etc/init.d/bee-update
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/sbin/openrc-run
|
||||
|
||||
description="Bee: update audit binary from USB/network"
|
||||
|
||||
depend() {
|
||||
need localmount
|
||||
after bee-network
|
||||
before bee-audit
|
||||
}
|
||||
|
||||
start() {
|
||||
ebegin "Checking for audit binary update"
|
||||
/usr/local/bin/bee-update.sh >> /var/log/bee-update.log 2>&1
|
||||
eend 0
|
||||
}
|
||||
8
iso/overlay/etc/motd
Normal file
8
iso/overlay/etc/motd
Normal file
@@ -0,0 +1,8 @@
|
||||
Bee Hardware Audit LiveCD
|
||||
|
||||
Mode: Production unattended
|
||||
Logs:
|
||||
/var/log/bee-network.log
|
||||
/var/log/bee-update.log
|
||||
/var/log/bee-audit.log
|
||||
/var/log/bee-audit.json
|
||||
1
iso/overlay/etc/profile.d/bee.sh
Normal file
1
iso/overlay/etc/profile.d/bee.sh
Normal file
@@ -0,0 +1 @@
|
||||
export PATH="$PATH:/usr/local/bin"
|
||||
24
iso/overlay/usr/local/bin/bee-network.sh
Executable file
24
iso/overlay/usr/local/bin/bee-network.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/bin/sh
# bee-network.sh — bring up all physical interfaces via DHCP (non-blocking)
#
# Each selected interface is set up and gets its own udhcpc started in the
# background; the script does not wait for leases.

LOG_PREFIX="bee-network"
log() { echo "[$LOG_PREFIX] $*"; }

# "ip -o link" prints stacked devices as "name@parent"; strip the "@parent"
# suffix so the name can be passed back to ip and udhcpc.
interfaces=$(ip -o link show \
    | awk -F': ' '{ name = $2; sub(/@.*/, "", name); print name }' \
    | grep -v '^lo$' \
    | grep -vE '^(docker|virbr|veth|tun|tap|br-|bond|dummy)' \
    | sort)

if [ -z "$interfaces" ]; then
    log "no physical interfaces found"
    exit 0
fi

for iface in $interfaces; do
    ip link set "$iface" up 2>/dev/null || { log "WARN: failed to bring up $iface"; continue; }
    udhcpc -i "$iface" -b -t 0 -T 3 >/dev/null 2>&1 &
    log "dhcp started for $iface"
done

log "done"
|
||||
108
iso/overlay/usr/local/bin/bee-update.sh
Executable file
108
iso/overlay/usr/local/bin/bee-update.sh
Executable file
@@ -0,0 +1,108 @@
|
||||
#!/bin/sh
|
||||
# bee-update.sh — production update path: USB first, then network.
|
||||
# Unattended: logs only, never blocks boot.
|
||||
|
||||
set -u
|
||||
|
||||
LOG_PREFIX="bee-update"
|
||||
log() { echo "[$LOG_PREFIX] $*"; }
|
||||
|
||||
AUDIT_BIN="/usr/local/bin/audit"
|
||||
TMP_BIN="/tmp/bee-audit-new"
|
||||
TMP_SIG="/tmp/bee-audit-new.sig"
|
||||
REPO_API="${BEE_RELEASE_API:-https://git.mchus.pro/api/v1/repos/<org>/bee/releases/latest}"
|
||||
|
||||
# version_of BIN: print the first line of "BIN --version" with all whitespace
# removed; prints nothing when the command fails or produces no output.
version_of() {
    "$1" --version 2>/dev/null | sed -n '1p' | tr -d '[:space:]'
}
|
||||
|
||||
# apply_update SRC_BIN SRC_SIG SRC_VER: install SRC_BIN as $AUDIT_BIN.
# Returns 0 on success, 1 when an input is missing or the install fails.
# SRC_SIG must exist but is not verified here (see NOTE below).
apply_update() {
    src_bin="$1"
    src_sig="$2"
    src_ver="$3"

    # Require a non-empty regular file rather than an executable one: the exec
    # bit is set on the installed copy below, and a source on removable media
    # may not carry it.
    if [ ! -f "$src_bin" ] || [ ! -s "$src_bin" ] || [ ! -f "$src_sig" ]; then
        log "missing binary or signature"
        return 1
    fi

    # NOTE: strict signature verification should be implemented in audit updater module.
    # Here we keep shell side minimal and fail-open for now.

    # Stage next to the target and rename into place, so a failed or partial
    # copy never replaces the currently installed binary.
    staged="${AUDIT_BIN}.new.$$"
    if ! cp "$src_bin" "$staged" || ! chmod +x "$staged"; then
        rm -f "$staged"
        log "failed to stage update binary"
        return 1
    fi
    if ! mv -f "$staged" "$AUDIT_BIN"; then
        rm -f "$staged"
        log "failed to install update binary"
        return 1
    fi
    log "updated audit binary to $src_ver"
    return 0
}
|
||||
|
||||
# check_usb_update: look for <root>/bee-update/{bee-audit-linux-amd64,
# bee-audit-linux-amd64.sig, VERSION} under the usual mount locations.
# Returns 0 when the first complete candidate has the installed version or
# was applied successfully; returns 1 when applying fails or nothing is found.
check_usb_update() {
    for root in /media/* /mnt/* /tmp/bee-usb /run/media/*/*; do
        if [ ! -d "$root" ]; then
            continue
        fi
        base="$root/bee-update"
        bin="$base/bee-audit-linux-amd64"
        sig="$base/bee-audit-linux-amd64.sig"
        ver_file="$base/VERSION"
        # All three files must be present for the directory to count.
        if [ ! -f "$bin" ] || [ ! -f "$sig" ] || [ ! -f "$ver_file" ]; then
            continue
        fi

        new_ver=$(tr -d '[:space:]' 2>/dev/null < "$ver_file")
        cur_ver=$(version_of "$AUDIT_BIN")
        if [ -z "$new_ver" ]; then
            continue
        fi
        if [ "$new_ver" = "$cur_ver" ]; then
            log "usb update found but version is same ($new_ver)"
            return 0
        fi
        log "usb update candidate: $new_ver"
        if apply_update "$bin" "$sig" "$new_ver"; then
            return 0
        fi
        return 1
    done
    return 1
}
|
||||
|
||||
# check_network_update: fetch release metadata from $REPO_API and, when its
# tag differs from the installed version, download the binary and signature
# assets and hand them to apply_update.
# Returns 0 when already up to date or updated, 1 otherwise.
check_network_update() {
    # Probe the host that $REPO_API actually points at, so overriding the URL
    # through BEE_RELEASE_API is honoured by the reachability check as well.
    api_host="${REPO_API#*://}"
    api_host="${api_host%%/*}"
    api_host="${api_host%%:*}"
    if ! ping -c 1 -W 3 "$api_host" >/dev/null 2>&1; then
        log "network unavailable; skip release check"
        return 1
    fi

    if ! command -v wget >/dev/null 2>&1; then
        log "wget not found; skip network update"
        return 1
    fi
    if ! command -v jq >/dev/null 2>&1; then
        log "jq not found; skip network update"
        return 1
    fi

    # Every transfer is bounded with -T so a stalled server cannot hold up boot.
    meta="/tmp/bee-release-latest.json"
    wget -q -T 15 -O "$meta" "$REPO_API" || { log "failed to fetch release metadata"; return 1; }

    tag=$(jq -r '.tag_name // empty' "$meta")
    [ -n "$tag" ] || { log "release metadata missing tag_name"; return 1; }

    cur_ver=$(version_of "$AUDIT_BIN")
    if [ "$tag" = "$cur_ver" ]; then
        log "already latest ($tag)"
        return 0
    fi

    bin_url=$(jq -r '.assets[]? | select(.name=="bee-audit-linux-amd64") | .browser_download_url // empty' "$meta")
    sig_url=$(jq -r '.assets[]? | select(.name=="bee-audit-linux-amd64.sig") | .browser_download_url // empty' "$meta")
    [ -n "$bin_url" ] && [ -n "$sig_url" ] || { log "missing release asset URLs"; return 1; }

    if ! wget -q -T 15 -O "$TMP_BIN" "$bin_url"; then
        rm -f "$TMP_BIN"
        log "failed to download release binary"
        return 1
    fi
    if ! wget -q -T 15 -O "$TMP_SIG" "$sig_url"; then
        rm -f "$TMP_BIN" "$TMP_SIG"
        log "failed to download release signature"
        return 1
    fi
    chmod +x "$TMP_BIN"

    log "network update candidate: $tag"
    apply_update "$TMP_BIN" "$TMP_SIG" "$tag"
}
|
||||
|
||||
# main: try the USB update path first; fall back to the network path only
# when it returns failure. A failed network check is ignored.
main() {
    check_usb_update && exit 0
    check_network_update || true
}
|
||||
|
||||
main "$@"
|
||||
0
iso/vendor/.gitkeep
vendored
Normal file
0
iso/vendor/.gitkeep
vendored
Normal file
60
scripts/fetch-vendor.sh
Executable file
60
scripts/fetch-vendor.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/sh
# fetch-vendor.sh — download proprietary vendor utilities into iso/vendor.
#
# Tools are selected through environment variables, as in this invocation:
#
#   STORCLI_URL=... STORCLI_SHA256=... \
#   SAS2IRCU_URL=... SAS2IRCU_SHA256=... \
#   SAS3IRCU_URL=... SAS3IRCU_SHA256=... \
#   MSTFLINT_URL=... MSTFLINT_SHA256=... \
#   sh scripts/fetch-vendor.sh

# Abort on any failing command (-e) and on use of an unset variable (-u).
set -e -u

# ROOT_DIR is the parent of the directory containing this script; CDPATH is
# cleared for the `cd` so that it cannot redirect or echo the target.
ROOT_DIR=$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)
OUT_DIR=${ROOT_DIR}/iso/vendor
mkdir -p "$OUT_DIR"
|
||||
|
||||
# need_cmd CMD — abort the script with an error on stderr unless CMD can be
# resolved by the shell.
need_cmd() {
    if ! command -v "$1" >/dev/null 2>&1; then
        echo "ERROR: required command not found: $1" >&2
        exit 1
    fi
}
|
||||
|
||||
# External tools required before any download is attempted.
for tool in wget sha256sum; do
    need_cmd "$tool"
done
|
||||
|
||||
# fetch_one NAME URL SHA256 — download one vendor tool to $OUT_DIR/NAME.
#
# Skips the tool (status 0) when URL or SHA256 is empty. Otherwise the file is
# downloaded to NAME.tmp, its SHA-256 is compared with the expected digest
# (upper-case digests are accepted), and only a verified file is moved into
# place and marked executable.
#
# Exits the script with wget's status when the download fails, and with 1 on a
# checksum mismatch; the temporary file is removed in both cases.
fetch_one() {
    name="$1"
    url="$2"
    sha="$3"

    if [ -z "$url" ] || [ -z "$sha" ]; then
        echo "[vendor] skip $name (URL/SHA not provided)"
        return 0
    fi

    dst="$OUT_DIR/$name"
    tmp="$dst.tmp"

    echo "[vendor] downloading $name"
    # Handle the failure here: relying on `set -e` alone would leave a partial
    # NAME.tmp in the output directory and never say which tool failed.
    wget -O "$tmp" "$url" || {
        rc=$?
        rm -f "$tmp"
        echo "ERROR: download failed for $name: $url" >&2
        exit "$rc"
    }

    got=$(sha256sum "$tmp" | awk '{print $1}')
    # sha256sum prints lower-case hex, so normalise the expected digest to match.
    want=$(printf '%s' "$sha" | tr '[:upper:]' '[:lower:]')
    if [ "$got" != "$want" ]; then
        rm -f "$tmp"
        echo "ERROR: checksum mismatch for $name" >&2
        echo " got: $got" >&2
        echo " want: $want" >&2
        exit 1
    fi

    mv "$tmp" "$dst"
    # Best effort: a failing chmod must not abort the run under `set -e`.
    chmod +x "$dst" || true
    echo "[vendor] ok: $name"
}
|
||||
|
||||
# One call per vendor tool. An unset variable expands to an empty string,
# which is safe under `set -u`.
fetch_one storcli64 "${STORCLI_URL-}" "${STORCLI_SHA256-}"
fetch_one sas2ircu "${SAS2IRCU_URL-}" "${SAS2IRCU_SHA256-}"
fetch_one sas3ircu "${SAS3IRCU_URL-}" "${SAS3IRCU_SHA256-}"
fetch_one mstflint "${MSTFLINT_URL-}" "${MSTFLINT_SHA256-}"

echo "[vendor] done. output dir: $OUT_DIR"
|
||||
81
scripts/test-local.sh
Executable file
81
scripts/test-local.sh
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/bin/sh
# Local integration test for bee audit binary (plan step 1.12).
# Runs audit on current machine and validates required JSON fields.
#
# Usage: sh scripts/test-local.sh [OUT_FILE]
#   OUT_FILE  destination of the audit JSON
#             (default: /tmp/bee-audit-local-<timestamp>.json)
#
# Exits non-zero when a prerequisite is missing, the audit produces no output,
# or a required field is absent from the JSON.

set -eu

ROOT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)
OUT_FILE="${1:-/tmp/bee-audit-local-$(date +%Y%m%d-%H%M%S).json}"

# The audit is started from $ROOT_DIR/audit, while the checks below run from
# the caller's directory. A relative OUT_FILE would presumably be resolved
# against the audit directory by the binary, so pin it to an absolute path
# before anything uses it.
case "$OUT_FILE" in
    /*) ;;
    *) OUT_FILE="$(pwd)/$OUT_FILE" ;;
esac

if [ "$(uname -s)" != "Linux" ]; then
    echo "ERROR: scripts/test-local.sh must run on Linux (current: $(uname -s))" >&2
    exit 1
fi

# go runs the audit; python3 validates its output. Fail before doing any work
# if either is missing.
for tool in go python3; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "ERROR: $tool not found in PATH" >&2
        exit 1
    fi
done

echo "[test-local] running audit -> $OUT_FILE"
# Subshell keeps the directory change local to the audit run.
(
    cd "$ROOT_DIR/audit"
    go run ./cmd/audit --output "file:$OUT_FILE"
)

if [ ! -s "$OUT_FILE" ]; then
    echo "ERROR: audit output file is missing or empty: $OUT_FILE" >&2
    exit 1
fi

# Validate the JSON. The quoted heredoc delimiter keeps the shell from
# expanding anything inside the Python source.
python3 - "$OUT_FILE" <<'PY'
import json
import sys

path = sys.argv[1]
with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Collect every problem so a single run reports all of them.
errors = []

def require_nonempty_string(v, name):
    if not isinstance(v, str) or not v.strip():
        errors.append(f"{name} must be a non-empty string")

require_nonempty_string(data.get("collected_at"), "collected_at")
require_nonempty_string(data.get("source_type"), "source_type")
require_nonempty_string(data.get("protocol"), "protocol")

hw = data.get("hardware")
if not isinstance(hw, dict):
    errors.append("hardware must be an object")
    hw = {}

board = hw.get("board")
if not isinstance(board, dict):
    errors.append("hardware.board must be an object")
    board = {}

require_nonempty_string(board.get("serial_number"), "hardware.board.serial_number")

cpus = hw.get("cpus")
if not isinstance(cpus, list) or len(cpus) == 0:
    errors.append("hardware.cpus must be a non-empty array")

if errors:
    print("[test-local] validation FAILED")
    for e in errors:
        print(" -", e)
    sys.exit(1)

# Optional sections: only counted for the summary, never required.
memory = hw.get("memory") if isinstance(hw.get("memory"), list) else []
storage = hw.get("storage") if isinstance(hw.get("storage"), list) else []
pcie = hw.get("pcie_devices") if isinstance(hw.get("pcie_devices"), list) else []
psu = hw.get("power_supplies") if isinstance(hw.get("power_supplies"), list) else []

print("[test-local] validation OK")
print(f"[test-local] board.serial_number={board.get('serial_number')}")
print(f"[test-local] counts: cpus={len(cpus)} memory={len(memory)} storage={len(storage)} pcie={len(pcie)} psu={len(psu)}")
PY

echo "[test-local] done"
|
||||
Reference in New Issue
Block a user