Add health verdicts and acceptance tests
This commit is contained in:
10
PLAN.md
10
PLAN.md
@@ -23,8 +23,10 @@ Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials,
|
|||||||
- 1.7 PSU collector — **DONE (basic FRU path)**
|
- 1.7 PSU collector — **DONE (basic FRU path)**
|
||||||
- 1.8 NVIDIA GPU enrichment — **DONE**
|
- 1.8 NVIDIA GPU enrichment — **DONE**
|
||||||
- 1.8b Component wear / age telemetry — **DONE** (storage + NVMe + NVIDIA + NIC SFP/DOM + NIC packet stats)
|
- 1.8b Component wear / age telemetry — **DONE** (storage + NVMe + NVIDIA + NIC SFP/DOM + NIC packet stats)
|
||||||
|
- 1.8c Storage health verdicts — **DONE** (SMART/NVMe warning/failed status derivation)
|
||||||
- 1.9 Mellanox/NVIDIA NIC enrichment — **DONE** (mstflint + ethtool firmware fallback)
|
- 1.9 Mellanox/NVIDIA NIC enrichment — **DONE** (mstflint + ethtool firmware fallback)
|
||||||
- 1.10 RAID controller enrichment — **DONE (initial multi-tool support)** (storcli + sas2/3ircu + arcconf + ssacli + VROC/mdstat)
|
- 1.10 RAID controller enrichment — **DONE (initial multi-tool support)** (storcli + sas2/3ircu + arcconf + ssacli + VROC/mdstat)
|
||||||
|
- 1.10b PSU SDR health — **DONE** (`ipmitool sdr` merged with FRU inventory)
|
||||||
- 1.11 Output and export workflow — **DONE** (explicit file output + manual removable export via TUI)
|
- 1.11 Output and export workflow — **DONE** (explicit file output + manual removable export via TUI)
|
||||||
- 1.12 Integration test (local) — **DONE** (`scripts/test-local.sh`)
|
- 1.12 Integration test (local) — **DONE** (`scripts/test-local.sh`)
|
||||||
|
|
||||||
@@ -343,6 +345,8 @@ Planned code shape:
|
|||||||
- `menu` launches the LiveCD wrapper `bee-tui`, which escalates to `root` via `sudo -n`
|
- `menu` launches the LiveCD wrapper `bee-tui`, which escalates to `root` via `sudo -n`
|
||||||
- `bee tui` can rerun the audit manually
|
- `bee tui` can rerun the audit manually
|
||||||
- `bee tui` can export the latest audit JSON to removable media
|
- `bee tui` can export the latest audit JSON to removable media
|
||||||
|
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
|
||||||
|
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
|
||||||
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
||||||
|
|
||||||
### 2.6 — Vendor utilities and optional assets
|
### 2.6 — Vendor utilities and optional assets
|
||||||
@@ -350,7 +354,9 @@ Planned code shape:
|
|||||||
Optional binaries live in `iso/vendor/` and are included when present:
|
Optional binaries live in `iso/vendor/` and are included when present:
|
||||||
- `storcli64`
|
- `storcli64`
|
||||||
- `sas2ircu`, `sas3ircu`
|
- `sas2ircu`, `sas3ircu`
|
||||||
- `mstflint`
|
- `arcconf`
|
||||||
|
- `ssacli`
|
||||||
|
- `mstflint` (via Debian package set)
|
||||||
|
|
||||||
Missing optional tools do not fail the build or boot.
|
Missing optional tools do not fail the build or boot.
|
||||||
|
|
||||||
@@ -405,7 +411,7 @@ No "works on my Mac" drift.
|
|||||||
2.4 NVIDIA driver build → driver compiled into overlay
|
2.4 NVIDIA driver build → driver compiled into overlay
|
||||||
2.5 network bring-up on boot → DHCP on all interfaces
|
2.5 network bring-up on boot → DHCP on all interfaces
|
||||||
2.6 systemd boot service → audit runs on boot automatically
|
2.6 systemd boot service → audit runs on boot automatically
|
||||||
2.7 vendor utilities → storcli/sas2ircu/mstflint in image
|
2.7 vendor utilities → storcli/sas2ircu/arcconf/ssacli in image
|
||||||
2.8 release workflow → versioning + release notes
|
2.8 release workflow → versioning + release notes
|
||||||
2.9 operator export flow → explicit TUI export to removable media
|
2.9 operator export flow → explicit TUI export to removable media
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -27,11 +27,14 @@ func run(args []string, stdout, stderr io.Writer) int {
|
|||||||
|
|
||||||
if len(args) == 0 {
|
if len(args) == 0 {
|
||||||
printRootUsage(stderr)
|
printRootUsage(stderr)
|
||||||
return 1
|
return 2
|
||||||
}
|
}
|
||||||
|
|
||||||
switch args[0] {
|
switch args[0] {
|
||||||
case "help", "--help", "-h":
|
case "help", "--help", "-h":
|
||||||
|
if len(args) > 1 {
|
||||||
|
return runHelp(args[1:], stdout, stderr)
|
||||||
|
}
|
||||||
printRootUsage(stdout)
|
printRootUsage(stdout)
|
||||||
return 0
|
return 0
|
||||||
case "audit":
|
case "audit":
|
||||||
@@ -48,7 +51,7 @@ func run(args []string, stdout, stderr io.Writer) int {
|
|||||||
default:
|
default:
|
||||||
fmt.Fprintf(stderr, "bee: unknown command %q\n\n", args[0])
|
fmt.Fprintf(stderr, "bee: unknown command %q\n\n", args[0])
|
||||||
printRootUsage(stderr)
|
printRootUsage(stderr)
|
||||||
return 1
|
return 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -57,8 +60,29 @@ func printRootUsage(w io.Writer) {
|
|||||||
bee audit --runtime auto|local|livecd --output stdout|file:<path>
|
bee audit --runtime auto|local|livecd --output stdout|file:<path>
|
||||||
bee tui --runtime auto|local|livecd
|
bee tui --runtime auto|local|livecd
|
||||||
bee export --target <device>
|
bee export --target <device>
|
||||||
bee sat nvidia
|
bee sat nvidia|memory|storage
|
||||||
bee version`)
|
bee version
|
||||||
|
bee help [command]`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runHelp(args []string, stdout, stderr io.Writer) int {
|
||||||
|
switch args[0] {
|
||||||
|
case "audit":
|
||||||
|
return runAudit([]string{"--help"}, stdout, stdout)
|
||||||
|
case "tui":
|
||||||
|
return runTUI([]string{"--help"}, stdout, stdout)
|
||||||
|
case "export":
|
||||||
|
return runExport([]string{"--help"}, stdout, stdout)
|
||||||
|
case "sat":
|
||||||
|
return runSAT([]string{"--help"}, stdout, stderr)
|
||||||
|
case "version":
|
||||||
|
fmt.Fprintln(stdout, "usage: bee version")
|
||||||
|
return 0
|
||||||
|
default:
|
||||||
|
fmt.Fprintf(stderr, "bee help: unknown command %q\n\n", args[0])
|
||||||
|
printRootUsage(stderr)
|
||||||
|
return 2
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func runAudit(args []string, stdout, stderr io.Writer) int {
|
func runAudit(args []string, stdout, stderr io.Writer) int {
|
||||||
@@ -72,6 +96,13 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
|
|||||||
fs.PrintDefaults()
|
fs.PrintDefaults()
|
||||||
}
|
}
|
||||||
if err := fs.Parse(args); err != nil {
|
if err := fs.Parse(args); err != nil {
|
||||||
|
if err == flag.ErrHelp {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if fs.NArg() != 0 {
|
||||||
|
fs.Usage()
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
@@ -107,6 +138,13 @@ func runTUI(args []string, stdout, stderr io.Writer) int {
|
|||||||
fs.PrintDefaults()
|
fs.PrintDefaults()
|
||||||
}
|
}
|
||||||
if err := fs.Parse(args); err != nil {
|
if err := fs.Parse(args); err != nil {
|
||||||
|
if err == flag.ErrHelp {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if fs.NArg() != 0 {
|
||||||
|
fs.Usage()
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,6 +175,13 @@ func runExport(args []string, stdout, stderr io.Writer) int {
|
|||||||
fs.PrintDefaults()
|
fs.PrintDefaults()
|
||||||
}
|
}
|
||||||
if err := fs.Parse(args); err != nil {
|
if err := fs.Parse(args); err != nil {
|
||||||
|
if err == flag.ErrHelp {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if fs.NArg() != 0 {
|
||||||
|
fs.Usage()
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(*targetDevice) == "" {
|
if strings.TrimSpace(*targetDevice) == "" {
|
||||||
@@ -169,21 +214,44 @@ func runExport(args []string, stdout, stderr io.Writer) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func runSAT(args []string, stdout, stderr io.Writer) int {
|
func runSAT(args []string, stdout, stderr io.Writer) int {
|
||||||
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
|
if len(args) == 0 {
|
||||||
fmt.Fprintln(stderr, "usage: bee sat nvidia")
|
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage")
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
if args[0] != "nvidia" {
|
if args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
|
||||||
|
fmt.Fprintln(stdout, "usage: bee sat nvidia|memory|storage")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if args[0] != "nvidia" && args[0] != "memory" && args[0] != "storage" {
|
||||||
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", args[0])
|
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", args[0])
|
||||||
fmt.Fprintln(stderr, "usage: bee sat nvidia")
|
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage")
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if len(args) > 1 {
|
||||||
|
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage")
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
application := app.New(platform.New())
|
application := app.New(platform.New())
|
||||||
archive, err := application.RunNvidiaAcceptancePack("")
|
var (
|
||||||
|
archive string
|
||||||
|
err error
|
||||||
|
label string
|
||||||
|
)
|
||||||
|
switch args[0] {
|
||||||
|
case "nvidia":
|
||||||
|
label = "nvidia"
|
||||||
|
archive, err = application.RunNvidiaAcceptancePack("")
|
||||||
|
case "memory":
|
||||||
|
label = "memory"
|
||||||
|
archive, err = application.RunMemoryAcceptancePack("")
|
||||||
|
case "storage":
|
||||||
|
label = "storage"
|
||||||
|
archive, err = application.RunStorageAcceptancePack("")
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error("run nvidia sat", "err", err)
|
slog.Error("run sat", "target", label, "err", err)
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
slog.Info("nvidia sat archive written", "path", archive)
|
slog.Info("sat archive written", "target", label, "path", archive)
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ func TestRunNoArgsPrintsUsage(t *testing.T) {
|
|||||||
|
|
||||||
var stdout, stderr bytes.Buffer
|
var stdout, stderr bytes.Buffer
|
||||||
rc := run(nil, &stdout, &stderr)
|
rc := run(nil, &stdout, &stderr)
|
||||||
if rc != 1 {
|
if rc != 2 {
|
||||||
t.Fatalf("rc=%d want 1", rc)
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
}
|
}
|
||||||
if !strings.Contains(stderr.String(), "bee commands:") {
|
if !strings.Contains(stderr.String(), "bee commands:") {
|
||||||
t.Fatalf("stderr missing root usage:\n%s", stderr.String())
|
t.Fatalf("stderr missing root usage:\n%s", stderr.String())
|
||||||
@@ -37,8 +37,8 @@ func TestRunUnknownCommand(t *testing.T) {
|
|||||||
|
|
||||||
var stdout, stderr bytes.Buffer
|
var stdout, stderr bytes.Buffer
|
||||||
rc := run([]string{"wat"}, &stdout, &stderr)
|
rc := run([]string{"wat"}, &stdout, &stderr)
|
||||||
if rc != 1 {
|
if rc != 2 {
|
||||||
t.Fatalf("rc=%d want 1", rc)
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
}
|
}
|
||||||
if !strings.Contains(stderr.String(), `unknown command "wat"`) {
|
if !strings.Contains(stderr.String(), `unknown command "wat"`) {
|
||||||
t.Fatalf("stderr missing unknown command message:\n%s", stderr.String())
|
t.Fatalf("stderr missing unknown command message:\n%s", stderr.String())
|
||||||
@@ -86,11 +86,37 @@ func TestRunSATUsage(t *testing.T) {
|
|||||||
if rc != 2 {
|
if rc != 2 {
|
||||||
t.Fatalf("rc=%d want 2", rc)
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
}
|
}
|
||||||
if !strings.Contains(stderr.String(), "usage: bee sat nvidia") {
|
if !strings.Contains(stderr.String(), "usage: bee sat nvidia|memory|storage") {
|
||||||
t.Fatalf("stderr missing sat usage:\n%s", stderr.String())
|
t.Fatalf("stderr missing sat usage:\n%s", stderr.String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunHelpForSubcommand(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"help", "export"}, &stdout, &stderr)
|
||||||
|
if rc != 0 {
|
||||||
|
t.Fatalf("rc=%d want 0", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stdout.String(), "usage: bee export --target <device>") {
|
||||||
|
t.Fatalf("stdout missing export help:\n%s", stdout.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunHelpUnknownSubcommand(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"help", "wat"}, &stdout, &stderr)
|
||||||
|
if rc != 2 {
|
||||||
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stderr.String(), `bee help: unknown command "wat"`) {
|
||||||
|
t.Fatalf("stderr missing help error:\n%s", stderr.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunSATUnknownTarget(t *testing.T) {
|
func TestRunSATUnknownTarget(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -104,6 +130,32 @@ func TestRunSATUnknownTarget(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunSATHelp(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"sat", "--help"}, &stdout, &stderr)
|
||||||
|
if rc != 0 {
|
||||||
|
t.Fatalf("rc=%d want 0", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stdout.String(), "usage: bee sat nvidia|memory|storage") {
|
||||||
|
t.Fatalf("stdout missing sat help:\n%s", stdout.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunSATRejectsExtraArgs(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"sat", "memory", "extra"}, &stdout, &stderr)
|
||||||
|
if rc != 2 {
|
||||||
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stderr.String(), "usage: bee sat nvidia|memory|storage") {
|
||||||
|
t.Fatalf("stderr missing sat usage:\n%s", stderr.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunAuditInvalidRuntime(t *testing.T) {
|
func TestRunAuditInvalidRuntime(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -113,3 +165,29 @@ func TestRunAuditInvalidRuntime(t *testing.T) {
|
|||||||
t.Fatalf("rc=%d want 1", rc)
|
t.Fatalf("rc=%d want 1", rc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunAuditRejectsExtraArgs(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"audit", "extra"}, &stdout, &stderr)
|
||||||
|
if rc != 2 {
|
||||||
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stderr.String(), "usage: bee audit") {
|
||||||
|
t.Fatalf("stderr missing audit usage:\n%s", stderr.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunExportRejectsExtraArgs(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
rc := run([]string{"export", "--target", "/dev/sdb1", "extra"}, &stdout, &stderr)
|
||||||
|
if rc != 2 {
|
||||||
|
t.Fatalf("rc=%d want 2", rc)
|
||||||
|
}
|
||||||
|
if !strings.Contains(stderr.String(), "usage: bee export --target <device>") {
|
||||||
|
t.Fatalf("stderr missing export usage:\n%s", stderr.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -58,6 +59,8 @@ type toolManager interface {
|
|||||||
|
|
||||||
type satRunner interface {
|
type satRunner interface {
|
||||||
RunNvidiaAcceptancePack(baseDir string) (string, error)
|
RunNvidiaAcceptancePack(baseDir string) (string, error)
|
||||||
|
RunMemoryAcceptancePack(baseDir string) (string, error)
|
||||||
|
RunStorageAcceptancePack(baseDir string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(platform *platform.System) *App {
|
func New(platform *platform.System) *App {
|
||||||
@@ -124,7 +127,11 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
|
|||||||
|
|
||||||
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
|
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
|
||||||
path, err := a.ExportLatestAudit(target)
|
path, err := a.ExportLatestAudit(target)
|
||||||
return ActionResult{Title: "Export audit", Body: "Audit exported to " + path}, err
|
body := "Audit exported."
|
||||||
|
if path != "" {
|
||||||
|
body = "Audit exported to " + path
|
||||||
|
}
|
||||||
|
return ActionResult{Title: "Export audit", Body: body}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) ListInterfaces() ([]platform.InterfaceInfo, error) {
|
func (a *App) ListInterfaces() ([]platform.InterfaceInfo, error) {
|
||||||
@@ -141,7 +148,7 @@ func (a *App) DHCPOne(iface string) (string, error) {
|
|||||||
|
|
||||||
func (a *App) DHCPOneResult(iface string) (ActionResult, error) {
|
func (a *App) DHCPOneResult(iface string) (ActionResult, error) {
|
||||||
body, err := a.network.DHCPOne(iface)
|
body, err := a.network.DHCPOne(iface)
|
||||||
return ActionResult{Title: "DHCP on " + iface, Body: body}, err
|
return ActionResult{Title: "DHCP: " + iface, Body: bodyOr(body, "DHCP completed.")}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) DHCPAll() (string, error) {
|
func (a *App) DHCPAll() (string, error) {
|
||||||
@@ -150,7 +157,7 @@ func (a *App) DHCPAll() (string, error) {
|
|||||||
|
|
||||||
func (a *App) DHCPAllResult() (ActionResult, error) {
|
func (a *App) DHCPAllResult() (ActionResult, error) {
|
||||||
body, err := a.network.DHCPAll()
|
body, err := a.network.DHCPAll()
|
||||||
return ActionResult{Title: "DHCP all interfaces", Body: body}, err
|
return ActionResult{Title: "DHCP: all interfaces", Body: bodyOr(body, "DHCP completed.")}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
|
func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
|
||||||
@@ -159,7 +166,7 @@ func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
|
|||||||
|
|
||||||
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
||||||
body, err := a.network.SetStaticIPv4(cfg)
|
body, err := a.network.SetStaticIPv4(cfg)
|
||||||
return ActionResult{Title: "Static IPv4 on " + cfg.Interface, Body: body}, err
|
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) NetworkStatus() (ActionResult, error) {
|
func (a *App) NetworkStatus() (ActionResult, error) {
|
||||||
@@ -167,6 +174,9 @@ func (a *App) NetworkStatus() (ActionResult, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return ActionResult{Title: "Network status"}, err
|
return ActionResult{Title: "Network status"}, err
|
||||||
}
|
}
|
||||||
|
if len(ifaces) == 0 {
|
||||||
|
return ActionResult{Title: "Network status", Body: "No physical interfaces found."}, nil
|
||||||
|
}
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
for _, iface := range ifaces {
|
for _, iface := range ifaces {
|
||||||
ipv4 := "(no IPv4)"
|
ipv4 := "(no IPv4)"
|
||||||
@@ -216,7 +226,7 @@ func (a *App) ServiceStatus(name string) (string, error) {
|
|||||||
|
|
||||||
func (a *App) ServiceStatusResult(name string) (ActionResult, error) {
|
func (a *App) ServiceStatusResult(name string) (ActionResult, error) {
|
||||||
body, err := a.services.ServiceStatus(name)
|
body, err := a.services.ServiceStatus(name)
|
||||||
return ActionResult{Title: "service: " + name, Body: body}, err
|
return ActionResult{Title: "service status: " + name, Body: bodyOr(body, "No status output.")}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, error) {
|
func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, error) {
|
||||||
@@ -225,7 +235,7 @@ func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, err
|
|||||||
|
|
||||||
func (a *App) ServiceActionResult(name string, action platform.ServiceAction) (ActionResult, error) {
|
func (a *App) ServiceActionResult(name string, action platform.ServiceAction) (ActionResult, error) {
|
||||||
body, err := a.services.ServiceDo(name, action)
|
body, err := a.services.ServiceDo(name, action)
|
||||||
return ActionResult{Title: "service: " + name, Body: body}, err
|
return ActionResult{Title: "service " + string(action) + ": " + name, Body: bodyOr(body, "Action completed.")}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) ListRemovableTargets() ([]platform.RemovableTarget, error) {
|
func (a *App) ListRemovableTargets() ([]platform.RemovableTarget, error) {
|
||||||
@@ -241,6 +251,9 @@ func (a *App) CheckTools(names []string) []platform.ToolStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) ToolCheckResult(names []string) ActionResult {
|
func (a *App) ToolCheckResult(names []string) ActionResult {
|
||||||
|
if len(names) == 0 {
|
||||||
|
return ActionResult{Title: "Required tools", Body: "No tools checked."}
|
||||||
|
}
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
for _, tool := range a.tools.CheckTools(names) {
|
for _, tool := range a.tools.CheckTools(names) {
|
||||||
status := "MISSING"
|
status := "MISSING"
|
||||||
@@ -253,7 +266,12 @@ func (a *App) ToolCheckResult(names []string) ActionResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) AuditLogTailResult() ActionResult {
|
func (a *App) AuditLogTailResult() ActionResult {
|
||||||
body := a.tools.TailFile(DefaultAuditLogPath, 40) + "\n\n" + a.tools.TailFile(DefaultAuditJSONPath, 20)
|
logTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditLogPath, 40))
|
||||||
|
jsonTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditJSONPath, 20))
|
||||||
|
body := strings.TrimSpace(logTail + "\n\n" + jsonTail)
|
||||||
|
if body == "" {
|
||||||
|
body = "No audit logs found."
|
||||||
|
}
|
||||||
return ActionResult{Title: "Audit log tail", Body: body}
|
return ActionResult{Title: "Audit log tail", Body: body}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -263,7 +281,88 @@ func (a *App) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
|||||||
|
|
||||||
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
path, err := a.sat.RunNvidiaAcceptancePack(baseDir)
|
path, err := a.sat.RunNvidiaAcceptancePack(baseDir)
|
||||||
return ActionResult{Title: "NVIDIA SAT", Body: "Archive written to " + path}, err
|
body := "Archive written."
|
||||||
|
if path != "" {
|
||||||
|
body = "Archive written to " + path
|
||||||
|
}
|
||||||
|
return ActionResult{Title: "NVIDIA SAT", Body: body}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return a.sat.RunMemoryAcceptancePack(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
|
path, err := a.sat.RunMemoryAcceptancePack(baseDir)
|
||||||
|
body := "Archive written."
|
||||||
|
if path != "" {
|
||||||
|
body = "Archive written to " + path
|
||||||
|
}
|
||||||
|
return ActionResult{Title: "Memory SAT", Body: body}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return a.sat.RunStorageAcceptancePack(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
|
path, err := a.sat.RunStorageAcceptancePack(baseDir)
|
||||||
|
body := "Archive written."
|
||||||
|
if path != "" {
|
||||||
|
body = "Archive written to " + path
|
||||||
|
}
|
||||||
|
return ActionResult{Title: "Storage SAT", Body: body}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) HealthSummaryResult() ActionResult {
|
||||||
|
type auditFile struct {
|
||||||
|
Hardware struct {
|
||||||
|
Summary struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Warnings []string `json:"warnings"`
|
||||||
|
Failures []string `json:"failures"`
|
||||||
|
StorageWarn int `json:"storage_warn"`
|
||||||
|
StorageFail int `json:"storage_fail"`
|
||||||
|
PCIeWarn int `json:"pcie_warn"`
|
||||||
|
PCIeFail int `json:"pcie_fail"`
|
||||||
|
PSUWarn int `json:"psu_warn"`
|
||||||
|
PSUFail int `json:"psu_fail"`
|
||||||
|
MemoryWarn int `json:"memory_warn"`
|
||||||
|
MemoryFail int `json:"memory_fail"`
|
||||||
|
} `json:"summary"`
|
||||||
|
} `json:"hardware"`
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := os.ReadFile(DefaultAuditJSONPath)
|
||||||
|
if err != nil {
|
||||||
|
return ActionResult{Title: "Health summary", Body: "No audit JSON found."}
|
||||||
|
}
|
||||||
|
var snapshot auditFile
|
||||||
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
|
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := snapshot.Hardware.Summary
|
||||||
|
var body strings.Builder
|
||||||
|
status := summary.Status
|
||||||
|
if status == "" {
|
||||||
|
status = "UNKNOWN"
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&body, "Overall: %s\n", status)
|
||||||
|
fmt.Fprintf(&body, "Storage: warn=%d fail=%d\n", summary.StorageWarn, summary.StorageFail)
|
||||||
|
fmt.Fprintf(&body, "PCIe: warn=%d fail=%d\n", summary.PCIeWarn, summary.PCIeFail)
|
||||||
|
fmt.Fprintf(&body, "PSU: warn=%d fail=%d\n", summary.PSUWarn, summary.PSUFail)
|
||||||
|
fmt.Fprintf(&body, "Memory: warn=%d fail=%d\n", summary.MemoryWarn, summary.MemoryFail)
|
||||||
|
for _, item := range latestSATSummaries() {
|
||||||
|
fmt.Fprintf(&body, "\n%s", item)
|
||||||
|
}
|
||||||
|
if len(summary.Failures) > 0 {
|
||||||
|
fmt.Fprintf(&body, "\n\nFailures:\n- %s", strings.Join(summary.Failures, "\n- "))
|
||||||
|
}
|
||||||
|
if len(summary.Warnings) > 0 {
|
||||||
|
fmt.Fprintf(&body, "\n\nWarnings:\n- %s", strings.Join(summary.Warnings, "\n- "))
|
||||||
|
}
|
||||||
|
return ActionResult{Title: "Health summary", Body: strings.TrimSpace(body.String())}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
|
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
|
||||||
@@ -309,3 +408,37 @@ func sanitizeFilename(v string) string {
|
|||||||
}
|
}
|
||||||
return string(out)
|
return string(out)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bodyOr returns body with leading and trailing whitespace removed, or
// fallback when nothing but whitespace remains.
func bodyOr(body, fallback string) string {
	if trimmed := strings.TrimSpace(body); trimmed != "" {
		return trimmed
	}
	return fallback
}
|
||||||
|
|
||||||
|
// latestSATSummaries collects one summary.txt per acceptance-test family
// (NVIDIA, memory, storage) from /var/log/bee-sat.
//
// For each family it globs "<prefix>*/summary.txt", sorts the matching paths
// as plain strings and reads the last one, so the run that counts as latest
// is whichever path sorts highest. A family is skipped when the glob fails,
// nothing matches, or the chosen file cannot be read. Each returned entry is
// the family label, a colon and newline, then the trimmed file content.
func latestSATSummaries() []string {
	const satLogDir = "/var/log/bee-sat"

	families := [...]struct {
		label  string
		prefix string
	}{
		{"NVIDIA SAT", "gpu-nvidia-"},
		{"Memory SAT", "memory-"},
		{"Storage SAT", "storage-"},
	}

	var summaries []string
	for _, family := range families {
		pattern := filepath.Join(satLogDir, family.prefix+"*/summary.txt")
		paths, globErr := filepath.Glob(pattern)
		if globErr != nil || len(paths) == 0 {
			continue
		}

		sort.Strings(paths)
		newest := paths[len(paths)-1]

		content, readErr := os.ReadFile(newest)
		if readErr != nil {
			continue
		}
		summaries = append(summaries, family.label+":\n"+strings.TrimSpace(string(content)))
	}
	return summaries
}
|
||||||
|
|||||||
@@ -76,11 +76,21 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type fakeSAT struct {
|
type fakeSAT struct {
|
||||||
runFn func(string) (string, error)
|
runNvidiaFn func(string) (string, error)
|
||||||
|
runMemoryFn func(string) (string, error)
|
||||||
|
runStorageFn func(string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
||||||
return f.runFn(baseDir)
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return f.runMemoryFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunStorageAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return f.runStorageFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
|
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
|
||||||
@@ -116,6 +126,25 @@ func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNetworkStatusHandlesNoInterfaces(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
a := &App{
|
||||||
|
network: fakeNetwork{
|
||||||
|
listInterfacesFn: func() ([]platform.InterfaceInfo, error) { return nil, nil },
|
||||||
|
defaultRouteFn: func() string { return "" },
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := a.NetworkStatus()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NetworkStatus error: %v", err)
|
||||||
|
}
|
||||||
|
if result.Body != "No physical interfaces found." {
|
||||||
|
t.Fatalf("body=%q want %q", result.Body, "No physical interfaces found.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNetworkStatusPropagatesListError(t *testing.T) {
|
func TestNetworkStatusPropagatesListError(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -192,7 +221,7 @@ func TestServiceActionResults(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("ServiceStatusResult error: %v", err)
|
t.Fatalf("ServiceStatusResult error: %v", err)
|
||||||
}
|
}
|
||||||
if statusResult.Title != "service: bee-audit" || statusResult.Body != "active" {
|
if statusResult.Title != "service status: bee-audit" || statusResult.Body != "active" {
|
||||||
t.Fatalf("unexpected status result: %#v", statusResult)
|
t.Fatalf("unexpected status result: %#v", statusResult)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -200,7 +229,7 @@ func TestServiceActionResults(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("ServiceActionResult error: %v", err)
|
t.Fatalf("ServiceActionResult error: %v", err)
|
||||||
}
|
}
|
||||||
if actionResult.Title != "service: bee-audit" || actionResult.Body != "restart ok" {
|
if actionResult.Title != "service restart: bee-audit" || actionResult.Body != "restart ok" {
|
||||||
t.Fatalf("unexpected action result: %#v", actionResult)
|
t.Fatalf("unexpected action result: %#v", actionResult)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -242,17 +271,79 @@ func TestToolCheckAndLogTailResults(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestActionResultsUseFallbackBody(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
a := &App{
|
||||||
|
network: fakeNetwork{
|
||||||
|
dhcpOneFn: func(string) (string, error) { return " ", nil },
|
||||||
|
dhcpAllFn: func() (string, error) { return "", nil },
|
||||||
|
setStaticIPv4Fn: func(platform.StaticIPv4Config) (string, error) { return "", nil },
|
||||||
|
listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
|
||||||
|
return nil, nil
|
||||||
|
},
|
||||||
|
defaultRouteFn: func() string { return "" },
|
||||||
|
},
|
||||||
|
services: fakeServices{
|
||||||
|
serviceStatusFn: func(string) (string, error) { return "", nil },
|
||||||
|
serviceDoFn: func(string, platform.ServiceAction) (string, error) { return "", nil },
|
||||||
|
},
|
||||||
|
tools: fakeTools{
|
||||||
|
tailFileFn: func(string, int) string { return " " },
|
||||||
|
checkToolsFn: func([]string) []platform.ToolStatus { return nil },
|
||||||
|
},
|
||||||
|
sat: fakeSAT{
|
||||||
|
runNvidiaFn: func(string) (string, error) { return "", nil },
|
||||||
|
runMemoryFn: func(string) (string, error) { return "", nil },
|
||||||
|
runStorageFn: func(string) (string, error) { return "", nil },
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if got, _ := a.DHCPOneResult("eth0"); got.Body != "DHCP completed." {
|
||||||
|
t.Fatalf("dhcp one body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.DHCPAllResult(); got.Body != "DHCP completed." {
|
||||||
|
t.Fatalf("dhcp all body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.SetStaticIPv4Result(platform.StaticIPv4Config{Interface: "eth0"}); got.Body != "Static IPv4 updated." {
|
||||||
|
t.Fatalf("static body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.ServiceStatusResult("bee-audit"); got.Body != "No status output." {
|
||||||
|
t.Fatalf("status body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.ServiceActionResult("bee-audit", platform.ServiceRestart); got.Body != "Action completed." {
|
||||||
|
t.Fatalf("action body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got := a.ToolCheckResult(nil); got.Body != "No tools checked." {
|
||||||
|
t.Fatalf("tool body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got := a.AuditLogTailResult(); got.Body != "No audit logs found." {
|
||||||
|
t.Fatalf("log body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.RunNvidiaAcceptancePackResult(""); got.Body != "Archive written." {
|
||||||
|
t.Fatalf("sat body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.RunMemoryAcceptancePackResult(""); got.Body != "Archive written." {
|
||||||
|
t.Fatalf("memory sat body=%q", got.Body)
|
||||||
|
}
|
||||||
|
if got, _ := a.RunStorageAcceptancePackResult(""); got.Body != "Archive written." {
|
||||||
|
t.Fatalf("storage sat body=%q", got.Body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
a := &App{
|
a := &App{
|
||||||
sat: fakeSAT{
|
sat: fakeSAT{
|
||||||
runFn: func(baseDir string) (string, error) {
|
runNvidiaFn: func(baseDir string) (string, error) {
|
||||||
if baseDir != "/tmp/sat" {
|
if baseDir != "/tmp/sat" {
|
||||||
t.Fatalf("baseDir=%q want %q", baseDir, "/tmp/sat")
|
t.Fatalf("baseDir=%q want %q", baseDir, "/tmp/sat")
|
||||||
}
|
}
|
||||||
return "/tmp/sat/out.tar.gz", nil
|
return "/tmp/sat/out.tar.gz", nil
|
||||||
},
|
},
|
||||||
|
runMemoryFn: func(string) (string, error) { return "", nil },
|
||||||
|
runStorageFn: func(string) (string, error) { return "", nil },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ func Run(runtimeMode runtimeenv.Mode) schema.HardwareIngestRequest {
|
|||||||
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
|
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
|
||||||
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
|
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
|
||||||
snap.PowerSupplies = collectPSUs()
|
snap.PowerSupplies = collectPSUs()
|
||||||
|
snap.Summary = buildHealthSummary(snap)
|
||||||
|
|
||||||
// remaining collectors added in steps 1.8 – 1.10
|
// remaining collectors added in steps 1.8 – 1.10
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"bee/audit/internal/schema"
|
"bee/audit/internal/schema"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@@ -16,6 +17,9 @@ func collectPSUs() []schema.HardwarePowerSupply {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
psus := parseFRU(string(out))
|
psus := parseFRU(string(out))
|
||||||
|
if sdrOut, err := exec.Command("ipmitool", "sdr").Output(); err == nil {
|
||||||
|
mergePSUSDR(psus, parsePSUSDR(string(sdrOut)))
|
||||||
|
}
|
||||||
slog.Info("psu: collected", "count", len(psus))
|
slog.Info("psu: collected", "count", len(psus))
|
||||||
return psus
|
return psus
|
||||||
}
|
}
|
||||||
@@ -116,6 +120,135 @@ func parseFRUBlock(block string, slotIdx int) (schema.HardwarePowerSupply, bool)
|
|||||||
return psu, true
|
return psu, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// psuSDR accumulates the `ipmitool sdr` readings that belong to one power
// supply. Numeric readings stay nil until a matching sensor line is parsed.
type psuSDR struct {
	// slot is the supply number extracted from the sensor name.
	slot int
	// status is the health verdict derived from the SDR state column.
	status string
	// inputPowerW is the value of the "input power" sensor.
	inputPowerW *float64
	// outputPowerW is the value of the "output power" sensor.
	outputPowerW *float64
	// inputVoltage is the value of the "input voltage" / "ac input" sensor.
	inputVoltage *float64
}
|
||||||
|
|
||||||
|
// psuSlotRe extracts the supply number from SDR sensor names such as
// "PS1 Input Power", "PSU 2 Status" or "psu3 vin". The optional "u" already
// covers the bare "ps" spelling, so a single alternative (and a single capture
// group) is sufficient.
var psuSlotRe = regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`)
|
||||||
|
|
||||||
|
func parsePSUSDR(raw string) map[int]psuSDR {
|
||||||
|
out := map[int]psuSDR{}
|
||||||
|
for _, line := range strings.Split(raw, "\n") {
|
||||||
|
fields := splitSDRFields(line)
|
||||||
|
if len(fields) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := fields[0]
|
||||||
|
value := fields[1]
|
||||||
|
state := strings.ToLower(fields[2])
|
||||||
|
slot, ok := parsePSUSlot(name)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
entry := out[slot]
|
||||||
|
entry.slot = slot
|
||||||
|
if entry.status == "" {
|
||||||
|
entry.status = "OK"
|
||||||
|
}
|
||||||
|
if state != "" && state != "ok" && state != "ns" {
|
||||||
|
entry.status = "FAILED"
|
||||||
|
}
|
||||||
|
|
||||||
|
lowerName := strings.ToLower(name)
|
||||||
|
switch {
|
||||||
|
case strings.Contains(lowerName, "input power"):
|
||||||
|
entry.inputPowerW = parseFloatPtr(value)
|
||||||
|
case strings.Contains(lowerName, "output power"):
|
||||||
|
entry.outputPowerW = parseFloatPtr(value)
|
||||||
|
case strings.Contains(lowerName, "input voltage"), strings.Contains(lowerName, "ac input"):
|
||||||
|
entry.inputVoltage = parseFloatPtr(value)
|
||||||
|
}
|
||||||
|
out[slot] = entry
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergePSUSDR(psus []schema.HardwarePowerSupply, sdr map[int]psuSDR) {
|
||||||
|
for i := range psus {
|
||||||
|
slotIdx, err := strconv.Atoi(derefPSUSlot(psus[i].Slot))
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entry, ok := sdr[slotIdx+1]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if entry.inputPowerW != nil {
|
||||||
|
psus[i].InputPowerW = entry.inputPowerW
|
||||||
|
}
|
||||||
|
if entry.outputPowerW != nil {
|
||||||
|
psus[i].OutputPowerW = entry.outputPowerW
|
||||||
|
}
|
||||||
|
if entry.inputVoltage != nil {
|
||||||
|
psus[i].InputVoltage = entry.inputVoltage
|
||||||
|
}
|
||||||
|
if entry.status != "" {
|
||||||
|
psus[i].Status = &entry.status
|
||||||
|
}
|
||||||
|
if psus[i].Status != nil && *psus[i].Status == "OK" {
|
||||||
|
if (entry.inputPowerW == nil && entry.outputPowerW == nil && entry.inputVoltage == nil) && entry.status == "" {
|
||||||
|
unknown := "UNKNOWN"
|
||||||
|
psus[i].Status = &unknown
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitSDRFields splits one SDR line on "|" and returns the trimmed, non-empty
// columns in order. Columns that are blank after trimming are dropped.
func splitSDRFields(line string) []string {
	columns := strings.Split(line, "|")
	fields := make([]string, 0, len(columns))
	for i := range columns {
		if trimmed := strings.TrimSpace(columns[i]); trimmed != "" {
			fields = append(fields, trimmed)
		}
	}
	return fields
}
|
||||||
|
|
||||||
|
func parsePSUSlot(name string) (int, bool) {
|
||||||
|
m := psuSlotRe.FindStringSubmatch(strings.ToLower(name))
|
||||||
|
if len(m) == 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
for _, group := range m[1:] {
|
||||||
|
if group == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := strconv.Atoi(group)
|
||||||
|
if err == nil && n > 0 {
|
||||||
|
return n, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseFloatPtr returns a pointer to the first whitespace-separated token of
// raw that parses as a float (for example 215 from "215 Watts"). It returns nil
// for empty input, for "na" in any letter case, and when no token is numeric.
func parseFloatPtr(raw string) *float64 {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" || strings.EqualFold(trimmed, "na") {
		return nil
	}
	tokens := strings.Fields(trimmed)
	for i := range tokens {
		parsed, err := strconv.ParseFloat(tokens[i], 64)
		if err != nil {
			continue
		}
		return &parsed
	}
	return nil
}
|
||||||
|
|
||||||
|
// derefPSUSlot returns the string slot points to, or "" when slot is nil.
func derefPSUSlot(slot *string) string {
	if slot != nil {
		return *slot
	}
	return ""
}
|
||||||
|
|
||||||
// parseWattage extracts wattage from strings like "PSU 800W", "1200W PLATINUM".
|
// parseWattage extracts wattage from strings like "PSU 800W", "1200W PLATINUM".
|
||||||
func parseWattage(s string) int {
|
func parseWattage(s string) int {
|
||||||
s = strings.ToUpper(s)
|
s = strings.ToUpper(s)
|
||||||
|
|||||||
32
audit/internal/collector/psu_sdr_test.go
Normal file
32
audit/internal/collector/psu_sdr_test.go
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParsePSUSDR(t *testing.T) {
|
||||||
|
raw := `
|
||||||
|
PS1 Input Power | 215 Watts | ok
|
||||||
|
PS1 Output Power | 198 Watts | ok
|
||||||
|
PS1 Input Voltage | 229 Volts | ok
|
||||||
|
PS2 Input Power | 0 Watts | cr
|
||||||
|
`
|
||||||
|
|
||||||
|
got := parsePSUSDR(raw)
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("len(got)=%d want 2", len(got))
|
||||||
|
}
|
||||||
|
if got[1].status != "OK" {
|
||||||
|
t.Fatalf("ps1 status=%q", got[1].status)
|
||||||
|
}
|
||||||
|
if got[1].inputPowerW == nil || *got[1].inputPowerW != 215 {
|
||||||
|
t.Fatalf("ps1 input power=%v", got[1].inputPowerW)
|
||||||
|
}
|
||||||
|
if got[1].outputPowerW == nil || *got[1].outputPowerW != 198 {
|
||||||
|
t.Fatalf("ps1 output power=%v", got[1].outputPowerW)
|
||||||
|
}
|
||||||
|
if got[1].inputVoltage == nil || *got[1].inputVoltage != 229 {
|
||||||
|
t.Fatalf("ps1 input voltage=%v", got[1].inputVoltage)
|
||||||
|
}
|
||||||
|
if got[2].status != "FAILED" {
|
||||||
|
t.Fatalf("ps2 status=%q", got[2].status)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -67,6 +67,9 @@ type smartctlInfo struct {
|
|||||||
SerialNumber string `json:"serial_number"`
|
SerialNumber string `json:"serial_number"`
|
||||||
FirmwareVer string `json:"firmware_version"`
|
FirmwareVer string `json:"firmware_version"`
|
||||||
RotationRate int `json:"rotation_rate"`
|
RotationRate int `json:"rotation_rate"`
|
||||||
|
SmartStatus struct {
|
||||||
|
Passed bool `json:"passed"`
|
||||||
|
} `json:"smart_status"`
|
||||||
UserCapacity struct {
|
UserCapacity struct {
|
||||||
Bytes int64 `json:"bytes"`
|
Bytes int64 `json:"bytes"`
|
||||||
} `json:"user_capacity"`
|
} `json:"user_capacity"`
|
||||||
@@ -127,7 +130,7 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
var info smartctlInfo
|
var info smartctlInfo
|
||||||
if err := json.Unmarshal(out, &info); err == nil {
|
if err := json.Unmarshal(out, &info); err == nil {
|
||||||
if v := cleanDMIValue(info.ModelName); v != "" {
|
if v := cleanDMIValue(info.ModelName); v != "" {
|
||||||
s.Model = &v
|
s.Model = &v
|
||||||
@@ -158,37 +161,65 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
if info.PowerCycleCount > 0 {
|
if info.PowerCycleCount > 0 {
|
||||||
tel["power_cycles"] = info.PowerCycleCount
|
tel["power_cycles"] = info.PowerCycleCount
|
||||||
}
|
}
|
||||||
|
reallocated := int64(0)
|
||||||
|
pending := int64(0)
|
||||||
|
uncorrectable := int64(0)
|
||||||
|
lifeRemaining := int64(0)
|
||||||
for _, attr := range info.AtaSmartAttributes.Table {
|
for _, attr := range info.AtaSmartAttributes.Table {
|
||||||
switch attr.ID {
|
switch attr.ID {
|
||||||
case 5:
|
case 5:
|
||||||
|
reallocated = attr.Raw.Value
|
||||||
tel["reallocated_sectors"] = attr.Raw.Value
|
tel["reallocated_sectors"] = attr.Raw.Value
|
||||||
case 177:
|
case 177:
|
||||||
tel["wear_leveling_pct"] = attr.Raw.Value
|
tel["wear_leveling_pct"] = attr.Raw.Value
|
||||||
case 231:
|
case 231:
|
||||||
|
lifeRemaining = attr.Raw.Value
|
||||||
tel["life_remaining_pct"] = attr.Raw.Value
|
tel["life_remaining_pct"] = attr.Raw.Value
|
||||||
case 241:
|
case 241:
|
||||||
tel["total_lba_written"] = attr.Raw.Value
|
tel["total_lba_written"] = attr.Raw.Value
|
||||||
|
case 197:
|
||||||
|
pending = attr.Raw.Value
|
||||||
|
tel["current_pending_sectors"] = attr.Raw.Value
|
||||||
|
case 198:
|
||||||
|
uncorrectable = attr.Raw.Value
|
||||||
|
tel["offline_uncorrectable"] = attr.Raw.Value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(tel) > 0 {
|
if len(tel) > 0 {
|
||||||
s.Telemetry = tel
|
s.Telemetry = tel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status := storageHealthStatus{
|
||||||
|
overallPassed: info.SmartStatus.Passed,
|
||||||
|
hasOverall: true,
|
||||||
|
reallocatedSectors: reallocated,
|
||||||
|
pendingSectors: pending,
|
||||||
|
offlineUncorrectable: uncorrectable,
|
||||||
|
lifeRemainingPct: lifeRemaining,
|
||||||
|
}
|
||||||
|
setStorageHealthStatus(&s, status)
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
s.Type = &devType
|
s.Type = &devType
|
||||||
status := "OK"
|
status := "UNKNOWN"
|
||||||
s.Status = &status
|
s.Status = &status
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about.
//
// The tags follow nvme-cli's abbreviated key names: the spare and wear fields
// are emitted as "avail_spare", "spare_thresh" and "percent_used". With any
// other spelling those fields silently decode to zero.
type nvmeSmartLog struct {
	CriticalWarning  int   `json:"critical_warning"`
	PercentageUsed   int   `json:"percent_used"`
	AvailableSpare   int   `json:"avail_spare"`
	SpareThreshold   int   `json:"spare_thresh"`
	PowerOnHours     int64 `json:"power_on_hours"`
	PowerCycles      int64 `json:"power_cycles"`
	UnsafeShutdowns  int64 `json:"unsafe_shutdowns"`
	DataUnitsWritten int64 `json:"data_units_written"`
	ControllerBusy   int64 `json:"controller_busy_time"`
	MediaErrors      int64 `json:"media_errors"`
	NumErrLogEntries int64 `json:"num_err_log_entries"`
}
|
||||||
|
|
||||||
// nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output.
|
// nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output.
|
||||||
@@ -238,6 +269,9 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
var log nvmeSmartLog
|
var log nvmeSmartLog
|
||||||
if json.Unmarshal(out, &log) == nil {
|
if json.Unmarshal(out, &log) == nil {
|
||||||
tel := map[string]any{}
|
tel := map[string]any{}
|
||||||
|
if log.CriticalWarning > 0 {
|
||||||
|
tel["critical_warning"] = log.CriticalWarning
|
||||||
|
}
|
||||||
if log.PowerOnHours > 0 {
|
if log.PowerOnHours > 0 {
|
||||||
tel["power_on_hours"] = log.PowerOnHours
|
tel["power_on_hours"] = log.PowerOnHours
|
||||||
}
|
}
|
||||||
@@ -256,11 +290,78 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
|||||||
if log.ControllerBusy > 0 {
|
if log.ControllerBusy > 0 {
|
||||||
tel["controller_busy_time"] = log.ControllerBusy
|
tel["controller_busy_time"] = log.ControllerBusy
|
||||||
}
|
}
|
||||||
|
if log.AvailableSpare > 0 {
|
||||||
|
tel["available_spare_pct"] = log.AvailableSpare
|
||||||
|
}
|
||||||
|
if log.SpareThreshold > 0 {
|
||||||
|
tel["available_spare_threshold_pct"] = log.SpareThreshold
|
||||||
|
}
|
||||||
|
if log.MediaErrors > 0 {
|
||||||
|
tel["media_errors"] = log.MediaErrors
|
||||||
|
}
|
||||||
|
if log.NumErrLogEntries > 0 {
|
||||||
|
tel["error_log_entries"] = log.NumErrLogEntries
|
||||||
|
}
|
||||||
if len(tel) > 0 {
|
if len(tel) > 0 {
|
||||||
s.Telemetry = tel
|
s.Telemetry = tel
|
||||||
}
|
}
|
||||||
|
setStorageHealthStatus(&s, storageHealthStatus{
|
||||||
|
criticalWarning: log.CriticalWarning,
|
||||||
|
percentageUsed: int64(log.PercentageUsed),
|
||||||
|
availableSpare: int64(log.AvailableSpare),
|
||||||
|
spareThreshold: int64(log.SpareThreshold),
|
||||||
|
unsafeShutdowns: log.UnsafeShutdowns,
|
||||||
|
mediaErrors: log.MediaErrors,
|
||||||
|
errorLogEntries: log.NumErrLogEntries,
|
||||||
|
})
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status = "UNKNOWN"
|
||||||
|
s.Status = &status
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// storageHealthStatus gathers the raw health indicators from which
// setStorageHealthStatus derives a single verdict. Indicators a device does
// not report stay at their zero value.
type storageHealthStatus struct {
	// Overall SMART self-assessment; overallPassed is only consulted when
	// hasOverall is true.
	hasOverall    bool
	overallPassed bool

	// ATA SMART attribute raw values (IDs 5, 197, 198 and 231).
	reallocatedSectors   int64
	pendingSectors       int64
	offlineUncorrectable int64
	lifeRemainingPct     int64

	// NVMe SMART log fields.
	criticalWarning int
	percentageUsed  int64
	availableSpare  int64
	spareThreshold  int64
	unsafeShutdowns int64
	mediaErrors     int64
	errorLogEntries int64
}
|
||||||
|
|
||||||
|
func setStorageHealthStatus(s *schema.HardwareStorage, health storageHealthStatus) {
|
||||||
|
status := "OK"
|
||||||
|
switch {
|
||||||
|
case health.hasOverall && !health.overallPassed:
|
||||||
|
status = "FAILED"
|
||||||
|
case health.criticalWarning > 0:
|
||||||
|
status = "FAILED"
|
||||||
|
case health.pendingSectors > 0 || health.offlineUncorrectable > 0:
|
||||||
|
status = "FAILED"
|
||||||
|
case health.mediaErrors > 0:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.reallocatedSectors > 0:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.errorLogEntries > 0:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.lifeRemainingPct > 0 && health.lifeRemainingPct <= 10:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.percentageUsed >= 95:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.availableSpare > 0 && health.spareThreshold > 0 && health.availableSpare <= health.spareThreshold:
|
||||||
|
status = "WARNING"
|
||||||
|
case health.unsafeShutdowns > 100:
|
||||||
|
status = "WARNING"
|
||||||
|
}
|
||||||
|
s.Status = &status
|
||||||
|
}
|
||||||
|
|||||||
63
audit/internal/collector/storage_health_test.go
Normal file
63
audit/internal/collector/storage_health_test.go
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSetStorageHealthStatus(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
health storageHealthStatus
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "smart overall failed",
|
||||||
|
health: storageHealthStatus{hasOverall: true, overallPassed: false},
|
||||||
|
want: "FAILED",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nvme critical warning",
|
||||||
|
health: storageHealthStatus{criticalWarning: 1},
|
||||||
|
want: "FAILED",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pending sectors",
|
||||||
|
health: storageHealthStatus{pendingSectors: 1},
|
||||||
|
want: "FAILED",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "media errors warning",
|
||||||
|
health: storageHealthStatus{mediaErrors: 2},
|
||||||
|
want: "WARNING",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "reallocated warning",
|
||||||
|
health: storageHealthStatus{reallocatedSectors: 1},
|
||||||
|
want: "WARNING",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "life remaining low",
|
||||||
|
health: storageHealthStatus{lifeRemainingPct: 8},
|
||||||
|
want: "WARNING",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "healthy",
|
||||||
|
health: storageHealthStatus{},
|
||||||
|
want: "OK",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
var disk schema.HardwareStorage
|
||||||
|
setStorageHealthStatus(&disk, tt.health)
|
||||||
|
if disk.Status == nil || *disk.Status != tt.want {
|
||||||
|
t.Fatalf("status=%v want %q", disk.Status, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
114
audit/internal/collector/summary.go
Normal file
114
audit/internal/collector/summary.go
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func buildHealthSummary(snap schema.HardwareSnapshot) *schema.HardwareHealthSummary {
|
||||||
|
summary := &schema.HardwareHealthSummary{
|
||||||
|
Status: "OK",
|
||||||
|
CollectedAt: time.Now().UTC().Format(time.RFC3339),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, dimm := range snap.Memory {
|
||||||
|
switch derefString(dimm.Status) {
|
||||||
|
case "WARNING":
|
||||||
|
summary.MemoryWarn++
|
||||||
|
summary.Warnings = append(summary.Warnings, formatMemorySummary(dimm))
|
||||||
|
case "FAILED":
|
||||||
|
summary.MemoryFail++
|
||||||
|
summary.Failures = append(summary.Failures, formatMemorySummary(dimm))
|
||||||
|
case "EMPTY":
|
||||||
|
summary.EmptyDIMMs++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, disk := range snap.Storage {
|
||||||
|
switch derefString(disk.Status) {
|
||||||
|
case "WARNING":
|
||||||
|
summary.StorageWarn++
|
||||||
|
summary.Warnings = append(summary.Warnings, formatStorageSummary(disk))
|
||||||
|
case "FAILED":
|
||||||
|
summary.StorageFail++
|
||||||
|
summary.Failures = append(summary.Failures, formatStorageSummary(disk))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, dev := range snap.PCIeDevices {
|
||||||
|
switch derefString(dev.Status) {
|
||||||
|
case "WARNING":
|
||||||
|
summary.PCIeWarn++
|
||||||
|
summary.Warnings = append(summary.Warnings, formatPCIeSummary(dev))
|
||||||
|
case "FAILED":
|
||||||
|
summary.PCIeFail++
|
||||||
|
summary.Failures = append(summary.Failures, formatPCIeSummary(dev))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, psu := range snap.PowerSupplies {
|
||||||
|
if psu.Present != nil && !*psu.Present {
|
||||||
|
summary.MissingPSUs++
|
||||||
|
}
|
||||||
|
switch derefString(psu.Status) {
|
||||||
|
case "WARNING":
|
||||||
|
summary.PSUWarn++
|
||||||
|
summary.Warnings = append(summary.Warnings, formatPSUSummary(psu))
|
||||||
|
case "FAILED":
|
||||||
|
summary.PSUFail++
|
||||||
|
summary.Failures = append(summary.Failures, formatPSUSummary(psu))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(summary.Failures) > 0 || summary.StorageFail > 0 || summary.PCIeFail > 0 || summary.PSUFail > 0 || summary.MemoryFail > 0 {
|
||||||
|
summary.Status = "FAILED"
|
||||||
|
} else if len(summary.Warnings) > 0 || summary.StorageWarn > 0 || summary.PCIeWarn > 0 || summary.PSUWarn > 0 || summary.MemoryWarn > 0 {
|
||||||
|
summary.Status = "WARNING"
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(summary.Warnings) == 0 {
|
||||||
|
summary.Warnings = nil
|
||||||
|
}
|
||||||
|
if len(summary.Failures) == 0 {
|
||||||
|
summary.Failures = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return summary
|
||||||
|
}
|
||||||
|
|
||||||
|
// derefString returns the string value points to, or "" when value is nil.
func derefString(value *string) string {
	if value != nil {
		return *value
	}
	return ""
}
|
||||||
|
|
||||||
|
// preferredName picks the first non-nil, non-empty value among model, serial
// and slot (in that order) and falls back to "unknown" when none qualifies.
func preferredName(model, serial, slot *string) string {
	for _, candidate := range []*string{model, serial, slot} {
		if candidate != nil && *candidate != "" {
			return *candidate
		}
	}
	return "unknown"
}
|
||||||
|
|
||||||
|
func formatStorageSummary(disk schema.HardwareStorage) string {
|
||||||
|
return fmt.Sprintf("storage %s status=%s", preferredName(disk.Model, disk.SerialNumber, disk.Slot), derefString(disk.Status))
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatPCIeSummary(dev schema.HardwarePCIeDevice) string {
|
||||||
|
return fmt.Sprintf("pcie %s status=%s", preferredName(dev.Model, dev.SerialNumber, dev.BDF), derefString(dev.Status))
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatPSUSummary(psu schema.HardwarePowerSupply) string {
|
||||||
|
return fmt.Sprintf("psu %s status=%s", preferredName(psu.Model, psu.SerialNumber, psu.Slot), derefString(psu.Status))
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatMemorySummary(dimm schema.HardwareMemory) string {
|
||||||
|
return fmt.Sprintf("memory %s status=%s", preferredName(dimm.PartNumber, dimm.SerialNumber, dimm.Slot), derefString(dimm.Status))
|
||||||
|
}
|
||||||
@@ -8,35 +8,107 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
||||||
|
return runAcceptancePack(baseDir, "memory", []satJob{
|
||||||
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
|
{name: "02-memtester.log", cmd: []string{"memtester", "128M", "1"}},
|
||||||
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
|
||||||
if baseDir == "" {
|
if baseDir == "" {
|
||||||
baseDir = "/var/log/bee-sat"
|
baseDir = "/var/log/bee-sat"
|
||||||
}
|
}
|
||||||
ts := time.Now().UTC().Format("20060102-150405")
|
ts := time.Now().UTC().Format("20060102-150405")
|
||||||
runDir := filepath.Join(baseDir, "gpu-nvidia-"+ts)
|
runDir := filepath.Join(baseDir, "storage-"+ts)
|
||||||
if err := os.MkdirAll(runDir, 0755); err != nil {
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
type job struct {
|
devices, err := listStorageDevices()
|
||||||
name string
|
if err != nil {
|
||||||
cmd []string
|
return "", err
|
||||||
}
|
}
|
||||||
jobs := []job{
|
sort.Strings(devices)
|
||||||
|
|
||||||
|
var summary strings.Builder
|
||||||
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
||||||
|
if len(devices) == 0 {
|
||||||
|
fmt.Fprintln(&summary, "devices=0")
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&summary, "devices=%d\n", len(devices))
|
||||||
|
}
|
||||||
|
|
||||||
|
for index, devPath := range devices {
|
||||||
|
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
||||||
|
commands := storageSATCommands(devPath)
|
||||||
|
for cmdIndex, job := range commands {
|
||||||
|
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
||||||
|
out, err := exec.Command(job.cmd[0], job.cmd[1:]...).CombinedOutput()
|
||||||
|
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
||||||
|
return "", writeErr
|
||||||
|
}
|
||||||
|
rc := 0
|
||||||
|
if err != nil {
|
||||||
|
rc = 1
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&summary, "%s_%s_rc=%d\n", filepath.Base(devPath), strings.ReplaceAll(job.name, "-", "_"), rc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
|
||||||
|
if err := createTarGz(archive, runDir); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return archive, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// satJob describes one command of an acceptance pack.
type satJob struct {
	// name labels the job; it is used to build the job's log file name.
	name string
	// cmd is the argv to execute: cmd[0] is the program, the rest its arguments.
	cmd []string
}
|
||||||
|
|
||||||
|
func nvidiaSATJobs() []satJob {
|
||||||
|
return []satJob{
|
||||||
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
||||||
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", filepath.Join(runDir, "nvidia-bug-report.log")}},
|
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", "{{run_dir}}/nvidia-bug-report.log"}},
|
||||||
|
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", "5", "--size-mb", "64"}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
|
||||||
|
if baseDir == "" {
|
||||||
|
baseDir = "/var/log/bee-sat"
|
||||||
|
}
|
||||||
|
ts := time.Now().UTC().Format("20060102-150405")
|
||||||
|
runDir := filepath.Join(baseDir, prefix+"-"+ts)
|
||||||
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
||||||
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
var summary strings.Builder
|
var summary strings.Builder
|
||||||
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
||||||
for _, job := range jobs {
|
for _, job := range jobs {
|
||||||
out, err := exec.Command(job.cmd[0], job.cmd[1:]...).CombinedOutput()
|
cmd := make([]string, 0, len(job.cmd))
|
||||||
|
for _, arg := range job.cmd {
|
||||||
|
cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
|
||||||
|
}
|
||||||
|
out, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
|
||||||
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
@@ -50,13 +122,43 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, "gpu-nvidia-"+ts+".tar.gz")
|
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
if err := createTarGz(archive, runDir); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return archive, nil
|
return archive, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// listStorageDevices runs `lsblk -dn -o NAME,TYPE` and returns "/dev/<name>"
// for every row whose type column is "disk". Rows that do not have exactly two
// columns are skipped. The error from lsblk is returned unchanged.
func listStorageDevices() ([]string, error) {
	raw, err := exec.Command("lsblk", "-dn", "-o", "NAME,TYPE").Output()
	if err != nil {
		return nil, err
	}

	var disks []string
	rows := strings.Split(strings.TrimSpace(string(raw)), "\n")
	for _, row := range rows {
		cols := strings.Fields(row)
		if len(cols) == 2 && cols[1] == "disk" {
			disks = append(disks, "/dev/"+cols[0])
		}
	}
	return disks, nil
}
|
||||||
|
|
||||||
|
func storageSATCommands(devPath string) []satJob {
|
||||||
|
if strings.Contains(filepath.Base(devPath), "nvme") {
|
||||||
|
return []satJob{
|
||||||
|
{name: "nvme-id-ctrl", cmd: []string{"nvme", "id-ctrl", devPath, "-o", "json"}},
|
||||||
|
{name: "nvme-smart-log", cmd: []string{"nvme", "smart-log", devPath, "-o", "json"}},
|
||||||
|
{name: "nvme-device-self-test", cmd: []string{"nvme", "device-self-test", devPath, "--start", "1"}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return []satJob{
|
||||||
|
{name: "smartctl-health", cmd: []string{"smartctl", "-H", "-A", devPath}},
|
||||||
|
{name: "smartctl-self-test-short", cmd: []string{"smartctl", "-t", "short", devPath}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func createTarGz(dst, srcDir string) error {
|
func createTarGz(dst, srcDir string) error {
|
||||||
file, err := os.Create(dst)
|
file, err := os.Create(dst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
30
audit/internal/platform/sat_test.go
Normal file
30
audit/internal/platform/sat_test.go
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestStorageSATCommands(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
nvme := storageSATCommands("/dev/nvme0n1")
|
||||||
|
if len(nvme) != 3 || nvme[2].cmd[0] != "nvme" {
|
||||||
|
t.Fatalf("unexpected nvme commands: %#v", nvme)
|
||||||
|
}
|
||||||
|
|
||||||
|
sata := storageSATCommands("/dev/sda")
|
||||||
|
if len(sata) != 2 || sata[0].cmd[0] != "smartctl" {
|
||||||
|
t.Fatalf("unexpected sata commands: %#v", sata)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
jobs := nvidiaSATJobs()
|
||||||
|
|
||||||
|
if len(jobs) != 5 {
|
||||||
|
t.Fatalf("jobs=%d want 5", len(jobs))
|
||||||
|
}
|
||||||
|
if got := jobs[4].cmd[0]; got != "bee-gpu-stress" {
|
||||||
|
t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -21,6 +21,24 @@ type HardwareSnapshot struct {
|
|||||||
Storage []HardwareStorage `json:"storage,omitempty"`
|
Storage []HardwareStorage `json:"storage,omitempty"`
|
||||||
PCIeDevices []HardwarePCIeDevice `json:"pcie_devices,omitempty"`
|
PCIeDevices []HardwarePCIeDevice `json:"pcie_devices,omitempty"`
|
||||||
PowerSupplies []HardwarePowerSupply `json:"power_supplies,omitempty"`
|
PowerSupplies []HardwarePowerSupply `json:"power_supplies,omitempty"`
|
||||||
|
Summary *HardwareHealthSummary `json:"summary,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// HardwareHealthSummary is the aggregate health block serialized under the
// "summary" key of a hardware snapshot: an overall status plus per-category
// warning and failure counters.
//
// Every field except Status is omitted from the JSON output when it holds its
// zero value.
type HardwareHealthSummary struct {
	// Status is the overall verdict; it is always emitted.
	Status string `json:"status"`
	// Warnings and Failures carry the individual findings as strings
	// (presumably human-readable messages; confirm against the producer).
	Warnings []string `json:"warnings,omitempty"`
	Failures []string `json:"failures,omitempty"`

	// Per-category warning/failure counters.
	StorageWarn int `json:"storage_warn,omitempty"`
	StorageFail int `json:"storage_fail,omitempty"`
	PCIeWarn    int `json:"pcie_warn,omitempty"`
	PCIeFail    int `json:"pcie_fail,omitempty"`
	PSUWarn     int `json:"psu_warn,omitempty"`
	PSUFail     int `json:"psu_fail,omitempty"`
	MemoryWarn  int `json:"memory_warn,omitempty"`
	MemoryFail  int `json:"memory_fail,omitempty"`

	// EmptyDIMMs and MissingPSUs count absent components; NOTE(review):
	// whether these contribute to Status is not visible here.
	EmptyDIMMs  int `json:"empty_dimms,omitempty"`
	MissingPSUs int `json:"missing_psus,omitempty"`

	// CollectedAt is a string timestamp of the collection; the exact format
	// is set by the producer (TODO confirm, likely RFC 3339).
	CollectedAt string `json:"collected_at,omitempty"`
}
|
||||||
|
|
||||||
type HardwareBoard struct {
|
type HardwareBoard struct {
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ func (m model) updateStaticForm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
m.formFields[3].Value,
|
m.formFields[3].Value,
|
||||||
})
|
})
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Static IPv4: " + m.selectedIface
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.SetStaticIPv4Result(cfg)
|
result, err := m.app.SetStaticIPv4Result(cfg)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
||||||
@@ -59,26 +60,42 @@ func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
case "esc":
|
case "esc":
|
||||||
m.screen = m.confirmCancelTarget()
|
m.screen = m.confirmCancelTarget()
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
|
m.pendingAction = actionNone
|
||||||
return m, nil
|
return m, nil
|
||||||
case "enter":
|
case "enter":
|
||||||
if m.cursor == 1 {
|
if m.cursor == 1 {
|
||||||
m.screen = m.confirmCancelTarget()
|
m.screen = m.confirmCancelTarget()
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
|
m.pendingAction = actionNone
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
m.busy = true
|
m.busy = true
|
||||||
switch m.pendingAction {
|
switch m.pendingAction {
|
||||||
case actionExportAudit:
|
case actionExportAudit:
|
||||||
|
m.busyTitle = "Export audit"
|
||||||
target := *m.selectedTarget
|
target := *m.selectedTarget
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.ExportLatestAuditResult(target)
|
result, err := m.app.ExportLatestAuditResult(target)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
|
||||||
}
|
}
|
||||||
case actionRunNvidiaSAT:
|
case actionRunNvidiaSAT:
|
||||||
|
m.busyTitle = "NVIDIA SAT"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.RunNvidiaAcceptancePackResult("")
|
result, err := m.app.RunNvidiaAcceptancePackResult("")
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenAcceptance}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenAcceptance}
|
||||||
}
|
}
|
||||||
|
case actionRunMemorySAT:
|
||||||
|
m.busyTitle = "Memory SAT"
|
||||||
|
return m, func() tea.Msg {
|
||||||
|
result, err := m.app.RunMemoryAcceptancePackResult("")
|
||||||
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenAcceptance}
|
||||||
|
}
|
||||||
|
case actionRunStorageSAT:
|
||||||
|
m.busyTitle = "Storage SAT"
|
||||||
|
return m, func() tea.Msg {
|
||||||
|
result, err := m.app.RunStorageAcceptancePackResult("")
|
||||||
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenAcceptance}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
case "ctrl+c":
|
case "ctrl+c":
|
||||||
return m, tea.Quit
|
return m, tea.Quit
|
||||||
@@ -91,6 +108,10 @@ func (m model) confirmCancelTarget() screen {
|
|||||||
case actionExportAudit:
|
case actionExportAudit:
|
||||||
return screenExportTargets
|
return screenExportTargets
|
||||||
case actionRunNvidiaSAT:
|
case actionRunNvidiaSAT:
|
||||||
|
fallthrough
|
||||||
|
case actionRunMemorySAT:
|
||||||
|
fallthrough
|
||||||
|
case actionRunStorageSAT:
|
||||||
return screenAcceptance
|
return screenAcceptance
|
||||||
default:
|
default:
|
||||||
return screenMain
|
return screenMain
|
||||||
|
|||||||
@@ -3,12 +3,19 @@ package tui
|
|||||||
import tea "github.com/charmbracelet/bubbletea"
|
import tea "github.com/charmbracelet/bubbletea"
|
||||||
|
|
||||||
func (m model) handleAcceptanceMenu() (tea.Model, tea.Cmd) {
|
func (m model) handleAcceptanceMenu() (tea.Model, tea.Cmd) {
|
||||||
if m.cursor == 1 {
|
if m.cursor == 3 {
|
||||||
m.screen = screenMain
|
m.screen = screenMain
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
m.pendingAction = actionRunNvidiaSAT
|
switch m.cursor {
|
||||||
|
case 0:
|
||||||
|
m.pendingAction = actionRunNvidiaSAT
|
||||||
|
case 1:
|
||||||
|
m.pendingAction = actionRunMemorySAT
|
||||||
|
case 2:
|
||||||
|
m.pendingAction = actionRunStorageSAT
|
||||||
|
}
|
||||||
m.screen = screenConfirm
|
m.screen = screenConfirm
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
case 1:
|
case 1:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Services"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
services, err := m.app.ListBeeServices()
|
services, err := m.app.ListBeeServices()
|
||||||
return servicesMsg{services: services, err: err}
|
return servicesMsg{services: services, err: err}
|
||||||
@@ -22,29 +23,40 @@ func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
case 3:
|
case 3:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Run audit"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.RunAuditNow(m.runtimeMode)
|
result, err := m.app.RunAuditNow(m.runtimeMode)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
|
||||||
}
|
}
|
||||||
case 4:
|
case 4:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Export audit"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
targets, err := m.app.ListRemovableTargets()
|
targets, err := m.app.ListRemovableTargets()
|
||||||
return exportTargetsMsg{targets: targets, err: err}
|
return exportTargetsMsg{targets: targets, err: err}
|
||||||
}
|
}
|
||||||
case 5:
|
case 5:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Required tools"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result := m.app.ToolCheckResult([]string{"dmidecode", "smartctl", "nvme", "ipmitool", "lspci", "bee", "nvidia-smi", "dhclient", "lsblk", "mount"})
|
result := m.app.ToolCheckResult([]string{"dmidecode", "smartctl", "nvme", "ipmitool", "lspci", "ethtool", "bee", "nvidia-smi", "bee-gpu-stress", "memtester", "dhclient", "lsblk", "mount"})
|
||||||
return resultMsg{title: result.Title, body: result.Body, back: screenMain}
|
return resultMsg{title: result.Title, body: result.Body, back: screenMain}
|
||||||
}
|
}
|
||||||
case 6:
|
case 6:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Health summary"
|
||||||
|
return m, func() tea.Msg {
|
||||||
|
result := m.app.HealthSummaryResult()
|
||||||
|
return resultMsg{title: result.Title, body: result.Body, back: screenMain}
|
||||||
|
}
|
||||||
|
case 7:
|
||||||
|
m.busy = true
|
||||||
|
m.busyTitle = "Audit logs"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result := m.app.AuditLogTailResult()
|
result := m.app.AuditLogTailResult()
|
||||||
return resultMsg{title: result.Title, body: result.Body, back: screenMain}
|
return resultMsg{title: result.Title, body: result.Body, back: screenMain}
|
||||||
}
|
}
|
||||||
case 7:
|
case 8:
|
||||||
return m, tea.Quit
|
return m, tea.Quit
|
||||||
}
|
}
|
||||||
return m, nil
|
return m, nil
|
||||||
|
|||||||
@@ -10,12 +10,14 @@ func (m model) handleNetworkMenu() (tea.Model, tea.Cmd) {
|
|||||||
switch m.cursor {
|
switch m.cursor {
|
||||||
case 0:
|
case 0:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Network status"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.NetworkStatus()
|
result, err := m.app.NetworkStatus()
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
||||||
}
|
}
|
||||||
case 1:
|
case 1:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "DHCP all interfaces"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.DHCPAllResult()
|
result, err := m.app.DHCPAllResult()
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
||||||
@@ -23,6 +25,7 @@ func (m model) handleNetworkMenu() (tea.Model, tea.Cmd) {
|
|||||||
case 2:
|
case 2:
|
||||||
m.pendingAction = actionDHCPOne
|
m.pendingAction = actionDHCPOne
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Interfaces"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
ifaces, err := m.app.ListInterfaces()
|
ifaces, err := m.app.ListInterfaces()
|
||||||
return interfacesMsg{ifaces: ifaces, err: err}
|
return interfacesMsg{ifaces: ifaces, err: err}
|
||||||
@@ -30,6 +33,7 @@ func (m model) handleNetworkMenu() (tea.Model, tea.Cmd) {
|
|||||||
case 3:
|
case 3:
|
||||||
m.pendingAction = actionStaticIPv4
|
m.pendingAction = actionStaticIPv4
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "Interfaces"
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
ifaces, err := m.app.ListInterfaces()
|
ifaces, err := m.app.ListInterfaces()
|
||||||
return interfacesMsg{ifaces: ifaces, err: err}
|
return interfacesMsg{ifaces: ifaces, err: err}
|
||||||
@@ -50,6 +54,7 @@ func (m model) handleInterfacePickMenu() (tea.Model, tea.Cmd) {
|
|||||||
switch m.pendingAction {
|
switch m.pendingAction {
|
||||||
case actionDHCPOne:
|
case actionDHCPOne:
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "DHCP on " + m.selectedIface
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
result, err := m.app.DHCPOneResult(m.selectedIface)
|
result, err := m.app.DHCPOneResult(m.selectedIface)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
|
|
||||||
func (m model) handleServicesMenu() (tea.Model, tea.Cmd) {
|
func (m model) handleServicesMenu() (tea.Model, tea.Cmd) {
|
||||||
if len(m.services) == 0 {
|
if len(m.services) == 0 {
|
||||||
return m, resultCmd("bee services", "No bee-* services found", nil, screenMain)
|
return m, resultCmd("Services", "No bee-* services found.", nil, screenMain)
|
||||||
}
|
}
|
||||||
m.selectedService = m.services[m.cursor]
|
m.selectedService = m.services[m.cursor]
|
||||||
m.screen = screenServiceAction
|
m.screen = screenServiceAction
|
||||||
@@ -25,22 +25,23 @@ func (m model) handleServiceActionMenu() (tea.Model, tea.Cmd) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
m.busy = true
|
m.busy = true
|
||||||
|
m.busyTitle = "service: " + m.selectedService
|
||||||
return m, func() tea.Msg {
|
return m, func() tea.Msg {
|
||||||
switch action {
|
switch action {
|
||||||
case "status":
|
case "Status":
|
||||||
result, err := m.app.ServiceStatusResult(m.selectedService)
|
result, err := m.app.ServiceStatusResult(m.selectedService)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
||||||
case "restart":
|
case "Restart":
|
||||||
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceRestart)
|
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceRestart)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
||||||
case "start":
|
case "Start":
|
||||||
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStart)
|
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStart)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
||||||
case "stop":
|
case "Stop":
|
||||||
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStop)
|
result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStop)
|
||||||
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
|
||||||
default:
|
default:
|
||||||
return resultMsg{title: "service", body: "unknown action", back: screenServiceAction}
|
return resultMsg{title: "Service", body: "Unknown action.", back: screenServiceAction}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package tui
|
package tui
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
@@ -153,7 +154,8 @@ func TestMainMenuAsyncActionsSetBusy(t *testing.T) {
|
|||||||
{name: "run audit", cursor: 3},
|
{name: "run audit", cursor: 3},
|
||||||
{name: "export", cursor: 4},
|
{name: "export", cursor: 4},
|
||||||
{name: "check tools", cursor: 5},
|
{name: "check tools", cursor: 5},
|
||||||
{name: "log tail", cursor: 6},
|
{name: "health summary", cursor: 6},
|
||||||
|
{name: "log tail", cursor: 7},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
@@ -262,6 +264,31 @@ func TestAcceptanceConfirmFlow(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAcceptanceMenuMapsNewTargets(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
cursor int
|
||||||
|
want actionKind
|
||||||
|
}{
|
||||||
|
{cursor: 0, want: actionRunNvidiaSAT},
|
||||||
|
{cursor: 1, want: actionRunMemorySAT},
|
||||||
|
{cursor: 2, want: actionRunStorageSAT},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenAcceptance
|
||||||
|
m.cursor = test.cursor
|
||||||
|
|
||||||
|
next, _ := m.handleAcceptanceMenu()
|
||||||
|
got := next.(model)
|
||||||
|
if got.pendingAction != test.want {
|
||||||
|
t.Fatalf("cursor=%d pendingAction=%q want %q", test.cursor, got.pendingAction, test.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestExportTargetSelectionOpensConfirm(t *testing.T) {
|
func TestExportTargetSelectionOpensConfirm(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -347,3 +374,197 @@ func TestConfirmCancelTarget(t *testing.T) {
|
|||||||
t.Fatalf("default cancel target=%q want %q", got, screenMain)
|
t.Fatalf("default cancel target=%q want %q", got, screenMain)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestViewMainMenuRendersSelectedItem(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.cursor = 1
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"bee",
|
||||||
|
"Select action",
|
||||||
|
" Network",
|
||||||
|
"> Services",
|
||||||
|
"Acceptance tests",
|
||||||
|
"[↑/↓] move [enter] select [esc] back [ctrl+c] quit",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewBusyStateIsMinimal(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.busy = true
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
want := "bee\n\nWorking...\n\n[ctrl+c] quit\n"
|
||||||
|
if view != want {
|
||||||
|
t.Fatalf("busy view mismatch\nwant:\n%s\ngot:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewBusyStateUsesBusyTitle(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.busy = true
|
||||||
|
m.busyTitle = "Export audit"
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"Export audit",
|
||||||
|
"Working...",
|
||||||
|
"[ctrl+c] quit",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewOutputScreenRendersBodyAndBackHint(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenOutput
|
||||||
|
m.title = "Run audit"
|
||||||
|
m.body = "audit output: /var/log/bee-audit.json\n"
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"Run audit",
|
||||||
|
"audit output: /var/log/bee-audit.json",
|
||||||
|
"[enter/esc] back [ctrl+c] quit",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewExportTargetsRendersDeviceMetadata(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenExportTargets
|
||||||
|
m.targets = []platform.RemovableTarget{
|
||||||
|
{
|
||||||
|
Device: "/dev/sdb1",
|
||||||
|
FSType: "vfat",
|
||||||
|
Size: "29G",
|
||||||
|
Label: "BEEUSB",
|
||||||
|
Mountpoint: "/media/bee",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"Export audit",
|
||||||
|
"Select removable filesystem",
|
||||||
|
"> /dev/sdb1 [vfat 29G] label=BEEUSB mounted=/media/bee",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewStaticFormRendersFields(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenStaticForm
|
||||||
|
m.selectedIface = "enp1s0"
|
||||||
|
m.formFields = []formField{
|
||||||
|
{Label: "Address", Value: "192.0.2.10/24"},
|
||||||
|
{Label: "Gateway", Value: "192.0.2.1"},
|
||||||
|
{Label: "DNS", Value: "1.1.1.1"},
|
||||||
|
}
|
||||||
|
m.formIndex = 1
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"Static IPv4: enp1s0",
|
||||||
|
" Address: 192.0.2.10/24",
|
||||||
|
"> Gateway: 192.0.2.1",
|
||||||
|
" DNS: 1.1.1.1",
|
||||||
|
"[tab/↑/↓] move [enter] next/submit [backspace] delete [esc] cancel",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewConfirmScreenMatchesPendingExport(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenConfirm
|
||||||
|
m.pendingAction = actionExportAudit
|
||||||
|
m.selectedTarget = &platform.RemovableTarget{Device: "/dev/sdb1"}
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"Export audit",
|
||||||
|
"Copy latest audit JSON to /dev/sdb1?",
|
||||||
|
"> Confirm",
|
||||||
|
" Cancel",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResultMsgClearsBusyAndPendingAction(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.busy = true
|
||||||
|
m.busyTitle = "Export audit"
|
||||||
|
m.pendingAction = actionExportAudit
|
||||||
|
m.screen = screenConfirm
|
||||||
|
|
||||||
|
next, _ := m.Update(resultMsg{title: "Export audit", body: "done", back: screenMain})
|
||||||
|
got := next.(model)
|
||||||
|
|
||||||
|
if got.busy {
|
||||||
|
t.Fatal("busy=true want false")
|
||||||
|
}
|
||||||
|
if got.busyTitle != "" {
|
||||||
|
t.Fatalf("busyTitle=%q want empty", got.busyTitle)
|
||||||
|
}
|
||||||
|
if got.pendingAction != actionNone {
|
||||||
|
t.Fatalf("pendingAction=%q want empty", got.pendingAction)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResultMsgErrorWithoutBodyFormatsCleanly(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
|
||||||
|
next, _ := m.Update(resultMsg{title: "Export audit", err: assertErr("boom"), back: screenMain})
|
||||||
|
got := next.(model)
|
||||||
|
|
||||||
|
if got.body != "ERROR: boom" {
|
||||||
|
t.Fatalf("body=%q want %q", got.body, "ERROR: boom")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertErr is a string-backed error used by tests that need a fixed message.
type assertErr string

// Error returns the underlying string unchanged.
func (msg assertErr) Error() string {
	text := string(msg)
	return text
}
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ const (
|
|||||||
actionStaticIPv4 actionKind = "static_ipv4"
|
actionStaticIPv4 actionKind = "static_ipv4"
|
||||||
actionExportAudit actionKind = "export_audit"
|
actionExportAudit actionKind = "export_audit"
|
||||||
actionRunNvidiaSAT actionKind = "run_nvidia_sat"
|
actionRunNvidiaSAT actionKind = "run_nvidia_sat"
|
||||||
|
actionRunMemorySAT actionKind = "run_memory_sat"
|
||||||
|
actionRunStorageSAT actionKind = "run_storage_sat"
|
||||||
)
|
)
|
||||||
|
|
||||||
type model struct {
|
type model struct {
|
||||||
@@ -41,6 +43,7 @@ type model struct {
|
|||||||
prevScreen screen
|
prevScreen screen
|
||||||
cursor int
|
cursor int
|
||||||
busy bool
|
busy bool
|
||||||
|
busyTitle string
|
||||||
title string
|
title string
|
||||||
body string
|
body string
|
||||||
mainMenu []string
|
mainMenu []string
|
||||||
@@ -80,28 +83,29 @@ func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
|
|||||||
runtimeMode: runtimeMode,
|
runtimeMode: runtimeMode,
|
||||||
screen: screenMain,
|
screen: screenMain,
|
||||||
mainMenu: []string{
|
mainMenu: []string{
|
||||||
"Network setup",
|
"Network",
|
||||||
"bee service management",
|
"Services",
|
||||||
"System acceptance tests",
|
"Acceptance tests",
|
||||||
"Run audit now",
|
"Run audit",
|
||||||
"Export audit to removable drive",
|
"Export audit",
|
||||||
"Check required tools",
|
"Check tools",
|
||||||
"Show last audit log tail",
|
"Show health summary",
|
||||||
|
"Show audit logs",
|
||||||
"Exit",
|
"Exit",
|
||||||
},
|
},
|
||||||
networkMenu: []string{
|
networkMenu: []string{
|
||||||
"Show network status",
|
"Show status",
|
||||||
"DHCP on all interfaces",
|
"DHCP on all interfaces",
|
||||||
"DHCP on one interface",
|
"DHCP on one interface",
|
||||||
"Set static IPv4 on one interface",
|
"Set static IPv4",
|
||||||
"Back",
|
"Back",
|
||||||
},
|
},
|
||||||
serviceMenu: []string{
|
serviceMenu: []string{
|
||||||
"status",
|
"Status",
|
||||||
"restart",
|
"Restart",
|
||||||
"start",
|
"Start",
|
||||||
"stop",
|
"Stop",
|
||||||
"back",
|
"Back",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,12 +21,19 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
return m.updateKey(msg)
|
return m.updateKey(msg)
|
||||||
case resultMsg:
|
case resultMsg:
|
||||||
m.busy = false
|
m.busy = false
|
||||||
|
m.busyTitle = ""
|
||||||
m.title = msg.title
|
m.title = msg.title
|
||||||
if msg.err != nil {
|
if msg.err != nil {
|
||||||
m.body = fmt.Sprintf("%s\n\nERROR: %v", strings.TrimSpace(msg.body), msg.err)
|
body := strings.TrimSpace(msg.body)
|
||||||
|
if body == "" {
|
||||||
|
m.body = fmt.Sprintf("ERROR: %v", msg.err)
|
||||||
|
} else {
|
||||||
|
m.body = fmt.Sprintf("%s\n\nERROR: %v", body, msg.err)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
m.body = msg.body
|
m.body = msg.body
|
||||||
}
|
}
|
||||||
|
m.pendingAction = actionNone
|
||||||
if msg.back != "" {
|
if msg.back != "" {
|
||||||
m.prevScreen = msg.back
|
m.prevScreen = msg.back
|
||||||
} else {
|
} else {
|
||||||
@@ -37,8 +44,9 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
case servicesMsg:
|
case servicesMsg:
|
||||||
m.busy = false
|
m.busy = false
|
||||||
|
m.busyTitle = ""
|
||||||
if msg.err != nil {
|
if msg.err != nil {
|
||||||
m.title = "bee services"
|
m.title = "Services"
|
||||||
m.body = msg.err.Error()
|
m.body = msg.err.Error()
|
||||||
m.prevScreen = screenMain
|
m.prevScreen = screenMain
|
||||||
m.screen = screenOutput
|
m.screen = screenOutput
|
||||||
@@ -50,6 +58,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
case interfacesMsg:
|
case interfacesMsg:
|
||||||
m.busy = false
|
m.busy = false
|
||||||
|
m.busyTitle = ""
|
||||||
if msg.err != nil {
|
if msg.err != nil {
|
||||||
m.title = "interfaces"
|
m.title = "interfaces"
|
||||||
m.body = msg.err.Error()
|
m.body = msg.err.Error()
|
||||||
@@ -63,6 +72,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
case exportTargetsMsg:
|
case exportTargetsMsg:
|
||||||
m.busy = false
|
m.busy = false
|
||||||
|
m.busyTitle = ""
|
||||||
if msg.err != nil {
|
if msg.err != nil {
|
||||||
m.title = "export"
|
m.title = "export"
|
||||||
m.body = msg.err.Error()
|
m.body = msg.err.Error()
|
||||||
@@ -90,7 +100,7 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
case screenServiceAction:
|
case screenServiceAction:
|
||||||
return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu)
|
return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu)
|
||||||
case screenAcceptance:
|
case screenAcceptance:
|
||||||
return m.updateMenu(msg, 2, m.handleAcceptanceMenu)
|
return m.updateMenu(msg, 4, m.handleAcceptanceMenu)
|
||||||
case screenExportTargets:
|
case screenExportTargets:
|
||||||
return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu)
|
return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu)
|
||||||
case screenInterfacePick:
|
case screenInterfacePick:
|
||||||
@@ -101,6 +111,7 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
m.screen = m.prevScreen
|
m.screen = m.prevScreen
|
||||||
m.body = ""
|
m.body = ""
|
||||||
m.title = ""
|
m.title = ""
|
||||||
|
m.pendingAction = actionNone
|
||||||
return m, nil
|
return m, nil
|
||||||
case "ctrl+c":
|
case "ctrl+c":
|
||||||
return m, tea.Quit
|
return m, tea.Quit
|
||||||
|
|||||||
@@ -11,7 +11,11 @@ import (
|
|||||||
|
|
||||||
func (m model) View() string {
|
func (m model) View() string {
|
||||||
if m.busy {
|
if m.busy {
|
||||||
return "bee\n\nWorking...\n"
|
title := "bee"
|
||||||
|
if m.busyTitle != "" {
|
||||||
|
title = m.busyTitle
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s\n\nWorking...\n\n[ctrl+c] quit\n", title)
|
||||||
}
|
}
|
||||||
switch m.screen {
|
switch m.screen {
|
||||||
case screenMain:
|
case screenMain:
|
||||||
@@ -19,13 +23,13 @@ func (m model) View() string {
|
|||||||
case screenNetwork:
|
case screenNetwork:
|
||||||
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
||||||
case screenServices:
|
case screenServices:
|
||||||
return renderMenu("bee services", "Select service", m.services, m.cursor)
|
return renderMenu("Services", "Select service", m.services, m.cursor)
|
||||||
case screenServiceAction:
|
case screenServiceAction:
|
||||||
items := make([]string, len(m.serviceMenu))
|
items := make([]string, len(m.serviceMenu))
|
||||||
copy(items, m.serviceMenu)
|
copy(items, m.serviceMenu)
|
||||||
return renderMenu("Service: "+m.selectedService, "Select action", items, m.cursor)
|
return renderMenu("Service: "+m.selectedService, "Select action", items, m.cursor)
|
||||||
case screenAcceptance:
|
case screenAcceptance:
|
||||||
return renderMenu("System acceptance tests", "Select action", []string{"Run NVIDIA command pack", "Back"}, m.cursor)
|
return renderMenu("Acceptance tests", "Select action", []string{"Run NVIDIA command pack", "Run memory test", "Run storage diagnostic pack", "Back"}, m.cursor)
|
||||||
case screenExportTargets:
|
case screenExportTargets:
|
||||||
return renderMenu("Export audit", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
|
return renderMenu("Export audit", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
|
||||||
case screenInterfacePick:
|
case screenInterfacePick:
|
||||||
@@ -51,6 +55,10 @@ func (m model) confirmBody() (string, string) {
|
|||||||
return "Export audit", fmt.Sprintf("Copy latest audit JSON to %s?", m.selectedTarget.Device)
|
return "Export audit", fmt.Sprintf("Copy latest audit JSON to %s?", m.selectedTarget.Device)
|
||||||
case actionRunNvidiaSAT:
|
case actionRunNvidiaSAT:
|
||||||
return "NVIDIA SAT", "Run NVIDIA acceptance command pack?"
|
return "NVIDIA SAT", "Run NVIDIA acceptance command pack?"
|
||||||
|
case actionRunMemorySAT:
|
||||||
|
return "Memory SAT", "Run runtime memory test with memtester?"
|
||||||
|
case actionRunStorageSAT:
|
||||||
|
return "Storage SAT", "Run storage diagnostic pack and start short self-tests where supported?"
|
||||||
default:
|
default:
|
||||||
return "Confirm", "Proceed?"
|
return "Confirm", "Proceed?"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ local-fs.target
|
|||||||
Reason: the modules are shipped in the ISO overlay under `/usr/local/lib/nvidia/`, not in the host module tree.
|
Reason: the modules are shipped in the ISO overlay under `/usr/local/lib/nvidia/`, not in the host module tree.
|
||||||
- `bee-audit.service` does not wait for `network-online.target`; audit is local and must run even if DHCP is broken.
|
- `bee-audit.service` does not wait for `network-online.target`; audit is local and must run even if DHCP is broken.
|
||||||
- `bee-audit.service` logs audit failures but does not turn partial collector problems into a boot blocker.
|
- `bee-audit.service` logs audit failures but does not turn partial collector problems into a boot blocker.
|
||||||
|
- Audit JSON now includes a `hardware.summary` block with overall verdict and warning/failure counts.
|
||||||
|
|
||||||
## Console and login flow
|
## Console and login flow
|
||||||
|
|
||||||
@@ -59,7 +60,7 @@ build.sh [--authorized-keys /path/to/keys]
|
|||||||
3. inject authorized_keys into staged `root/.ssh/` (or set password fallback marker)
|
3. inject authorized_keys into staged `root/.ssh/` (or set password fallback marker)
|
||||||
4. copy `bee` binary → staged `/usr/local/bin/bee`
|
4. copy `bee` binary → staged `/usr/local/bin/bee`
|
||||||
5. copy vendor binaries from `iso/vendor/` → staged `/usr/local/bin/`
|
5. copy vendor binaries from `iso/vendor/` → staged `/usr/local/bin/`
|
||||||
(`storcli64`, `sas2ircu`, `sas3ircu`, `mstflint` — each optional)
|
(`storcli64`, `sas2ircu`, `sas3ircu`, `arcconf`, `ssacli` — optional; `mstflint` comes from the Debian package set)
|
||||||
6. `build-nvidia-module.sh`:
|
6. `build-nvidia-module.sh`:
|
||||||
a. install Debian kernel headers if missing
|
a. install Debian kernel headers if missing
|
||||||
b. download NVIDIA `.run` installer (sha256 verified, cached in `dist/`)
|
b. download NVIDIA `.run` installer (sha256 verified, cached in `dist/`)
|
||||||
@@ -119,10 +120,15 @@ Current validation state:
|
|||||||
3. memory collector (dmidecode -t 17)
|
3. memory collector (dmidecode -t 17)
|
||||||
4. storage collector (lsblk -J, smartctl -j, nvme id-ctrl, nvme smart-log)
|
4. storage collector (lsblk -J, smartctl -j, nvme id-ctrl, nvme smart-log)
|
||||||
5. pcie collector (lspci -vmm -D, /sys/bus/pci/devices/)
|
5. pcie collector (lspci -vmm -D, /sys/bus/pci/devices/)
|
||||||
6. psu collector (ipmitool fru — silent if no /dev/ipmi0)
|
6. psu collector (ipmitool fru + sdr — silent if no /dev/ipmi0)
|
||||||
7. nvidia enrichment (nvidia-smi — skipped if binary absent or driver not loaded)
|
7. nvidia enrichment (nvidia-smi — skipped if binary absent or driver not loaded)
|
||||||
8. output JSON → /var/log/bee-audit.json
|
8. output JSON → /var/log/bee-audit.json
|
||||||
9. QR summary to stdout (qrencode if available)
|
9. QR summary to stdout (qrencode if available)
|
||||||
```
|
```
|
||||||
|
|
||||||
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
||||||
|
|
||||||
|
Acceptance flows:
|
||||||
|
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-stress`
|
||||||
|
- `bee sat memory` → `memtester` archive
|
||||||
|
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
## In scope
|
## In scope
|
||||||
|
|
||||||
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
||||||
|
- Machine-readable health summary derived from collector verdicts
|
||||||
|
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
|
||||||
|
- NVIDIA SAT includes both diagnostic collection and lightweight GPU stress via `bee-gpu-stress`
|
||||||
- Automatic boot audit with operator-facing local console and SSH access
|
- Automatic boot audit with operator-facing local console and SSH access
|
||||||
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
||||||
- SSH access (OpenSSH) always available for inspection and debugging
|
- SSH access (OpenSSH) always available for inspection and debugging
|
||||||
@@ -81,7 +84,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
| `audit/internal/schema/` | HardwareIngestRequest types |
|
| `audit/internal/schema/` | HardwareIngestRequest types |
|
||||||
| `iso/builder/` | ISO build scripts and `live-build` profile |
|
| `iso/builder/` | ISO build scripts and `live-build` profile |
|
||||||
| `iso/overlay/` | Source overlay copied into a staged build overlay |
|
| `iso/overlay/` | Source overlay copied into a staged build overlay |
|
||||||
| `iso/vendor/` | Optional pre-built vendor binaries (storcli64, sas2ircu, sas3ircu, mstflint, …) |
|
| `iso/vendor/` | Optional pre-built vendor binaries (storcli64, sas2ircu, sas3ircu, arcconf, ssacli, …) |
|
||||||
| `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
|
| `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
|
||||||
| `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
|
| `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
|
||||||
| `iso/overlay/etc/profile.d/bee.sh` | `menu` helper + tty1 auto-start policy |
|
| `iso/overlay/etc/profile.d/bee.sh` | `menu` helper + tty1 auto-start policy |
|
||||||
|
|||||||
314
iso/builder/bee-gpu-stress.c
Normal file
314
iso/builder/bee-gpu-stress.c
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
#define _POSIX_C_SOURCE 200809L
|
||||||
|
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
typedef int CUdevice;
|
||||||
|
typedef uint64_t CUdeviceptr;
|
||||||
|
typedef int CUresult;
|
||||||
|
typedef void *CUcontext;
|
||||||
|
typedef void *CUmodule;
|
||||||
|
typedef void *CUfunction;
|
||||||
|
typedef void *CUstream;
|
||||||
|
|
||||||
|
#define CU_SUCCESS 0
|
||||||
|
|
||||||
|
static const char *ptx_source =
|
||||||
|
".version 6.0\n"
|
||||||
|
".target sm_30\n"
|
||||||
|
".address_size 64\n"
|
||||||
|
"\n"
|
||||||
|
".visible .entry burn(\n"
|
||||||
|
" .param .u64 data,\n"
|
||||||
|
" .param .u32 words,\n"
|
||||||
|
" .param .u32 rounds\n"
|
||||||
|
")\n"
|
||||||
|
"{\n"
|
||||||
|
" .reg .pred %p<2>;\n"
|
||||||
|
" .reg .b32 %r<8>;\n"
|
||||||
|
" .reg .b64 %rd<5>;\n"
|
||||||
|
"\n"
|
||||||
|
" ld.param.u64 %rd1, [data];\n"
|
||||||
|
" ld.param.u32 %r1, [words];\n"
|
||||||
|
" ld.param.u32 %r2, [rounds];\n"
|
||||||
|
" mov.u32 %r3, %ctaid.x;\n"
|
||||||
|
" mov.u32 %r4, %ntid.x;\n"
|
||||||
|
" mov.u32 %r5, %tid.x;\n"
|
||||||
|
" mad.lo.s32 %r0, %r3, %r4, %r5;\n"
|
||||||
|
" setp.ge.u32 %p0, %r0, %r1;\n"
|
||||||
|
" @%p0 bra DONE;\n"
|
||||||
|
" mul.wide.u32 %rd2, %r0, 4;\n"
|
||||||
|
" add.s64 %rd3, %rd1, %rd2;\n"
|
||||||
|
" ld.global.u32 %r6, [%rd3];\n"
|
||||||
|
"LOOP:\n"
|
||||||
|
" setp.eq.u32 %p1, %r2, 0;\n"
|
||||||
|
" @%p1 bra STORE;\n"
|
||||||
|
" mad.lo.u32 %r6, %r6, 1664525, 1013904223;\n"
|
||||||
|
" sub.u32 %r2, %r2, 1;\n"
|
||||||
|
" bra LOOP;\n"
|
||||||
|
"STORE:\n"
|
||||||
|
" st.global.u32 [%rd3], %r6;\n"
|
||||||
|
"DONE:\n"
|
||||||
|
" ret;\n"
|
||||||
|
"}\n";
|
||||||
|
|
||||||
|
typedef CUresult (*cuInit_fn)(unsigned int);
|
||||||
|
typedef CUresult (*cuDeviceGetCount_fn)(int *);
|
||||||
|
typedef CUresult (*cuDeviceGet_fn)(CUdevice *, int);
|
||||||
|
typedef CUresult (*cuDeviceGetName_fn)(char *, int, CUdevice);
|
||||||
|
typedef CUresult (*cuCtxCreate_fn)(CUcontext *, unsigned int, CUdevice);
|
||||||
|
typedef CUresult (*cuCtxDestroy_fn)(CUcontext);
|
||||||
|
typedef CUresult (*cuCtxSynchronize_fn)(void);
|
||||||
|
typedef CUresult (*cuMemAlloc_fn)(CUdeviceptr *, size_t);
|
||||||
|
typedef CUresult (*cuMemFree_fn)(CUdeviceptr);
|
||||||
|
typedef CUresult (*cuMemcpyHtoD_fn)(CUdeviceptr, const void *, size_t);
|
||||||
|
typedef CUresult (*cuMemcpyDtoH_fn)(void *, CUdeviceptr, size_t);
|
||||||
|
typedef CUresult (*cuModuleLoadDataEx_fn)(CUmodule *, const void *, unsigned int, void *, void *);
|
||||||
|
typedef CUresult (*cuModuleGetFunction_fn)(CUfunction *, CUmodule, const char *);
|
||||||
|
typedef CUresult (*cuLaunchKernel_fn)(CUfunction,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int,
|
||||||
|
CUstream,
|
||||||
|
void **,
|
||||||
|
void **);
|
||||||
|
typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
|
||||||
|
typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);
|
||||||
|
|
||||||
|
struct cuda_api {
|
||||||
|
void *lib;
|
||||||
|
cuInit_fn cuInit;
|
||||||
|
cuDeviceGetCount_fn cuDeviceGetCount;
|
||||||
|
cuDeviceGet_fn cuDeviceGet;
|
||||||
|
cuDeviceGetName_fn cuDeviceGetName;
|
||||||
|
cuCtxCreate_fn cuCtxCreate;
|
||||||
|
cuCtxDestroy_fn cuCtxDestroy;
|
||||||
|
cuCtxSynchronize_fn cuCtxSynchronize;
|
||||||
|
cuMemAlloc_fn cuMemAlloc;
|
||||||
|
cuMemFree_fn cuMemFree;
|
||||||
|
cuMemcpyHtoD_fn cuMemcpyHtoD;
|
||||||
|
cuMemcpyDtoH_fn cuMemcpyDtoH;
|
||||||
|
cuModuleLoadDataEx_fn cuModuleLoadDataEx;
|
||||||
|
cuModuleGetFunction_fn cuModuleGetFunction;
|
||||||
|
cuLaunchKernel_fn cuLaunchKernel;
|
||||||
|
cuGetErrorName_fn cuGetErrorName;
|
||||||
|
cuGetErrorString_fn cuGetErrorString;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Look up `name` in the shared object `lib` and store the resulting address
 * in *out (NULL when the lookup fails).
 *
 * Returns 1 when dlsym() produced a non-NULL address, 0 otherwise.
 */
static int load_symbol(void *lib, const char *name, void **out) {
    void *address = dlsym(lib, name);

    *out = address;
    if (address == NULL) {
        return 0;
    }
    return 1;
}
|
||||||
|
|
||||||
|
static int load_cuda(struct cuda_api *api) {
|
||||||
|
memset(api, 0, sizeof(*api));
|
||||||
|
api->lib = dlopen("libcuda.so.1", RTLD_NOW | RTLD_LOCAL);
|
||||||
|
if (!api->lib) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return
|
||||||
|
load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
|
||||||
|
load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
|
||||||
|
load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
|
||||||
|
load_symbol(api->lib, "cuDeviceGetName", (void **)&api->cuDeviceGetName) &&
|
||||||
|
load_symbol(api->lib, "cuCtxCreate_v2", (void **)&api->cuCtxCreate) &&
|
||||||
|
load_symbol(api->lib, "cuCtxDestroy_v2", (void **)&api->cuCtxDestroy) &&
|
||||||
|
load_symbol(api->lib, "cuCtxSynchronize", (void **)&api->cuCtxSynchronize) &&
|
||||||
|
load_symbol(api->lib, "cuMemAlloc_v2", (void **)&api->cuMemAlloc) &&
|
||||||
|
load_symbol(api->lib, "cuMemFree_v2", (void **)&api->cuMemFree) &&
|
||||||
|
load_symbol(api->lib, "cuMemcpyHtoD_v2", (void **)&api->cuMemcpyHtoD) &&
|
||||||
|
load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
|
||||||
|
load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
|
||||||
|
load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
|
||||||
|
load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Translate a CUresult into its symbolic name via cuGetErrorName.
 *
 * Falls back to the literal "CUDA_ERROR" when the entry point was not
 * resolved, when the lookup itself fails, or when it yields a NULL string.
 */
static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
    const char *text = NULL;

    if (!api->cuGetErrorName) {
        return "CUDA_ERROR";
    }
    if (api->cuGetErrorName(rc, &text) != CU_SUCCESS || text == NULL) {
        return "CUDA_ERROR";
    }
    return text;
}
|
||||||
|
|
||||||
|
/*
 * Translate a CUresult into a human-readable description via
 * cuGetErrorString.
 *
 * Falls back to the literal "unknown" when the entry point was not resolved,
 * when the lookup itself fails, or when it yields a NULL string.
 */
static const char *cu_error_string(struct cuda_api *api, CUresult rc) {
    const char *text = NULL;

    if (!api->cuGetErrorString) {
        return "unknown";
    }
    if (api->cuGetErrorString(rc, &text) != CU_SUCCESS || text == NULL) {
        return "unknown";
    }
    return text;
}
|
||||||
|
|
||||||
|
/*
 * Report the outcome of one Driver API call.
 *
 * `step` is the label used in the diagnostic. Returns 1 when `rc` is
 * CU_SUCCESS; otherwise prints "<step> failed: <name> (<description>)" to
 * stderr and returns 0.
 */
static int check_rc(struct cuda_api *api, const char *step, CUresult rc) {
    if (rc != CU_SUCCESS) {
        fprintf(stderr, "%s failed: %s (%s)\n", step, cu_error_name(api, rc), cu_error_string(api, rc));
        return 0;
    }
    return 1;
}
|
||||||
|
|
||||||
|
/*
 * Read CLOCK_MONOTONIC and return it as fractional seconds.
 *
 * The return value of clock_gettime() is not checked.
 */
static double now_seconds(void) {
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);

    double whole = (double)now.tv_sec;
    double fraction = (double)now.tv_nsec / 1000000000.0;
    return whole + fraction;
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
int seconds = 5;
|
||||||
|
int size_mb = 64;
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
|
||||||
|
seconds = atoi(argv[++i]);
|
||||||
|
} else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
|
||||||
|
size_mb = atoi(argv[++i]);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "usage: %s [--seconds N] [--size-mb N]\n", argv[0]);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (seconds <= 0) {
|
||||||
|
seconds = 5;
|
||||||
|
}
|
||||||
|
if (size_mb <= 0) {
|
||||||
|
size_mb = 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct cuda_api api;
|
||||||
|
if (!load_cuda(&api)) {
|
||||||
|
fprintf(stderr, "failed to load libcuda.so.1 or required Driver API symbols\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
load_symbol(api.lib, "cuGetErrorName", (void **)&api.cuGetErrorName);
|
||||||
|
load_symbol(api.lib, "cuGetErrorString", (void **)&api.cuGetErrorString);
|
||||||
|
|
||||||
|
if (!check_rc(&api, "cuInit", api.cuInit(0))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
|
if (!check_rc(&api, "cuDeviceGetCount", api.cuDeviceGetCount(&count))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (count <= 0) {
|
||||||
|
fprintf(stderr, "no CUDA devices found\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUdevice dev = 0;
|
||||||
|
if (!check_rc(&api, "cuDeviceGet", api.cuDeviceGet(&dev, 0))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
char name[128] = {0};
|
||||||
|
if (!check_rc(&api, "cuDeviceGetName", api.cuDeviceGetName(name, (int)sizeof(name), dev))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUcontext ctx = NULL;
|
||||||
|
if (!check_rc(&api, "cuCtxCreate", api.cuCtxCreate(&ctx, 0, dev))) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytes = (size_t)size_mb * 1024 * 1024;
|
||||||
|
uint32_t words = (uint32_t)(bytes / sizeof(uint32_t));
|
||||||
|
if (words < 1024) {
|
||||||
|
words = 1024;
|
||||||
|
bytes = (size_t)words * sizeof(uint32_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t *host = (uint32_t *)malloc(bytes);
|
||||||
|
if (!host) {
|
||||||
|
fprintf(stderr, "malloc failed\n");
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < words; i++) {
|
||||||
|
host[i] = i ^ 0x12345678u;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUdeviceptr device_mem = 0;
|
||||||
|
if (!check_rc(&api, "cuMemAlloc", api.cuMemAlloc(&device_mem, bytes))) {
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!check_rc(&api, "cuMemcpyHtoD", api.cuMemcpyHtoD(device_mem, host, bytes))) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUmodule module = NULL;
|
||||||
|
if (!check_rc(&api, "cuModuleLoadDataEx", api.cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUfunction kernel = NULL;
|
||||||
|
if (!check_rc(&api, "cuModuleGetFunction", api.cuModuleGetFunction(&kernel, module, "burn"))) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int threads = 256;
|
||||||
|
unsigned int blocks = (words + threads - 1) / threads;
|
||||||
|
uint32_t rounds = 256;
|
||||||
|
void *params[] = {&device_mem, &words, &rounds};
|
||||||
|
|
||||||
|
double start = now_seconds();
|
||||||
|
double deadline = start + (double)seconds;
|
||||||
|
unsigned long iterations = 0;
|
||||||
|
while (now_seconds() < deadline) {
|
||||||
|
if (!check_rc(&api, "cuLaunchKernel",
|
||||||
|
api.cuLaunchKernel(kernel, blocks, 1, 1, threads, 1, 1, 0, NULL, params, NULL))) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
iterations++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!check_rc(&api, "cuCtxSynchronize", api.cuCtxSynchronize())) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!check_rc(&api, "cuMemcpyDtoH", api.cuMemcpyDtoH(host, device_mem, bytes))) {
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t checksum = 0;
|
||||||
|
for (uint32_t i = 0; i < words; i += words / 256 ? words / 256 : 1) {
|
||||||
|
checksum += host[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
double elapsed = now_seconds() - start;
|
||||||
|
printf("device=%s\n", name);
|
||||||
|
printf("duration_s=%.2f\n", elapsed);
|
||||||
|
printf("buffer_mb=%d\n", size_mb);
|
||||||
|
printf("iterations=%lu\n", iterations);
|
||||||
|
printf("checksum=%llu\n", (unsigned long long)checksum);
|
||||||
|
printf("status=OK\n");
|
||||||
|
|
||||||
|
api.cuMemFree(device_mem);
|
||||||
|
free(host);
|
||||||
|
api.cuCtxDestroy(ctx);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -41,6 +41,7 @@ echo ""
|
|||||||
|
|
||||||
# --- compile bee binary (static, Linux amd64) ---
|
# --- compile bee binary (static, Linux amd64) ---
|
||||||
BEE_BIN="${DIST_DIR}/bee-linux-amd64"
|
BEE_BIN="${DIST_DIR}/bee-linux-amd64"
|
||||||
|
GPU_STRESS_BIN="${DIST_DIR}/bee-gpu-stress-linux-amd64"
|
||||||
NEED_BUILD=1
|
NEED_BUILD=1
|
||||||
if [ -f "$BEE_BIN" ]; then
|
if [ -f "$BEE_BIN" ]; then
|
||||||
NEWEST_SRC=$(find "${REPO_ROOT}/audit" -name '*.go' -newer "$BEE_BIN" | head -1)
|
NEWEST_SRC=$(find "${REPO_ROOT}/audit" -name '*.go' -newer "$BEE_BIN" | head -1)
|
||||||
@@ -70,6 +71,22 @@ else
|
|||||||
echo "=== bee binary up to date, skipping build ==="
|
echo "=== bee binary up to date, skipping build ==="
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
GPU_STRESS_NEED_BUILD=1
|
||||||
|
if [ -f "$GPU_STRESS_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_STRESS_BIN" ]; then
|
||||||
|
GPU_STRESS_NEED_BUILD=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
|
||||||
|
echo "=== building bee-gpu-stress ==="
|
||||||
|
gcc -O2 -s -Wall -Wextra \
|
||||||
|
-o "$GPU_STRESS_BIN" \
|
||||||
|
"${BUILDER_DIR}/bee-gpu-stress.c" \
|
||||||
|
-ldl
|
||||||
|
echo "binary: $GPU_STRESS_BIN"
|
||||||
|
else
|
||||||
|
echo "=== bee-gpu-stress up to date, skipping build ==="
|
||||||
|
fi
|
||||||
|
|
||||||
echo "=== preparing staged overlay ==="
|
echo "=== preparing staged overlay ==="
|
||||||
rm -rf "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
|
rm -rf "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
|
||||||
mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
|
mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
|
||||||
@@ -80,6 +97,7 @@ rm -f \
|
|||||||
"${OVERLAY_STAGE_DIR}/etc/bee-release" \
|
"${OVERLAY_STAGE_DIR}/etc/bee-release" \
|
||||||
"${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
|
"${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
|
||||||
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
|
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
|
||||||
|
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
|
||||||
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
||||||
|
|
||||||
# --- inject authorized_keys for SSH access ---
|
# --- inject authorized_keys for SSH access ---
|
||||||
@@ -119,13 +137,15 @@ fi
|
|||||||
mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin"
|
mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin"
|
||||||
cp "${DIST_DIR}/bee-linux-amd64" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
|
cp "${DIST_DIR}/bee-linux-amd64" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
|
||||||
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
|
||||||
|
cp "${GPU_STRESS_BIN}" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
|
||||||
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
|
||||||
|
|
||||||
# --- inject smoketest into overlay so it runs directly on the live CD ---
|
# --- inject smoketest into overlay so it runs directly on the live CD ---
|
||||||
cp "${BUILDER_DIR}/smoketest.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
cp "${BUILDER_DIR}/smoketest.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
||||||
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
||||||
|
|
||||||
# --- vendor utilities (optional pre-fetched binaries) ---
|
# --- vendor utilities (optional pre-fetched binaries) ---
|
||||||
for tool in storcli64 sas2ircu sas3ircu mstflint; do
|
for tool in storcli64 sas2ircu sas3ircu arcconf ssacli; do
|
||||||
if [ -f "${VENDOR_DIR}/${tool}" ]; then
|
if [ -f "${VENDOR_DIR}/${tool}" ]; then
|
||||||
cp "${VENDOR_DIR}/${tool}" "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}"
|
cp "${VENDOR_DIR}/${tool}" "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}"
|
||||||
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}" || true
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}" || true
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ lshw
|
|||||||
iproute2
|
iproute2
|
||||||
isc-dhcp-client
|
isc-dhcp-client
|
||||||
iputils-ping
|
iputils-ping
|
||||||
|
ethtool
|
||||||
qemu-guest-agent
|
qemu-guest-agent
|
||||||
|
|
||||||
# SSH
|
# SSH
|
||||||
@@ -27,6 +28,8 @@ mc
|
|||||||
htop
|
htop
|
||||||
sudo
|
sudo
|
||||||
zstd
|
zstd
|
||||||
|
mstflint
|
||||||
|
memtester
|
||||||
|
|
||||||
# QR codes (for displaying audit results)
|
# QR codes (for displaying audit results)
|
||||||
qrencode
|
qrencode
|
||||||
|
|||||||
Reference in New Issue
Block a user