Compare commits
8 Commits
0c16616cc9
...
audit/v1.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9a1df9b1ba | ||
|
|
30cf014d58 | ||
|
|
27d478aed6 | ||
|
|
d36e8442a9 | ||
|
|
b345b0d14d | ||
|
|
0a1ac2ab9f | ||
|
|
1e62f828c6 | ||
|
|
f8c997d272 |
@@ -173,11 +173,20 @@ func (a *App) RuntimeHealthResult() ActionResult {
|
||||
if err != nil {
|
||||
return ActionResult{Title: "Runtime issues", Body: "No runtime health found."}
|
||||
}
|
||||
driverLabel := "Driver ready"
|
||||
accelLabel := "CUDA ready"
|
||||
switch a.sat.DetectGPUVendor() {
|
||||
case "amd":
|
||||
driverLabel = "AMDGPU ready"
|
||||
accelLabel = "ROCm SMI ready"
|
||||
case "nvidia":
|
||||
driverLabel = "NVIDIA ready"
|
||||
}
|
||||
var body strings.Builder
|
||||
fmt.Fprintf(&body, "Status: %s\n", firstNonEmpty(health.Status, "UNKNOWN"))
|
||||
fmt.Fprintf(&body, "Export dir: %s\n", firstNonEmpty(health.ExportDir, DefaultExportDir))
|
||||
fmt.Fprintf(&body, "Driver ready: %t\n", health.DriverReady)
|
||||
fmt.Fprintf(&body, "CUDA ready: %t\n", health.CUDAReady)
|
||||
fmt.Fprintf(&body, "%s: %t\n", driverLabel, health.DriverReady)
|
||||
fmt.Fprintf(&body, "%s: %t\n", accelLabel, health.CUDAReady)
|
||||
fmt.Fprintf(&body, "Network: %s", firstNonEmpty(health.NetworkStatus, "UNKNOWN"))
|
||||
if len(health.Issues) > 0 {
|
||||
body.WriteString("\n\nIssues:\n")
|
||||
@@ -238,9 +247,9 @@ func (a *App) ExportSupportBundle(target platform.RemovableTarget) (string, erro
|
||||
|
||||
func (a *App) ExportSupportBundleResult(target platform.RemovableTarget) (ActionResult, error) {
|
||||
path, err := a.ExportSupportBundle(target)
|
||||
body := "Support bundle exported."
|
||||
body := "Support bundle exported. USB target unmounted and safe to remove."
|
||||
if path != "" {
|
||||
body = "Support bundle exported to " + path
|
||||
body = "Support bundle exported to " + path + ".\n\nUSB target unmounted and safe to remove."
|
||||
}
|
||||
return ActionResult{Title: "Export support bundle", Body: body}, err
|
||||
}
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
@@ -57,13 +60,22 @@ func (f fakeServices) ServiceDo(name string, action platform.ServiceAction) (str
|
||||
return f.serviceDoFn(name, action)
|
||||
}
|
||||
|
||||
type fakeExports struct{}
|
||||
type fakeExports struct {
|
||||
listTargetsFn func() ([]platform.RemovableTarget, error)
|
||||
exportToTargetFn func(string, platform.RemovableTarget) (string, error)
|
||||
}
|
||||
|
||||
func (f fakeExports) ListRemovableTargets() ([]platform.RemovableTarget, error) {
|
||||
if f.listTargetsFn != nil {
|
||||
return f.listTargetsFn()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (f fakeExports) ExportFileToTarget(src string, target platform.RemovableTarget) (string, error) {
|
||||
if f.exportToTargetFn != nil {
|
||||
return f.exportToTargetFn(src, target)
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
@@ -97,10 +109,14 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
|
||||
}
|
||||
|
||||
type fakeSAT struct {
|
||||
runNvidiaFn func(string) (string, error)
|
||||
runMemoryFn func(string) (string, error)
|
||||
runStorageFn func(string) (string, error)
|
||||
runCPUFn func(string, int) (string, error)
|
||||
runNvidiaFn func(string) (string, error)
|
||||
runMemoryFn func(string) (string, error)
|
||||
runStorageFn func(string) (string, error)
|
||||
runCPUFn func(string, int) (string, error)
|
||||
detectVendorFn func() string
|
||||
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error)
|
||||
runAMDPackFn func(string) (string, error)
|
||||
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
|
||||
}
|
||||
|
||||
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
||||
@@ -112,6 +128,9 @@ func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir s
|
||||
}
|
||||
|
||||
func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
||||
if f.listNvidiaGPUsFn != nil {
|
||||
return f.listNvidiaGPUsFn()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -130,11 +149,26 @@ func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int) (string,
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (f fakeSAT) DetectGPUVendor() string { return "" }
|
||||
func (f fakeSAT) DetectGPUVendor() string {
|
||||
if f.detectVendorFn != nil {
|
||||
return f.detectVendorFn()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) { return nil, nil }
|
||||
func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
||||
if f.listAMDGPUsFn != nil {
|
||||
return f.listAMDGPUsFn()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (f fakeSAT) RunAMDAcceptancePack(baseDir string) (string, error) { return "", nil }
|
||||
func (f fakeSAT) RunAMDAcceptancePack(baseDir string) (string, error) {
|
||||
if f.runAMDPackFn != nil {
|
||||
return f.runAMDPackFn(baseDir)
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
|
||||
t.Parallel()
|
||||
@@ -394,6 +428,44 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmp := t.TempDir()
|
||||
oldExportDir := DefaultExportDir
|
||||
DefaultExportDir = tmp
|
||||
t.Cleanup(func() { DefaultExportDir = oldExportDir })
|
||||
|
||||
if err := os.WriteFile(filepath.Join(tmp, "bee-audit.json"), []byte("{}\n"), 0644); err != nil {
|
||||
t.Fatalf("write bee-audit.json: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tmp, "bee-audit.log"), []byte("audit ok\n"), 0644); err != nil {
|
||||
t.Fatalf("write bee-audit.log: %v", err)
|
||||
}
|
||||
|
||||
a := &App{
|
||||
exports: fakeExports{
|
||||
exportToTargetFn: func(src string, target platform.RemovableTarget) (string, error) {
|
||||
if filepath.Base(src) == "" {
|
||||
t.Fatalf("expected non-empty source path")
|
||||
}
|
||||
return "/media/bee/" + filepath.Base(src), nil
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
result, err := a.ExportSupportBundleResult(platform.RemovableTarget{Device: "/dev/sdb1"})
|
||||
if err != nil {
|
||||
t.Fatalf("ExportSupportBundleResult error: %v", err)
|
||||
}
|
||||
if result.Title != "Export support bundle" {
|
||||
t.Fatalf("title=%q want %q", result.Title, "Export support bundle")
|
||||
}
|
||||
if want := "USB target unmounted and safe to remove."; !contains(result.Body, want) {
|
||||
t.Fatalf("body missing %q\nbody=%s", want, result.Body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@@ -516,6 +588,9 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
archive, err := BuildSupportBundle(exportDir)
|
||||
if err != nil {
|
||||
@@ -524,6 +599,44 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
||||
if _, err := os.Stat(archive); err != nil {
|
||||
t.Fatalf("archive stat: %v", err)
|
||||
}
|
||||
|
||||
file, err := os.Open(archive)
|
||||
if err != nil {
|
||||
t.Fatalf("open archive: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
gzr, err := gzip.NewReader(file)
|
||||
if err != nil {
|
||||
t.Fatalf("gzip reader: %v", err)
|
||||
}
|
||||
defer gzr.Close()
|
||||
|
||||
tr := tar.NewReader(gzr)
|
||||
var names []string
|
||||
for {
|
||||
hdr, err := tr.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("read tar entry: %v", err)
|
||||
}
|
||||
names = append(names, hdr.Name)
|
||||
}
|
||||
|
||||
var foundRaw bool
|
||||
for _, name := range names {
|
||||
if contains(name, "/export/bee-sat/memory-run/verbose.log") {
|
||||
foundRaw = true
|
||||
}
|
||||
if contains(name, "/export/bee-sat/memory-run.tar.gz") {
|
||||
t.Fatalf("support bundle should not contain nested SAT archive: %s", name)
|
||||
}
|
||||
}
|
||||
if !foundRaw {
|
||||
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMainBanner(t *testing.T) {
|
||||
@@ -600,6 +713,44 @@ func TestMainBanner(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeHealthResultUsesAMDLabels(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
oldRuntimePath := DefaultRuntimeJSONPath
|
||||
DefaultRuntimeJSONPath = filepath.Join(tmp, "runtime-health.json")
|
||||
t.Cleanup(func() { DefaultRuntimeJSONPath = oldRuntimePath })
|
||||
|
||||
raw, err := json.Marshal(schema.RuntimeHealth{
|
||||
Status: "OK",
|
||||
ExportDir: "/appdata/bee/export",
|
||||
DriverReady: true,
|
||||
CUDAReady: true,
|
||||
NetworkStatus: "OK",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal runtime health: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(DefaultRuntimeJSONPath, raw, 0644); err != nil {
|
||||
t.Fatalf("write runtime health: %v", err)
|
||||
}
|
||||
|
||||
a := &App{
|
||||
sat: fakeSAT{
|
||||
detectVendorFn: func() string { return "amd" },
|
||||
},
|
||||
}
|
||||
|
||||
result := a.RuntimeHealthResult()
|
||||
if !contains(result.Body, "AMDGPU ready: true") {
|
||||
t.Fatalf("body missing AMD driver label:\n%s", result.Body)
|
||||
}
|
||||
if !contains(result.Body, "ROCm SMI ready: true") {
|
||||
t.Fatalf("body missing ROCm label:\n%s", result.Body)
|
||||
}
|
||||
if contains(result.Body, "CUDA ready") {
|
||||
t.Fatalf("body should not mention CUDA on AMD:\n%s", result.Body)
|
||||
}
|
||||
}
|
||||
|
||||
// intPtr returns a pointer to a fresh int holding v.
func intPtr(v int) *int {
	p := new(int)
	*p = v
	return p
}
|
||||
|
||||
func contains(haystack, needle string) bool {
|
||||
|
||||
@@ -56,7 +56,7 @@ func BuildSupportBundle(exportDir string) (string, error) {
|
||||
}
|
||||
defer os.RemoveAll(stageRoot)
|
||||
|
||||
if err := copyDirContents(exportDir, filepath.Join(stageRoot, "export")); err != nil {
|
||||
if err := copyExportDirForSupportBundle(exportDir, filepath.Join(stageRoot, "export")); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := writeJournalDump(filepath.Join(stageRoot, "systemd", "combined.journal.log")); err != nil {
|
||||
@@ -214,6 +214,40 @@ func copyDirContents(srcDir, dstDir string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
||||
return copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
|
||||
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
|
||||
if cleanRel == "" {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(cleanRel, "bee-sat/") && strings.HasSuffix(cleanRel, ".tar.gz") {
|
||||
return false
|
||||
}
|
||||
if strings.HasPrefix(filepath.Base(cleanRel), "bee-support-") && strings.HasSuffix(cleanRel, ".tar.gz") {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
|
||||
entries, err := os.ReadDir(srcDir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
for _, entry := range entries {
|
||||
src := filepath.Join(srcDir, entry.Name())
|
||||
dst := filepath.Join(dstDir, entry.Name())
|
||||
if err := copyPathFiltered(srcDir, src, dst, keep); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyPath(src, dst string) error {
|
||||
info, err := os.Stat(src)
|
||||
if err != nil {
|
||||
@@ -254,6 +288,36 @@ func copyPath(src, dst string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func copyPathFiltered(rootSrc, src, dst string, keep func(rel string, info os.FileInfo) bool) error {
|
||||
info, err := os.Stat(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rel, err := filepath.Rel(rootSrc, src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if keep != nil && !keep(rel, info) {
|
||||
return nil
|
||||
}
|
||||
if info.IsDir() {
|
||||
if err := os.MkdirAll(dst, info.Mode().Perm()); err != nil {
|
||||
return err
|
||||
}
|
||||
entries, err := os.ReadDir(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, entry := range entries {
|
||||
if err := copyPathFiltered(rootSrc, filepath.Join(src, entry.Name()), filepath.Join(dst, entry.Name()), keep); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return copyPath(src, dst)
|
||||
}
|
||||
|
||||
func createSupportTarGz(dst, srcDir string) error {
|
||||
file, err := os.Create(dst)
|
||||
if err != nil {
|
||||
|
||||
@@ -9,8 +9,10 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
var exportExecCommand = exec.Command
|
||||
|
||||
func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
|
||||
raw, err := exec.Command("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
|
||||
raw, err := exportExecCommand("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -52,7 +54,7 @@ func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string, error) {
|
||||
func (s *System) ExportFileToTarget(src string, target RemovableTarget) (dst string, retErr error) {
|
||||
if src == "" || target.Device == "" {
|
||||
return "", fmt.Errorf("source and target are required")
|
||||
}
|
||||
@@ -62,20 +64,39 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string,
|
||||
|
||||
mountpoint := strings.TrimSpace(target.Mountpoint)
|
||||
mountedHere := false
|
||||
mounted := mountpoint != ""
|
||||
if mountpoint == "" {
|
||||
mountpoint = filepath.Join("/tmp", "bee-export-"+filepath.Base(target.Device))
|
||||
if err := os.MkdirAll(mountpoint, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if raw, err := exec.Command("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
|
||||
if raw, err := exportExecCommand("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
|
||||
_ = os.Remove(mountpoint)
|
||||
return string(raw), err
|
||||
}
|
||||
mountedHere = true
|
||||
mounted = true
|
||||
}
|
||||
defer func() {
|
||||
if !mounted {
|
||||
return
|
||||
}
|
||||
_ = exportExecCommand("sync").Run()
|
||||
if raw, err := exportExecCommand("umount", mountpoint).CombinedOutput(); err != nil && retErr == nil {
|
||||
msg := strings.TrimSpace(string(raw))
|
||||
if msg == "" {
|
||||
retErr = err
|
||||
} else {
|
||||
retErr = fmt.Errorf("%s: %w", msg, err)
|
||||
}
|
||||
}
|
||||
if mountedHere {
|
||||
_ = os.Remove(mountpoint)
|
||||
}
|
||||
}()
|
||||
|
||||
filename := filepath.Base(src)
|
||||
dst := filepath.Join(mountpoint, filename)
|
||||
dst = filepath.Join(mountpoint, filename)
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -83,12 +104,6 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string,
|
||||
if err := os.WriteFile(dst, data, 0644); err != nil {
|
||||
return "", err
|
||||
}
|
||||
_ = exec.Command("sync").Run()
|
||||
|
||||
if mountedHere {
|
||||
_ = exec.Command("umount", mountpoint).Run()
|
||||
_ = os.Remove(mountpoint)
|
||||
}
|
||||
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
56
audit/internal/platform/export_test.go
Normal file
56
audit/internal/platform/export_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package platform
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExportFileToTargetUnmountsExistingMountpoint(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tmp := t.TempDir()
|
||||
src := filepath.Join(tmp, "bundle.tar.gz")
|
||||
mountpoint := filepath.Join(tmp, "mnt")
|
||||
if err := os.MkdirAll(mountpoint, 0755); err != nil {
|
||||
t.Fatalf("mkdir mountpoint: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(src, []byte("bundle"), 0644); err != nil {
|
||||
t.Fatalf("write src: %v", err)
|
||||
}
|
||||
|
||||
var calls [][]string
|
||||
oldExec := exportExecCommand
|
||||
exportExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||
calls = append(calls, append([]string{name}, args...))
|
||||
return exec.Command("sh", "-c", "exit 0")
|
||||
}
|
||||
t.Cleanup(func() { exportExecCommand = oldExec })
|
||||
|
||||
s := &System{}
|
||||
dst, err := s.ExportFileToTarget(src, RemovableTarget{
|
||||
Device: "/dev/sdb1",
|
||||
Mountpoint: mountpoint,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ExportFileToTarget error: %v", err)
|
||||
}
|
||||
if got, want := dst, filepath.Join(mountpoint, "bundle.tar.gz"); got != want {
|
||||
t.Fatalf("dst=%q want %q", got, want)
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(mountpoint, "bundle.tar.gz")); err != nil {
|
||||
t.Fatalf("exported file missing: %v", err)
|
||||
}
|
||||
|
||||
foundUmount := false
|
||||
for _, call := range calls {
|
||||
if len(call) == 2 && call[0] == "umount" && call[1] == mountpoint {
|
||||
foundUmount = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundUmount {
|
||||
t.Fatalf("expected umount %q call, got %#v", mountpoint, calls)
|
||||
}
|
||||
}
|
||||
@@ -16,9 +16,6 @@ var runtimeRequiredTools = []string{
|
||||
"smartctl",
|
||||
"nvme",
|
||||
"ipmitool",
|
||||
"nvidia-smi",
|
||||
"nvidia-bug-report.sh",
|
||||
"bee-gpu-stress",
|
||||
"dhclient",
|
||||
"mount",
|
||||
}
|
||||
@@ -93,7 +90,8 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
|
||||
}
|
||||
}
|
||||
|
||||
for _, tool := range s.CheckTools(runtimeRequiredTools) {
|
||||
vendor := s.DetectGPUVendor()
|
||||
for _, tool := range s.runtimeToolStatuses(vendor) {
|
||||
health.Tools = append(health.Tools, schema.RuntimeToolStatus{
|
||||
Name: tool.Name,
|
||||
Path: tool.Path,
|
||||
@@ -115,39 +113,7 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
|
||||
})
|
||||
}
|
||||
|
||||
lsmodText := commandText("lsmod")
|
||||
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
|
||||
if !health.DriverReady {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_kernel_module_missing",
|
||||
Severity: "warning",
|
||||
Description: "NVIDIA kernel module is not loaded.",
|
||||
})
|
||||
}
|
||||
if health.DriverReady && !strings.Contains(lsmodText, "nvidia_modeset") {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_modeset_failed",
|
||||
Severity: "warning",
|
||||
Description: "nvidia-modeset is not loaded; display/CUDA stack may be partial.",
|
||||
})
|
||||
}
|
||||
if out, err := exec.Command("nvidia-smi", "-L").CombinedOutput(); err == nil && strings.TrimSpace(string(out)) != "" {
|
||||
health.DriverReady = true
|
||||
}
|
||||
|
||||
health.CUDAReady = false
|
||||
if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
|
||||
out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
|
||||
if err == nil {
|
||||
health.CUDAReady = true
|
||||
} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "cuda_runtime_not_ready",
|
||||
Severity: "warning",
|
||||
Description: "CUDA runtime is not ready for GPU SAT.",
|
||||
})
|
||||
}
|
||||
}
|
||||
s.collectGPURuntimeHealth(vendor, &health)
|
||||
|
||||
if health.Status != "FAILED" && len(health.Issues) > 0 {
|
||||
health.Status = "PARTIAL"
|
||||
@@ -162,3 +128,87 @@ func commandText(name string, args ...string) string {
|
||||
}
|
||||
return string(raw)
|
||||
}
|
||||
|
||||
func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
|
||||
tools := s.CheckTools(runtimeRequiredTools)
|
||||
switch vendor {
|
||||
case "nvidia":
|
||||
tools = append(tools, s.CheckTools([]string{
|
||||
"nvidia-smi",
|
||||
"nvidia-bug-report.sh",
|
||||
"bee-gpu-stress",
|
||||
})...)
|
||||
case "amd":
|
||||
tool := ToolStatus{Name: "rocm-smi"}
|
||||
if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
|
||||
tool.Path = cmd[0]
|
||||
if len(cmd) > 1 && strings.HasSuffix(cmd[1], "rocm_smi.py") {
|
||||
tool.Path = cmd[1]
|
||||
}
|
||||
tool.OK = true
|
||||
}
|
||||
tools = append(tools, tool)
|
||||
}
|
||||
return tools
|
||||
}
|
||||
|
||||
func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
|
||||
lsmodText := commandText("lsmod")
|
||||
|
||||
switch vendor {
|
||||
case "nvidia":
|
||||
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
|
||||
if !health.DriverReady {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_kernel_module_missing",
|
||||
Severity: "warning",
|
||||
Description: "NVIDIA kernel module is not loaded.",
|
||||
})
|
||||
}
|
||||
if health.DriverReady && !strings.Contains(lsmodText, "nvidia_modeset") {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_modeset_failed",
|
||||
Severity: "warning",
|
||||
Description: "nvidia-modeset is not loaded; display/CUDA stack may be partial.",
|
||||
})
|
||||
}
|
||||
if out, err := exec.Command("nvidia-smi", "-L").CombinedOutput(); err == nil && strings.TrimSpace(string(out)) != "" {
|
||||
health.DriverReady = true
|
||||
}
|
||||
|
||||
if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
|
||||
out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
|
||||
if err == nil {
|
||||
health.CUDAReady = true
|
||||
} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "cuda_runtime_not_ready",
|
||||
Severity: "warning",
|
||||
Description: "CUDA runtime is not ready for GPU SAT.",
|
||||
})
|
||||
}
|
||||
}
|
||||
case "amd":
|
||||
health.DriverReady = strings.Contains(lsmodText, "amdgpu ") || strings.Contains(lsmodText, "amdkfd")
|
||||
if !health.DriverReady {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "amdgpu_kernel_module_missing",
|
||||
Severity: "warning",
|
||||
Description: "AMD GPU driver is not loaded.",
|
||||
})
|
||||
}
|
||||
|
||||
out, err := runROCmSMI("--showproductname", "--csv")
|
||||
if err == nil && strings.TrimSpace(string(out)) != "" {
|
||||
health.CUDAReady = true
|
||||
health.DriverReady = true
|
||||
return
|
||||
}
|
||||
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "rocm_smi_unavailable",
|
||||
Severity: "warning",
|
||||
Description: "ROCm SMI is not available for AMD GPU SAT.",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -15,6 +16,22 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Package-level indirections over the standard library calls used to locate
// and run SAT tooling. Tests in this package replace satLookPath to simulate a
// missing rocm-smi.
var (
	satExecCommand = exec.Command
	satLookPath    = exec.LookPath
	satGlob        = filepath.Glob
	satStat        = os.Stat
)

// Glob patterns searched for rocm-smi when it is not on PATH: first the
// executable itself, then the Python script. Tests override both lists.
var (
	rocmSMIExecutableGlobs = []string{
		"/opt/rocm/bin/rocm-smi",
		"/opt/rocm-*/bin/rocm-smi",
	}
	rocmSMIScriptGlobs = []string{
		"/opt/rocm/libexec/rocm_smi/rocm_smi.py",
		"/opt/rocm-*/libexec/rocm_smi/rocm_smi.py",
	}
)
|
||||
|
||||
// NvidiaGPU holds basic GPU info from nvidia-smi.
|
||||
type NvidiaGPU struct {
|
||||
Index int
|
||||
@@ -41,7 +58,7 @@ func (s *System) DetectGPUVendor() string {
|
||||
|
||||
// ListAMDGPUs returns AMD GPUs visible to rocm-smi.
|
||||
func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
|
||||
out, err := exec.Command("rocm-smi", "--showproductname", "--csv").Output()
|
||||
out, err := runROCmSMI("--showproductname", "--csv")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("rocm-smi: %w", err)
|
||||
}
|
||||
@@ -337,12 +354,22 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
|
||||
|
||||
func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string, env []string) ([]byte, error) {
|
||||
start := time.Now().UTC()
|
||||
resolvedCmd, err := resolveSATCommand(cmd)
|
||||
appendSATVerboseLog(verboseLog,
|
||||
fmt.Sprintf("[%s] start %s", start.Format(time.RFC3339), name),
|
||||
"cmd: "+strings.Join(cmd, " "),
|
||||
"cmd: "+strings.Join(resolvedCmd, " "),
|
||||
)
|
||||
if err != nil {
|
||||
appendSATVerboseLog(verboseLog,
|
||||
fmt.Sprintf("[%s] finish %s", time.Now().UTC().Format(time.RFC3339), name),
|
||||
"rc: 1",
|
||||
fmt.Sprintf("duration_ms: %d", time.Since(start).Milliseconds()),
|
||||
"",
|
||||
)
|
||||
return []byte(err.Error() + "\n"), err
|
||||
}
|
||||
|
||||
c := exec.CommandContext(ctx, cmd[0], cmd[1:]...)
|
||||
c := exec.CommandContext(ctx, resolvedCmd[0], resolvedCmd[1:]...)
|
||||
if len(env) > 0 {
|
||||
c.Env = append(os.Environ(), env...)
|
||||
}
|
||||
@@ -362,19 +389,11 @@ func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string
|
||||
}
|
||||
|
||||
func listStorageDevices() ([]string, error) {
|
||||
out, err := exec.Command("lsblk", "-dn", "-o", "NAME,TYPE").Output()
|
||||
out, err := satExecCommand("lsblk", "-dn", "-o", "NAME,TYPE,TRAN").Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []string
|
||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||
fields := strings.Fields(strings.TrimSpace(line))
|
||||
if len(fields) != 2 || fields[1] != "disk" {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, "/dev/"+fields[0])
|
||||
}
|
||||
return devices, nil
|
||||
return parseStorageDevices(string(out)), nil
|
||||
}
|
||||
|
||||
func storageSATCommands(devPath string) []satJob {
|
||||
@@ -445,12 +464,22 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
|
||||
|
||||
func runSATCommand(verboseLog, name string, cmd []string) ([]byte, error) {
|
||||
start := time.Now().UTC()
|
||||
resolvedCmd, err := resolveSATCommand(cmd)
|
||||
appendSATVerboseLog(verboseLog,
|
||||
fmt.Sprintf("[%s] start %s", start.Format(time.RFC3339), name),
|
||||
"cmd: "+strings.Join(cmd, " "),
|
||||
"cmd: "+strings.Join(resolvedCmd, " "),
|
||||
)
|
||||
if err != nil {
|
||||
appendSATVerboseLog(verboseLog,
|
||||
fmt.Sprintf("[%s] finish %s", time.Now().UTC().Format(time.RFC3339), name),
|
||||
"rc: 1",
|
||||
fmt.Sprintf("duration_ms: %d", time.Since(start).Milliseconds()),
|
||||
"",
|
||||
)
|
||||
return []byte(err.Error() + "\n"), err
|
||||
}
|
||||
|
||||
out, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
|
||||
out, err := satExecCommand(resolvedCmd[0], resolvedCmd[1:]...).CombinedOutput()
|
||||
|
||||
rc := 0
|
||||
if err != nil {
|
||||
@@ -465,6 +494,91 @@ func runSATCommand(verboseLog, name string, cmd []string) ([]byte, error) {
|
||||
return out, err
|
||||
}
|
||||
|
||||
func runROCmSMI(args ...string) ([]byte, error) {
|
||||
cmd, err := resolveROCmSMICommand(args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return satExecCommand(cmd[0], cmd[1:]...).CombinedOutput()
|
||||
}
|
||||
|
||||
func resolveSATCommand(cmd []string) ([]string, error) {
|
||||
if len(cmd) == 0 {
|
||||
return nil, errors.New("empty SAT command")
|
||||
}
|
||||
if cmd[0] != "rocm-smi" {
|
||||
return cmd, nil
|
||||
}
|
||||
return resolveROCmSMICommand(cmd[1:]...)
|
||||
}
|
||||
|
||||
func resolveROCmSMICommand(args ...string) ([]string, error) {
|
||||
if path, err := satLookPath("rocm-smi"); err == nil {
|
||||
return append([]string{path}, args...), nil
|
||||
}
|
||||
|
||||
for _, path := range rocmSMIExecutableCandidates() {
|
||||
return append([]string{path}, args...), nil
|
||||
}
|
||||
|
||||
pythonPath, pyErr := satLookPath("python3")
|
||||
if pyErr == nil {
|
||||
for _, script := range rocmSMIScriptCandidates() {
|
||||
cmd := []string{pythonPath, script}
|
||||
cmd = append(cmd, args...)
|
||||
return cmd, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
||||
}
|
||||
|
||||
func rocmSMIExecutableCandidates() []string {
|
||||
return expandExistingPaths(rocmSMIExecutableGlobs)
|
||||
}
|
||||
|
||||
func rocmSMIScriptCandidates() []string {
|
||||
return expandExistingPaths(rocmSMIScriptGlobs)
|
||||
}
|
||||
|
||||
func expandExistingPaths(patterns []string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
var paths []string
|
||||
for _, pattern := range patterns {
|
||||
matches, err := satGlob(pattern)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
sort.Strings(matches)
|
||||
for _, match := range matches {
|
||||
if _, err := satStat(match); err != nil {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[match]; ok {
|
||||
continue
|
||||
}
|
||||
seen[match] = struct{}{}
|
||||
paths = append(paths, match)
|
||||
}
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
// parseStorageDevices extracts device paths from whitespace-separated
// "NAME TYPE [TRAN]" lines. A line contributes "/dev/NAME" when its TYPE
// column is exactly "disk" and its TRAN column, if present, is not "usb"
// (compared case-insensitively). All other lines are ignored.
func parseStorageDevices(raw string) []string {
	var devices []string
	lines := strings.Split(strings.TrimSpace(raw), "\n")
	for i := range lines {
		cols := strings.Fields(lines[i])
		isDisk := len(cols) >= 2 && cols[1] == "disk"
		overUSB := len(cols) >= 3 && strings.EqualFold(cols[2], "usb")
		if isDisk && !overUSB {
			devices = append(devices, "/dev/"+cols[0])
		}
	}
	return devices
}
|
||||
|
||||
// runSATCommandWithMetrics runs a command while collecting GPU metrics in the background.
|
||||
// On completion it writes gpu-metrics.csv and gpu-metrics.html into runDir.
|
||||
func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd []string, env []string, gpuIndices []int, runDir string) ([]byte, error) {
|
||||
|
||||
@@ -3,6 +3,8 @@ package platform
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -91,3 +93,90 @@ func TestClassifySATResult(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStorageDevicesSkipsUSBDisks(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
raw := "nvme0n1 disk nvme\nsda disk usb\nloop0 loop\nsdb disk sata\n"
|
||||
got := parseStorageDevices(raw)
|
||||
want := []string{"/dev/nvme0n1", "/dev/sdb"}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("len(devices)=%d want %d (%v)", len(got), len(want), got)
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("devices[%d]=%q want %q", i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveROCmSMICommandFromPATH(t *testing.T) {
|
||||
t.Setenv("PATH", t.TempDir())
|
||||
|
||||
toolPath := filepath.Join(os.Getenv("PATH"), "rocm-smi")
|
||||
if err := os.WriteFile(toolPath, []byte("#!/bin/sh\nexit 0\n"), 0755); err != nil {
|
||||
t.Fatalf("write rocm-smi: %v", err)
|
||||
}
|
||||
|
||||
cmd, err := resolveROCmSMICommand("--showproductname")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveROCmSMICommand error: %v", err)
|
||||
}
|
||||
if len(cmd) != 2 {
|
||||
t.Fatalf("cmd len=%d want 2 (%v)", len(cmd), cmd)
|
||||
}
|
||||
if cmd[0] != toolPath {
|
||||
t.Fatalf("cmd[0]=%q want %q", cmd[0], toolPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")
|
||||
if err := os.MkdirAll(filepath.Dir(execPath), 0755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(execPath, []byte("#!/bin/sh\nexit 0\n"), 0755); err != nil {
|
||||
t.Fatalf("write rocm-smi: %v", err)
|
||||
}
|
||||
|
||||
oldGlob := rocmSMIExecutableGlobs
|
||||
oldScriptGlobs := rocmSMIScriptGlobs
|
||||
rocmSMIExecutableGlobs = []string{execPath}
|
||||
rocmSMIScriptGlobs = nil
|
||||
t.Cleanup(func() {
|
||||
rocmSMIExecutableGlobs = oldGlob
|
||||
rocmSMIScriptGlobs = oldScriptGlobs
|
||||
})
|
||||
|
||||
t.Setenv("PATH", "")
|
||||
|
||||
cmd, err := resolveROCmSMICommand("--showallinfo")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveROCmSMICommand error: %v", err)
|
||||
}
|
||||
if len(cmd) != 2 {
|
||||
t.Fatalf("cmd len=%d want 2 (%v)", len(cmd), cmd)
|
||||
}
|
||||
if cmd[0] != execPath {
|
||||
t.Fatalf("cmd[0]=%q want %q", cmd[0], execPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunROCmSMIReportsMissingCommand(t *testing.T) {
|
||||
oldLookPath := satLookPath
|
||||
oldExecGlobs := rocmSMIExecutableGlobs
|
||||
oldScriptGlobs := rocmSMIScriptGlobs
|
||||
satLookPath = func(string) (string, error) { return "", exec.ErrNotFound }
|
||||
rocmSMIExecutableGlobs = nil
|
||||
rocmSMIScriptGlobs = nil
|
||||
t.Cleanup(func() {
|
||||
satLookPath = oldLookPath
|
||||
rocmSMIExecutableGlobs = oldExecGlobs
|
||||
rocmSMIScriptGlobs = oldScriptGlobs
|
||||
})
|
||||
|
||||
if _, err := runROCmSMI("--showproductname"); err == nil {
|
||||
t.Fatal("expected missing rocm-smi error")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,15 +24,23 @@ var techDumpFixedCommands = []struct {
|
||||
{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
|
||||
{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
|
||||
{Name: "ipmitool", Args: []string{"sdr"}, File: "ipmitool-sdr.txt"},
|
||||
{Name: "nvme", Args: []string{"list", "-o", "json"}, File: "nvme-list.json"},
|
||||
}
|
||||
|
||||
var techDumpNvidiaCommands = []struct {
|
||||
Name string
|
||||
Args []string
|
||||
File string
|
||||
}{
|
||||
{Name: "nvidia-smi", Args: []string{"-q"}, File: "nvidia-smi-q.txt"},
|
||||
{Name: "nvidia-smi", Args: []string{"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown", "--format=csv,noheader,nounits"}, File: "nvidia-smi-query.csv"},
|
||||
{Name: "nvme", Args: []string{"list", "-o", "json"}, File: "nvme-list.json"},
|
||||
}
|
||||
|
||||
// lsblkDumpRoot is the decoding target for the lsblk JSON dump: a top-level
// "blockdevices" array of which only name, type and transport are kept.
type lsblkDumpRoot struct {
	Blockdevices []struct {
		// Name is the device name as listed, e.g. "sda" (no /dev/ prefix).
		Name string `json:"name"`
		// Type is the lsblk device type, e.g. "disk" or "part".
		Type string `json:"type"`
		// Tran is the transport, e.g. "usb", "nvme" or "sata".
		Tran string `json:"tran"`
	} `json:"blockdevices"`
}
|
||||
|
||||
@@ -50,6 +58,15 @@ func (s *System) CaptureTechnicalDump(baseDir string) error {
|
||||
for _, cmd := range techDumpFixedCommands {
|
||||
writeCommandDump(filepath.Join(baseDir, cmd.File), cmd.Name, cmd.Args...)
|
||||
}
|
||||
switch s.DetectGPUVendor() {
|
||||
case "nvidia":
|
||||
for _, cmd := range techDumpNvidiaCommands {
|
||||
writeCommandDump(filepath.Join(baseDir, cmd.File), cmd.Name, cmd.Args...)
|
||||
}
|
||||
case "amd":
|
||||
writeROCmSMIDump(filepath.Join(baseDir, "rocm-smi.txt"))
|
||||
writeROCmSMIDump(filepath.Join(baseDir, "rocm-smi-showallinfo.txt"), "--showallinfo")
|
||||
}
|
||||
|
||||
for _, dev := range lsblkDumpDevices(filepath.Join(baseDir, "lsblk.json")) {
|
||||
writeCommandDump(filepath.Join(baseDir, "smartctl-"+sanitizeDumpName(dev)+".json"), "smartctl", "-j", "-a", "/dev/"+dev)
|
||||
@@ -69,6 +86,14 @@ func writeCommandDump(path, name string, args ...string) {
|
||||
_ = os.WriteFile(path, out, 0644)
|
||||
}
|
||||
|
||||
func writeROCmSMIDump(path string, args ...string) {
|
||||
out, err := runROCmSMI(args...)
|
||||
if err != nil && len(out) == 0 {
|
||||
return
|
||||
}
|
||||
_ = os.WriteFile(path, out, 0644)
|
||||
}
|
||||
|
||||
func lsblkDumpDevices(path string) []string {
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
@@ -80,6 +105,9 @@ func lsblkDumpDevices(path string) []string {
|
||||
}
|
||||
var devices []string
|
||||
for _, dev := range root.Blockdevices {
|
||||
if strings.EqualFold(strings.TrimSpace(dev.Tran), "usb") {
|
||||
continue
|
||||
}
|
||||
if dev.Type == "disk" && strings.TrimSpace(dev.Name) != "" {
|
||||
devices = append(devices, strings.TrimSpace(dev.Name))
|
||||
}
|
||||
|
||||
@@ -12,12 +12,12 @@ func TestLSBLKDumpDevices(t *testing.T) {
|
||||
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "lsblk.json")
|
||||
if err := os.WriteFile(path, []byte(`{"blockdevices":[{"name":"sda","type":"disk"},{"name":"sda1","type":"part"},{"name":"nvme0n1","type":"disk"}]}`), 0644); err != nil {
|
||||
if err := os.WriteFile(path, []byte(`{"blockdevices":[{"name":"sda","type":"disk","tran":"usb"},{"name":"sda1","type":"part"},{"name":"nvme0n1","type":"disk","tran":"nvme"},{"name":"sdb","type":"disk","tran":"sata"}]}`), 0644); err != nil {
|
||||
t.Fatalf("write lsblk fixture: %v", err)
|
||||
}
|
||||
|
||||
got := lsblkDumpDevices(path)
|
||||
want := []string{"nvme0n1", "sda"}
|
||||
want := []string{"nvme0n1", "sdb"}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("lsblkDumpDevices=%v want %v", got, want)
|
||||
}
|
||||
|
||||
@@ -27,8 +27,9 @@ type exportTargetsMsg struct {
|
||||
err error
|
||||
}
|
||||
|
||||
type panelMsg struct {
|
||||
data app.HardwarePanelData
|
||||
type snapshotMsg struct {
|
||||
banner string
|
||||
panel app.HardwarePanelData
|
||||
}
|
||||
|
||||
type nvidiaGPUsMsg struct {
|
||||
|
||||
131
audit/internal/tui/sat_progress.go
Normal file
131
audit/internal/tui/sat_progress.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/app"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
)
|
||||
|
||||
// satProgressMsg carries the progress lines produced by one poll of the SAT
// verbose log; lines is nil when nothing could be read.
type satProgressMsg struct {
	lines []string
}
|
||||
|
||||
// pollSATProgress returns a Cmd that waits 300ms then reads the latest verbose.log
|
||||
// for the given SAT prefix and returns parsed step progress lines.
|
||||
func pollSATProgress(prefix string, since time.Time) tea.Cmd {
|
||||
return tea.Tick(300*time.Millisecond, func(_ time.Time) tea.Msg {
|
||||
return satProgressMsg{lines: readSATProgressLines(prefix, since)}
|
||||
})
|
||||
}
|
||||
|
||||
func readSATProgressLines(prefix string, since time.Time) []string {
|
||||
pattern := filepath.Join(app.DefaultSATBaseDir, prefix+"-*/verbose.log")
|
||||
matches, err := filepath.Glob(pattern)
|
||||
if err != nil || len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
sort.Strings(matches)
|
||||
// Find the latest file created at or after (since - 5s) to account for clock skew.
|
||||
cutoff := since.Add(-5 * time.Second)
|
||||
candidate := ""
|
||||
for _, m := range matches {
|
||||
info, statErr := os.Stat(m)
|
||||
if statErr == nil && info.ModTime().After(cutoff) {
|
||||
candidate = m
|
||||
}
|
||||
}
|
||||
if candidate == "" {
|
||||
return nil
|
||||
}
|
||||
raw, err := os.ReadFile(candidate)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return parseSATVerboseProgress(string(raw))
|
||||
}
|
||||
|
||||
// parseSATVerboseProgress parses verbose.log content and returns display lines like:
|
||||
//
|
||||
// "PASS lscpu (234ms)"
|
||||
// "FAIL stress-ng (60.0s)"
|
||||
// "... sensors-after"
|
||||
func parseSATVerboseProgress(content string) []string {
|
||||
type step struct {
|
||||
name string
|
||||
rc int
|
||||
durationMs int
|
||||
done bool
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
var steps []step
|
||||
stepIdx := map[string]int{}
|
||||
|
||||
for i, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if idx := strings.Index(line, "] start "); idx >= 0 {
|
||||
name := strings.TrimSpace(line[idx+len("] start "):])
|
||||
if _, exists := stepIdx[name]; !exists {
|
||||
stepIdx[name] = len(steps)
|
||||
steps = append(steps, step{name: name})
|
||||
}
|
||||
} else if idx := strings.Index(line, "] finish "); idx >= 0 {
|
||||
name := strings.TrimSpace(line[idx+len("] finish "):])
|
||||
si, exists := stepIdx[name]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
steps[si].done = true
|
||||
for j := i + 1; j < len(lines) && j <= i+3; j++ {
|
||||
l := strings.TrimSpace(lines[j])
|
||||
if strings.HasPrefix(l, "rc: ") {
|
||||
steps[si].rc, _ = strconv.Atoi(strings.TrimPrefix(l, "rc: "))
|
||||
} else if strings.HasPrefix(l, "duration_ms: ") {
|
||||
steps[si].durationMs, _ = strconv.Atoi(strings.TrimPrefix(l, "duration_ms: "))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var result []string
|
||||
for _, s := range steps {
|
||||
display := cleanSATStepName(s.name)
|
||||
if s.done {
|
||||
status := "PASS"
|
||||
if s.rc != 0 {
|
||||
status = "FAIL"
|
||||
}
|
||||
result = append(result, fmt.Sprintf("%-4s %s (%s)", status, display, fmtDurMs(s.durationMs)))
|
||||
} else {
|
||||
result = append(result, fmt.Sprintf("... %s", display))
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// cleanSATStepName derives a display name from a step log name: a trailing
// ".log" is dropped, and a run of leading ASCII digits is removed together with
// the dash that follows it ("01-lscpu.log" → "lscpu"). When the digits are not
// followed by a dash they are kept.
func cleanSATStepName(name string) string {
	base := strings.TrimSuffix(name, ".log")
	rest := strings.TrimLeft(base, "0123456789")
	if strings.HasPrefix(rest, "-") {
		return rest[1:]
	}
	return base
}
|
||||
|
||||
// fmtDurMs renders a millisecond count for display: values below one second are
// shown as whole milliseconds ("234ms"), anything longer as seconds with one
// decimal place ("60.0s").
func fmtDurMs(ms int) string {
	if ms >= 1000 {
		seconds := float64(ms) / 1000
		return strconv.FormatFloat(seconds, 'f', 1, 64) + "s"
	}
	return strconv.Itoa(ms) + "ms"
}
|
||||
30
audit/internal/tui/snapshot.go
Normal file
30
audit/internal/tui/snapshot.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"bee/audit/internal/app"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
)
|
||||
|
||||
func (m model) refreshSnapshotCmd() tea.Cmd {
|
||||
if m.app == nil {
|
||||
return nil
|
||||
}
|
||||
return func() tea.Msg {
|
||||
return snapshotMsg{
|
||||
banner: m.app.MainBanner(),
|
||||
panel: m.app.LoadHardwarePanel(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func shouldRefreshSnapshot(prev, next model) bool {
|
||||
return prev.screen != next.screen || prev.busy != next.busy
|
||||
}
|
||||
|
||||
func emptySnapshot() snapshotMsg {
|
||||
return snapshotMsg{
|
||||
banner: "",
|
||||
panel: app.HardwarePanelData{},
|
||||
}
|
||||
}
|
||||
@@ -53,9 +53,9 @@ func TestUpdateMainMenuEnterActions(t *testing.T) {
|
||||
wantBusy bool
|
||||
wantCmd bool
|
||||
}{
|
||||
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck},
|
||||
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck, wantCmd: true},
|
||||
{name: "export", cursor: 1, wantScreen: screenMain, wantBusy: true, wantCmd: true},
|
||||
{name: "settings", cursor: 2, wantScreen: screenSettings},
|
||||
{name: "settings", cursor: 2, wantScreen: screenSettings, wantCmd: true},
|
||||
{name: "exit", cursor: 3, wantScreen: screenMain, wantCmd: true},
|
||||
}
|
||||
|
||||
@@ -460,6 +460,55 @@ func TestViewOutputScreenRendersBodyAndBackHint(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestViewRendersBannerModuleAboveScreenBody(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
m := newTestModel()
|
||||
m.banner = "System: Demo Server\nIP: 10.0.0.10"
|
||||
m.width = 60
|
||||
|
||||
view := m.View()
|
||||
|
||||
for _, want := range []string{
|
||||
"┌ MOTD ",
|
||||
"System: Demo Server",
|
||||
"IP: 10.0.0.10",
|
||||
"Health Check",
|
||||
"Export support bundle",
|
||||
} {
|
||||
if !strings.Contains(view, want) {
|
||||
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSnapshotMsgUpdatesBannerAndPanel(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
m := newTestModel()
|
||||
|
||||
next, cmd := m.Update(snapshotMsg{
|
||||
banner: "System: Demo",
|
||||
panel: app.HardwarePanelData{
|
||||
Header: []string{"Demo header"},
|
||||
Rows: []app.ComponentRow{
|
||||
{Key: "CPU", Status: "PASS", Detail: "ok"},
|
||||
},
|
||||
},
|
||||
})
|
||||
got := next.(model)
|
||||
|
||||
if cmd != nil {
|
||||
t.Fatal("expected nil cmd")
|
||||
}
|
||||
if got.banner != "System: Demo" {
|
||||
t.Fatalf("banner=%q want %q", got.banner, "System: Demo")
|
||||
}
|
||||
if len(got.panel.Rows) != 1 || got.panel.Rows[0].Key != "CPU" {
|
||||
t.Fatalf("panel rows=%+v", got.panel.Rows)
|
||||
}
|
||||
}
|
||||
|
||||
func TestViewExportTargetsRendersDeviceMetadata(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@@ -32,32 +32,32 @@ const (
|
||||
type actionKind string
|
||||
|
||||
const (
|
||||
actionNone actionKind = ""
|
||||
actionDHCPOne actionKind = "dhcp_one"
|
||||
actionStaticIPv4 actionKind = "static_ipv4"
|
||||
actionExportBundle actionKind = "export_bundle"
|
||||
actionRunAll actionKind = "run_all"
|
||||
actionRunMemorySAT actionKind = "run_memory_sat"
|
||||
actionRunStorageSAT actionKind = "run_storage_sat"
|
||||
actionRunCPUSAT actionKind = "run_cpu_sat"
|
||||
actionRunAMDGPUSAT actionKind = "run_amd_gpu_sat"
|
||||
actionNone actionKind = ""
|
||||
actionDHCPOne actionKind = "dhcp_one"
|
||||
actionStaticIPv4 actionKind = "static_ipv4"
|
||||
actionExportBundle actionKind = "export_bundle"
|
||||
actionRunAll actionKind = "run_all"
|
||||
actionRunMemorySAT actionKind = "run_memory_sat"
|
||||
actionRunStorageSAT actionKind = "run_storage_sat"
|
||||
actionRunCPUSAT actionKind = "run_cpu_sat"
|
||||
actionRunAMDGPUSAT actionKind = "run_amd_gpu_sat"
|
||||
)
|
||||
|
||||
type model struct {
|
||||
app *app.App
|
||||
runtimeMode runtimeenv.Mode
|
||||
|
||||
screen screen
|
||||
prevScreen screen
|
||||
cursor int
|
||||
busy bool
|
||||
busyTitle string
|
||||
title string
|
||||
body string
|
||||
mainMenu []string
|
||||
screen screen
|
||||
prevScreen screen
|
||||
cursor int
|
||||
busy bool
|
||||
busyTitle string
|
||||
title string
|
||||
body string
|
||||
mainMenu []string
|
||||
settingsMenu []string
|
||||
networkMenu []string
|
||||
serviceMenu []string
|
||||
networkMenu []string
|
||||
serviceMenu []string
|
||||
|
||||
services []string
|
||||
interfaces []platform.InterfaceInfo
|
||||
@@ -74,6 +74,7 @@ type model struct {
|
||||
panel app.HardwarePanelData
|
||||
panelFocus bool
|
||||
panelCursor int
|
||||
banner string
|
||||
|
||||
// Health Check screen
|
||||
hcSel [4]bool
|
||||
@@ -95,6 +96,9 @@ type model struct {
|
||||
progressLines []string
|
||||
progressPrefix string
|
||||
progressSince time.Time
|
||||
|
||||
// Terminal size
|
||||
width int
|
||||
}
|
||||
|
||||
type formField struct {
|
||||
@@ -151,9 +155,7 @@ func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
|
||||
}
|
||||
|
||||
func (m model) Init() tea.Cmd {
|
||||
return func() tea.Msg {
|
||||
return panelMsg{data: m.app.LoadHardwarePanel()}
|
||||
}
|
||||
return m.refreshSnapshotCmd()
|
||||
}
|
||||
|
||||
func (m model) confirmBody() (string, string) {
|
||||
|
||||
@@ -9,6 +9,9 @@ import (
|
||||
|
||||
func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
switch msg := msg.(type) {
|
||||
case tea.WindowSizeMsg:
|
||||
m.width = msg.Width
|
||||
return m, nil
|
||||
case tea.KeyMsg:
|
||||
if m.busy {
|
||||
if msg.String() == "ctrl+c" {
|
||||
@@ -16,7 +19,12 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
return m.updateKey(msg)
|
||||
next, cmd := m.updateKey(msg)
|
||||
nextModel := next.(model)
|
||||
if shouldRefreshSnapshot(m, nextModel) {
|
||||
return nextModel, tea.Batch(cmd, nextModel.refreshSnapshotCmd())
|
||||
}
|
||||
return nextModel, cmd
|
||||
case satProgressMsg:
|
||||
if m.busy && m.progressPrefix != "" {
|
||||
if len(msg.lines) > 0 {
|
||||
@@ -25,6 +33,10 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m, pollSATProgress(m.progressPrefix, m.progressSince)
|
||||
}
|
||||
return m, nil
|
||||
case snapshotMsg:
|
||||
m.banner = msg.banner
|
||||
m.panel = msg.panel
|
||||
return m, nil
|
||||
case resultMsg:
|
||||
m.busy = false
|
||||
m.busyTitle = ""
|
||||
@@ -49,7 +61,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
}
|
||||
m.screen = screenOutput
|
||||
m.cursor = 0
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
case servicesMsg:
|
||||
m.busy = false
|
||||
m.busyTitle = ""
|
||||
@@ -58,12 +70,12 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.body = msg.err.Error()
|
||||
m.prevScreen = screenSettings
|
||||
m.screen = screenOutput
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
}
|
||||
m.services = msg.services
|
||||
m.screen = screenServices
|
||||
m.cursor = 0
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
case interfacesMsg:
|
||||
m.busy = false
|
||||
m.busyTitle = ""
|
||||
@@ -72,12 +84,12 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.body = msg.err.Error()
|
||||
m.prevScreen = screenNetwork
|
||||
m.screen = screenOutput
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
}
|
||||
m.interfaces = msg.ifaces
|
||||
m.screen = screenInterfacePick
|
||||
m.cursor = 0
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
case exportTargetsMsg:
|
||||
m.busy = false
|
||||
m.busyTitle = ""
|
||||
@@ -86,15 +98,12 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.body = msg.err.Error()
|
||||
m.prevScreen = screenMain
|
||||
m.screen = screenOutput
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
}
|
||||
m.targets = msg.targets
|
||||
m.screen = screenExportTargets
|
||||
m.cursor = 0
|
||||
return m, nil
|
||||
case panelMsg:
|
||||
m.panel = msg.data
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
case nvidiaGPUsMsg:
|
||||
return m.handleNvidiaGPUsMsg(msg)
|
||||
case nvtopClosedMsg:
|
||||
@@ -120,7 +129,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
} else {
|
||||
m.body = msg.body
|
||||
}
|
||||
return m, nil
|
||||
return m, m.refreshSnapshotCmd()
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
@@ -154,10 +163,6 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||
m.body = ""
|
||||
m.title = ""
|
||||
m.pendingAction = actionNone
|
||||
// Refresh panel when returning to main screen.
|
||||
if m.prevScreen == screenMain {
|
||||
return m, func() tea.Msg { return panelMsg{data: m.app.LoadHardwarePanel()} }
|
||||
}
|
||||
return m, nil
|
||||
case "ctrl+c":
|
||||
return m, tea.Quit
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
|
||||
"bee/audit/internal/platform"
|
||||
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
)
|
||||
|
||||
// Column widths for two-column main layout.
|
||||
@@ -34,6 +34,7 @@ func colorStatus(status string) string {
|
||||
}
|
||||
|
||||
func (m model) View() string {
|
||||
var body string
|
||||
if m.busy {
|
||||
title := "bee"
|
||||
if m.busyTitle != "" {
|
||||
@@ -46,41 +47,44 @@ func (m model) View() string {
|
||||
fmt.Fprintf(&b, " %s\n", l)
|
||||
}
|
||||
b.WriteString("\n[ctrl+c] quit\n")
|
||||
return b.String()
|
||||
body = b.String()
|
||||
} else {
|
||||
body = fmt.Sprintf("%s\n\nWorking...\n\n[ctrl+c] quit\n", title)
|
||||
}
|
||||
} else {
|
||||
switch m.screen {
|
||||
case screenMain:
|
||||
body = renderTwoColumnMain(m)
|
||||
case screenHealthCheck:
|
||||
body = renderHealthCheck(m)
|
||||
case screenSettings:
|
||||
body = renderMenu("Settings", "Select action", m.settingsMenu, m.cursor)
|
||||
case screenNetwork:
|
||||
body = renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
||||
case screenServices:
|
||||
body = renderMenu("Services", "Select service", m.services, m.cursor)
|
||||
case screenServiceAction:
|
||||
body = renderMenu("Service: "+m.selectedService, "Select action", m.serviceMenu, m.cursor)
|
||||
case screenExportTargets:
|
||||
body = renderMenu("Export support bundle", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
|
||||
case screenInterfacePick:
|
||||
body = renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
|
||||
case screenStaticForm:
|
||||
body = renderForm("Static IPv4: "+m.selectedIface, m.formFields, m.formIndex)
|
||||
case screenConfirm:
|
||||
title, confirmBody := m.confirmBody()
|
||||
body = renderConfirm(title, confirmBody, m.cursor)
|
||||
case screenNvidiaSATSetup:
|
||||
body = renderNvidiaSATSetup(m)
|
||||
case screenNvidiaSATRunning:
|
||||
body = renderNvidiaSATRunning()
|
||||
case screenOutput:
|
||||
body = fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body))
|
||||
default:
|
||||
body = "bee\n"
|
||||
}
|
||||
return fmt.Sprintf("%s\n\nWorking...\n\n[ctrl+c] quit\n", title)
|
||||
}
|
||||
switch m.screen {
|
||||
case screenMain:
|
||||
return renderTwoColumnMain(m)
|
||||
case screenHealthCheck:
|
||||
return renderHealthCheck(m)
|
||||
case screenSettings:
|
||||
return renderMenu("Settings", "Select action", m.settingsMenu, m.cursor)
|
||||
case screenNetwork:
|
||||
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
||||
case screenServices:
|
||||
return renderMenu("Services", "Select service", m.services, m.cursor)
|
||||
case screenServiceAction:
|
||||
return renderMenu("Service: "+m.selectedService, "Select action", m.serviceMenu, m.cursor)
|
||||
case screenExportTargets:
|
||||
return renderMenu("Export support bundle", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
|
||||
case screenInterfacePick:
|
||||
return renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
|
||||
case screenStaticForm:
|
||||
return renderForm("Static IPv4: "+m.selectedIface, m.formFields, m.formIndex)
|
||||
case screenConfirm:
|
||||
title, body := m.confirmBody()
|
||||
return renderConfirm(title, body, m.cursor)
|
||||
case screenNvidiaSATSetup:
|
||||
return renderNvidiaSATSetup(m)
|
||||
case screenNvidiaSATRunning:
|
||||
return renderNvidiaSATRunning()
|
||||
case screenOutput:
|
||||
return fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body))
|
||||
default:
|
||||
return "bee\n"
|
||||
}
|
||||
return m.renderWithBanner(body)
|
||||
}
|
||||
|
||||
// renderTwoColumnMain renders the main screen with menu on the left and hardware panel on the right.
|
||||
@@ -231,3 +235,60 @@ func resultCmd(title, body string, err error, back screen) tea.Cmd {
|
||||
return resultMsg{title: title, body: body, err: err, back: back}
|
||||
}
|
||||
}
|
||||
|
||||
func (m model) renderWithBanner(body string) string {
|
||||
body = strings.TrimRight(body, "\n")
|
||||
banner := renderBannerModule(m.banner, m.width)
|
||||
if banner == "" {
|
||||
if body == "" {
|
||||
return ""
|
||||
}
|
||||
return body + "\n"
|
||||
}
|
||||
if body == "" {
|
||||
return banner + "\n"
|
||||
}
|
||||
return banner + "\n\n" + body + "\n"
|
||||
}
|
||||
|
||||
func renderBannerModule(banner string, width int) string {
|
||||
banner = strings.TrimSpace(banner)
|
||||
if banner == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
lines := strings.Split(banner, "\n")
|
||||
contentWidth := 0
|
||||
for _, line := range lines {
|
||||
if w := lipgloss.Width(line); w > contentWidth {
|
||||
contentWidth = w
|
||||
}
|
||||
}
|
||||
if width > 0 && width-4 > contentWidth {
|
||||
contentWidth = width - 4
|
||||
}
|
||||
if contentWidth < 20 {
|
||||
contentWidth = 20
|
||||
}
|
||||
|
||||
label := " MOTD "
|
||||
topFill := contentWidth + 2 - lipgloss.Width(label)
|
||||
if topFill < 0 {
|
||||
topFill = 0
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString("┌" + label + strings.Repeat("─", topFill) + "┐\n")
|
||||
for _, line := range lines {
|
||||
b.WriteString("│ " + padRight(line, contentWidth) + " │\n")
|
||||
}
|
||||
b.WriteString("└" + strings.Repeat("─", contentWidth+2) + "┘")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func padRight(value string, width int) string {
|
||||
if gap := width - lipgloss.Width(value); gap > 0 {
|
||||
return value + strings.Repeat(" ", gap)
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
@@ -32,6 +32,6 @@ lb config noauto \
|
||||
--memtest none \
|
||||
--iso-volume "EASY-BEE" \
|
||||
--iso-application "EASY-BEE" \
|
||||
--bootappend-live "boot=live components console=tty0 console=ttyS0,115200n8 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||
--bootappend-live "boot=live components console=ttyS0,115200n8 console=ttyS1,115200n8 loglevel=7 systemd.log_target=console systemd.journald.forward_to_console=1 systemd.journald.max_level_console=debug username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||
--apt-recommends false \
|
||||
"${@}"
|
||||
|
||||
@@ -34,6 +34,43 @@ mkdir -p "${CACHE_ROOT}"
|
||||
: "${GOMODCACHE:=${CACHE_ROOT}/go-mod}"
|
||||
export GOCACHE GOMODCACHE
|
||||
|
||||
# resolve_audit_version prints the version string to embed in the build.
# Sources, in order of precedence:
#   1. $BEE_AUDIT_VERSION, when set and non-empty
#   2. the nearest "audit/v*" git tag, else the nearest "v*" tag, as reported by
#      `git describe` (the "audit/v" or "v" prefix is stripped)
#   3. $AUDIT_VERSION, when set and non-empty
#   4. today's date as YYYYMMDD
resolve_audit_version() {
    [ -z "${BEE_AUDIT_VERSION:-}" ] || {
        echo "${BEE_AUDIT_VERSION}"
        return 0
    }

    # git failures (no repository, no matching tag) just leave tag empty.
    tag="$(git -C "${REPO_ROOT}" describe --tags --match 'audit/v*' --abbrev=7 --dirty 2>/dev/null || true)"
    [ -n "${tag}" ] || tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v*' --abbrev=7 --dirty 2>/dev/null || true)"

    case "${tag}" in
        audit/v*)
            echo "${tag#audit/v}"
            return 0
            ;;
        v*)
            echo "${tag#v}"
            return 0
            ;;
        ?*)
            # Any other non-empty description is passed through untouched.
            echo "${tag}"
            return 0
            ;;
    esac

    [ -z "${AUDIT_VERSION:-}" ] || {
        echo "${AUDIT_VERSION}"
        return 0
    }

    date +%Y%m%d
}
|
||||
|
||||
AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
|
||||
|
||||
# Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency.
|
||||
# If headers for the detected ABI are not yet installed (kernel updated since image build),
|
||||
# install them on the fly so NVIDIA modules and ISO kernel always match.
|
||||
@@ -64,6 +101,7 @@ fi
|
||||
|
||||
echo "=== bee ISO build ==="
|
||||
echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERSION}"
|
||||
echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}"
|
||||
echo ""
|
||||
|
||||
echo "=== syncing git submodules ==="
|
||||
@@ -83,7 +121,7 @@ if [ "$NEED_BUILD" = "1" ]; then
|
||||
cd "${REPO_ROOT}/audit"
|
||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
||||
go build \
|
||||
-ldflags "-s -w -X main.Version=${AUDIT_VERSION:-$(date +%Y%m%d)}" \
|
||||
-ldflags "-s -w -X main.Version=${AUDIT_VERSION_EFFECTIVE}" \
|
||||
-o "$BEE_BIN" \
|
||||
./cmd/bee
|
||||
echo "binary: $BEE_BIN"
|
||||
@@ -230,8 +268,8 @@ mkdir -p "${OVERLAY_STAGE_DIR}/etc"
|
||||
BUILD_DATE="$(date +%Y-%m-%d)"
|
||||
GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
|
||||
cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" <<EOF
|
||||
BEE_ISO_VERSION=${AUDIT_VERSION}
|
||||
BEE_AUDIT_VERSION=${AUDIT_VERSION}
|
||||
BEE_ISO_VERSION=${AUDIT_VERSION_EFFECTIVE}
|
||||
BEE_AUDIT_VERSION=${AUDIT_VERSION_EFFECTIVE}
|
||||
BUILD_DATE=${BUILD_DATE}
|
||||
GIT_COMMIT=${GIT_COMMIT}
|
||||
DEBIAN_VERSION=${DEBIAN_VERSION}
|
||||
@@ -272,7 +310,7 @@ lb build 2>&1
|
||||
|
||||
# live-build outputs live-image-amd64.hybrid.iso in LB_DIR
|
||||
ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
|
||||
ISO_OUT="${DIST_DIR}/bee-debian${DEBIAN_VERSION}-v${AUDIT_VERSION}-amd64.iso"
|
||||
ISO_OUT="${DIST_DIR}/bee-debian${DEBIAN_VERSION}-v${AUDIT_VERSION_EFFECTIVE}-amd64.iso"
|
||||
if [ -f "$ISO_RAW" ]; then
|
||||
cp "$ISO_RAW" "$ISO_OUT"
|
||||
echo ""
|
||||
|
||||
@@ -10,12 +10,17 @@ echo " ╚══════╝╚═╝ ╚═╝╚══════╝
|
||||
echo ""
|
||||
|
||||
menuentry "EASY-BEE" {
|
||||
linux @KERNEL_LIVE@ @APPEND_LIVE@
|
||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal
|
||||
initrd @INITRD_LIVE@
|
||||
}
|
||||
|
||||
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off
|
||||
initrd @INITRD_LIVE@
|
||||
}
|
||||
|
||||
menuentry "EASY-BEE (fail-safe)" {
|
||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
||||
initrd @INITRD_LIVE@
|
||||
}
|
||||
|
||||
|
||||
18
iso/builder/config/bootloaders/isolinux/live.cfg.in
Normal file
18
iso/builder/config/bootloaders/isolinux/live.cfg.in
Normal file
@@ -0,0 +1,18 @@
|
||||
label live-@FLAVOUR@-normal
|
||||
menu label ^EASY-BEE
|
||||
menu default
|
||||
linux @LINUX@
|
||||
initrd @INITRD@
|
||||
append @APPEND_LIVE@ bee.nvidia.mode=normal
|
||||
|
||||
label live-@FLAVOUR@-gsp-off
|
||||
menu label EASY-BEE (^NVIDIA GSP=off)
|
||||
linux @LINUX@
|
||||
initrd @INITRD@
|
||||
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off
|
||||
|
||||
label live-@FLAVOUR@-failsafe
|
||||
menu label EASY-BEE (^fail-safe)
|
||||
linux @LINUX@
|
||||
initrd @INITRD@
|
||||
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
||||
@@ -5,6 +5,21 @@ set -e
|
||||
|
||||
echo "=== bee chroot setup ==="
|
||||
|
||||
# ensure_bee_console_user makes sure the "bee" login exists with /home/bee as
# its home and /bin/sh as its shell, owns that home directory, has the fixed
# console password, and is added to the sudo group when that is possible.
ensure_bee_console_user() {
    if ! id bee >/dev/null 2>&1; then
        # -m creates the home directory, -U creates a matching "bee" group.
        useradd -d /home/bee -m -s /bin/sh -U bee
    else
        usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true
    fi

    mkdir -p /home/bee
    chown -R bee:bee /home/bee
    echo "bee:eeb" | chpasswd
    # Best effort: a failure to add the group membership is ignored.
    usermod -aG sudo bee 2>/dev/null || true
}
|
||||
|
||||
ensure_bee_console_user
|
||||
|
||||
# Enable bee services
|
||||
systemctl enable bee-network.service
|
||||
systemctl enable bee-nvidia.service
|
||||
@@ -15,6 +30,8 @@ systemctl enable bee-sshsetup.service
|
||||
systemctl enable ssh.service
|
||||
systemctl enable qemu-guest-agent.service 2>/dev/null || true
|
||||
systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
|
||||
systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
|
||||
systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
|
||||
|
||||
# Ensure scripts are executable
|
||||
chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true
|
||||
@@ -23,6 +40,7 @@ chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true
|
||||
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
|
||||
chmod +x /usr/local/bin/bee-tui 2>/dev/null || true
|
||||
chmod +x /usr/local/bin/bee 2>/dev/null || true
|
||||
chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
||||
|
||||
# Reload udev rules
|
||||
udevadm control --reload-rules 2>/dev/null || true
|
||||
|
||||
@@ -8,14 +8,45 @@ set -e
|
||||
ROCM_VERSION="6.4"
|
||||
ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
|
||||
ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
|
||||
APT_UPDATED=0
|
||||
|
||||
echo "=== AMD ROCm ${ROCM_VERSION}: adding repository ==="
|
||||
|
||||
mkdir -p /etc/apt/keyrings
|
||||
|
||||
# ensure_tool TOOL PKG
# Succeeds immediately when TOOL is already on PATH; otherwise installs PKG with
# apt-get, refreshing the package index first if that has not happened yet in
# this script (tracked in APT_UPDATED).
# Returns non-zero when the index refresh or the installation fails.
ensure_tool() {
    tool="$1"
    pkg="$2"
    if command -v "${tool}" >/dev/null 2>&1; then
        return 0
    fi
    if [ "${APT_UPDATED}" -eq 0 ]; then
        # `set -e` is ignored while a function runs as part of an `if`/`||`
        # condition, so a failed refresh must be propagated explicitly and must
        # not be recorded as done.
        apt-get update -qq || return 1
        APT_UPDATED=1
    fi
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
}
|
||||
|
||||
# ensure_cert_bundle
# Succeeds immediately when /etc/ssl/certs/ca-certificates.crt exists and is
# non-empty; otherwise installs the ca-certificates package, refreshing the
# package index first if that has not happened yet in this script (tracked in
# APT_UPDATED).
# Returns non-zero when the index refresh or the installation fails.
ensure_cert_bundle() {
    if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
        return 0
    fi
    if [ "${APT_UPDATED}" -eq 0 ]; then
        # `set -e` is ignored while a function runs as part of an `if`/`||`
        # condition, so a failed refresh must be propagated explicitly and must
        # not be recorded as done.
        apt-get update -qq || return 1
        APT_UPDATED=1
    fi
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
}
|
||||
|
||||
# live-build chroot may not include fetch/signing tools yet
|
||||
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
|
||||
echo "WARN: failed to install wget/gpg/ca-certificates prerequisites — skipping ROCm install"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Download and import AMD GPG key
|
||||
if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
|
||||
| gpg --dearmor > "${ROCM_KEYRING}"; then
|
||||
| gpg --dearmor --yes --output "${ROCM_KEYRING}"; then
|
||||
echo "WARN: failed to fetch AMD ROCm GPG key — skipping ROCm install"
|
||||
exit 0
|
||||
fi
|
||||
@@ -29,6 +60,14 @@ apt-get update -qq
|
||||
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
|
||||
if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
|
||||
echo "=== AMD ROCm: rocm-smi installed ==="
|
||||
if [ -x /opt/rocm/bin/rocm-smi ]; then
|
||||
ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
|
||||
else
|
||||
candidate="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
||||
if [ -n "${candidate}" ]; then
|
||||
ln -sf "${candidate}" /usr/local/bin/rocm-smi
|
||||
fi
|
||||
fi
|
||||
rocm-smi --version 2>/dev/null || true
|
||||
else
|
||||
echo "WARN: rocm-smi-lib install failed — GPU monitoring unavailable"
|
||||
|
||||
@@ -26,6 +26,15 @@ echo ""
|
||||
|
||||
KVER=$(uname -r)
|
||||
info "kernel: $KVER"
|
||||
NVIDIA_BOOT_MODE="normal"
|
||||
for arg in $(cat /proc/cmdline 2>/dev/null); do
|
||||
case "$arg" in
|
||||
bee.nvidia.mode=*)
|
||||
NVIDIA_BOOT_MODE="${arg#*=}"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
|
||||
|
||||
# --- PATH & binaries ---
|
||||
echo "-- PATH & binaries --"
|
||||
@@ -53,17 +62,25 @@ else
|
||||
fail "NVIDIA ko dir missing: $KO_DIR"
|
||||
fi
|
||||
|
||||
for mod in nvidia nvidia_modeset nvidia_uvm; do
|
||||
if /sbin/lsmod 2>/dev/null | grep -q "^nvidia "; then
|
||||
ok "module loaded: nvidia"
|
||||
else
|
||||
fail "module NOT loaded: nvidia"
|
||||
fi
|
||||
|
||||
for mod in nvidia_modeset nvidia_uvm; do
|
||||
if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
|
||||
ok "module loaded: $mod"
|
||||
elif [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
|
||||
fail "module NOT loaded in normal mode: $mod"
|
||||
else
|
||||
fail "module NOT loaded: $mod"
|
||||
warn "module not loaded in GSP-off mode: $mod"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "-- NVIDIA device nodes --"
|
||||
for dev in nvidiactl nvidia0 nvidia-uvm; do
|
||||
for dev in nvidiactl nvidia0; do
|
||||
if [ -e "/dev/$dev" ]; then
|
||||
ok "/dev/$dev exists"
|
||||
else
|
||||
@@ -71,6 +88,14 @@ for dev in nvidiactl nvidia0 nvidia-uvm; do
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -e /dev/nvidia-uvm ]; then
|
||||
ok "/dev/nvidia-uvm exists"
|
||||
elif [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
|
||||
fail "/dev/nvidia-uvm missing in normal mode"
|
||||
else
|
||||
warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "-- nvidia-smi --"
|
||||
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export PATH="$PATH:/usr/local/bin"
|
||||
export PATH="$PATH:/usr/local/bin:/opt/rocm/bin:/opt/rocm/sbin"
|
||||
|
||||
menu() {
|
||||
if [ -x /usr/local/bin/bee-tui ]; then
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
[Journal]
|
||||
ForwardToConsole=yes
|
||||
TTYPath=/dev/ttyS0
|
||||
MaxLevelConsole=debug
|
||||
@@ -5,9 +5,9 @@ Before=bee-web.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
|
||||
StandardOutput=append:/appdata/bee/export/bee-audit.log
|
||||
StandardError=append:/appdata/bee/export/bee-audit.log
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
|
||||
16
iso/overlay/etc/systemd/system/bee-journal-mirror@.service
Normal file
16
iso/overlay/etc/systemd/system/bee-journal-mirror@.service
Normal file
@@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Description=Bee: mirror system journal to %I
|
||||
After=systemd-journald.service
|
||||
Requires=systemd-journald.service
|
||||
ConditionPathExists=/dev/%I
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/bin/sh -c 'exec journalctl -f -n 200 -o short-monotonic > /dev/%I'
|
||||
Restart=always
|
||||
RestartSec=1
|
||||
StandardOutput=null
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -5,9 +5,9 @@ Before=network-online.target bee-audit.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/bee-network.sh
|
||||
StandardOutput=append:/appdata/bee/export/bee-network.log
|
||||
StandardError=append:/appdata/bee/export/bee-network.log
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-network.log /usr/local/bin/bee-network.sh
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -5,9 +5,9 @@ Before=bee-audit.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/bee-nvidia-load
|
||||
StandardOutput=append:/appdata/bee/export/bee-nvidia.log
|
||||
StandardError=append:/appdata/bee/export/bee-nvidia.log
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-nvidia.log /usr/local/bin/bee-nvidia-load
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -5,9 +5,9 @@ Before=bee-audit.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
|
||||
StandardOutput=append:/appdata/bee/export/runtime-health.log
|
||||
StandardError=append:/appdata/bee/export/runtime-health.log
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/runtime-health.log /bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -5,9 +5,9 @@ Before=ssh.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/bee-sshsetup
|
||||
StandardOutput=append:/appdata/bee/export/bee-sshsetup.log
|
||||
StandardError=append:/appdata/bee/export/bee-sshsetup.log
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-sshsetup.log /usr/local/bin/bee-sshsetup
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -5,11 +5,11 @@ Wants=bee-audit.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
||||
Restart=always
|
||||
RestartSec=2
|
||||
StandardOutput=append:/appdata/bee/export/bee-web.log
|
||||
StandardError=append:/appdata/bee/export/bee-web.log
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
29
iso/overlay/usr/local/bin/bee-log-run
Normal file
29
iso/overlay/usr/local/bin/bee-log-run
Normal file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
# bee-log-run — run a command, append its output to a file, and keep stdout/stderr
|
||||
# connected to systemd so journald and the serial console also receive the logs.
|
||||
|
||||
set -o pipefail
|
||||
|
||||
log_file="$1"
|
||||
shift
|
||||
|
||||
if [ -z "$log_file" ] || [ "$#" -eq 0 ]; then
|
||||
echo "usage: $0 <log-file> <command> [args...]" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$log_file")"
|
||||
|
||||
serial_sink() {
|
||||
local tty="$1"
|
||||
if [ -w "$tty" ]; then
|
||||
cat > "$tty"
|
||||
else
|
||||
cat > /dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
"$@" 2>&1 | tee -a "$log_file" \
|
||||
>(serial_sink /dev/ttyS0) \
|
||||
>(serial_sink /dev/ttyS1)
|
||||
exit "${PIPESTATUS[0]}"
|
||||
@@ -22,24 +22,61 @@ fi
|
||||
log "module dir: $NVIDIA_KO_DIR"
|
||||
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
|
||||
|
||||
# Some kernels expose backlight helper symbols only after loading `video`.
|
||||
modprobe video >/dev/null 2>&1 && log "loaded helper module: video" || log "helper module unavailable: video"
|
||||
cmdline_param() {
|
||||
key="$1"
|
||||
for token in $(cat /proc/cmdline 2>/dev/null); do
|
||||
case "$token" in
|
||||
"$key"=*)
|
||||
echo "${token#*=}"
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
# Load modules via insmod (direct load — no depmod needed)
|
||||
for mod in nvidia nvidia-modeset nvidia-uvm; do
|
||||
nvidia_mode="$(cmdline_param bee.nvidia.mode || true)"
|
||||
if [ -z "$nvidia_mode" ]; then
|
||||
nvidia_mode="normal"
|
||||
fi
|
||||
log "boot mode: $nvidia_mode"
|
||||
|
||||
load_module() {
|
||||
mod="$1"
|
||||
shift
|
||||
ko="$NVIDIA_KO_DIR/${mod}.ko"
|
||||
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
|
||||
if [ -f "$ko" ]; then
|
||||
if insmod "$ko"; then
|
||||
log "loaded: $mod"
|
||||
else
|
||||
log "WARN: failed to load: $mod"
|
||||
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
|
||||
fi
|
||||
else
|
||||
if [ ! -f "$ko" ]; then
|
||||
log "WARN: not found: $ko"
|
||||
return 1
|
||||
fi
|
||||
done
|
||||
if insmod "$ko" "$@"; then
|
||||
log "loaded: $mod $*"
|
||||
return 0
|
||||
fi
|
||||
log "WARN: failed to load: $mod"
|
||||
dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true
|
||||
return 1
|
||||
}
|
||||
|
||||
case "$nvidia_mode" in
|
||||
normal|full)
|
||||
if ! load_module nvidia; then
|
||||
exit 1
|
||||
fi
|
||||
load_module nvidia-modeset || true
|
||||
load_module nvidia-uvm || true
|
||||
;;
|
||||
gsp-off|safe|*)
|
||||
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
|
||||
# be disabled via NVreg_EnableGpuFirmware=0. Safe mode keeps the live ISO on the
|
||||
# conservative path for platforms where full boot-time GSP init is unstable.
|
||||
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
|
||||
exit 1
|
||||
fi
|
||||
log "GSP-off mode: skipping nvidia-modeset and nvidia-uvm during boot"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
|
||||
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
|
||||
@@ -61,8 +98,6 @@ if [ -n "$uvm_major" ]; then
|
||||
&& log "created /dev/nvidia-uvm (major $uvm_major)" \
|
||||
|| log "WARN: /dev/nvidia-uvm already exists"
|
||||
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
|
||||
else
|
||||
log "WARN: nvidia-uvm not in /proc/devices"
|
||||
fi
|
||||
|
||||
log "done"
|
||||
|
||||
Reference in New Issue
Block a user