iso: improve burn-in, export, and live boot
This commit is contained in:
@@ -231,8 +231,11 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
|
|||||||
|
|
||||||
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
|
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
|
||||||
path, err := a.ExportLatestAudit(target)
|
path, err := a.ExportLatestAudit(target)
|
||||||
body := "Audit exported."
|
body := "Audit export failed."
|
||||||
if path != "" {
|
if err == nil {
|
||||||
|
body = "Audit exported."
|
||||||
|
}
|
||||||
|
if err == nil && path != "" {
|
||||||
body = "Audit exported to " + path
|
body = "Audit exported to " + path
|
||||||
}
|
}
|
||||||
return ActionResult{Title: "Export audit", Body: body}, err
|
return ActionResult{Title: "Export audit", Body: body}, err
|
||||||
@@ -249,8 +252,11 @@ func (a *App) ExportSupportBundle(target platform.RemovableTarget) (string, erro
|
|||||||
|
|
||||||
func (a *App) ExportSupportBundleResult(target platform.RemovableTarget) (ActionResult, error) {
|
func (a *App) ExportSupportBundleResult(target platform.RemovableTarget) (ActionResult, error) {
|
||||||
path, err := a.ExportSupportBundle(target)
|
path, err := a.ExportSupportBundle(target)
|
||||||
body := "Support bundle exported. USB target unmounted and safe to remove."
|
body := "Support bundle export failed."
|
||||||
if path != "" {
|
if err == nil {
|
||||||
|
body = "Support bundle exported. USB target unmounted and safe to remove."
|
||||||
|
}
|
||||||
|
if err == nil && path != "" {
|
||||||
body = "Support bundle exported to " + path + ".\n\nUSB target unmounted and safe to remove."
|
body = "Support bundle exported to " + path + ".\n\nUSB target unmounted and safe to remove."
|
||||||
}
|
}
|
||||||
return ActionResult{Title: "Export support bundle", Body: body}, err
|
return ActionResult{Title: "Export support bundle", Body: body}, err
|
||||||
|
|||||||
@@ -470,6 +470,41 @@ func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExportSupportBundleResultDoesNotPretendSuccessOnError(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tmp := t.TempDir()
|
||||||
|
oldExportDir := DefaultExportDir
|
||||||
|
DefaultExportDir = tmp
|
||||||
|
t.Cleanup(func() { DefaultExportDir = oldExportDir })
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(tmp, "bee-audit.json"), []byte("{}\n"), 0644); err != nil {
|
||||||
|
t.Fatalf("write bee-audit.json: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(tmp, "bee-audit.log"), []byte("audit ok\n"), 0644); err != nil {
|
||||||
|
t.Fatalf("write bee-audit.log: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
a := &App{
|
||||||
|
exports: fakeExports{
|
||||||
|
exportToTargetFn: func(string, platform.RemovableTarget) (string, error) {
|
||||||
|
return "", errors.New("mount /dev/sda1: exFAT support is missing in this ISO build")
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := a.ExportSupportBundleResult(platform.RemovableTarget{Device: "/dev/sda1", FSType: "exfat"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected export error")
|
||||||
|
}
|
||||||
|
if contains(result.Body, "exported to") {
|
||||||
|
t.Fatalf("body should not claim success:\n%s", result.Body)
|
||||||
|
}
|
||||||
|
if result.Body != "Support bundle export failed." {
|
||||||
|
t.Fatalf("body=%q want %q", result.Body, "Support bundle export failed.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -11,8 +11,48 @@ import (
|
|||||||
|
|
||||||
var exportExecCommand = exec.Command
|
var exportExecCommand = exec.Command
|
||||||
|
|
||||||
|
func formatMountTargetError(target RemovableTarget, raw string, err error) error {
|
||||||
|
msg := strings.TrimSpace(raw)
|
||||||
|
fstype := strings.ToLower(strings.TrimSpace(target.FSType))
|
||||||
|
if fstype == "exfat" && strings.Contains(strings.ToLower(msg), "unknown filesystem type 'exfat'") {
|
||||||
|
return fmt.Errorf("mount %s: exFAT support is missing in this ISO build: %w", target.Device, err)
|
||||||
|
}
|
||||||
|
if msg == "" {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return fmt.Errorf("%s: %w", msg, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// removableTargetReadOnly reports whether a device described by a map of
// lsblk-style fields should be treated as read-only.
//
// A device counts as read-only when its RO field is "1" or when its FSTYPE is
// iso9660 or squashfs. Both fields are whitespace-trimmed before comparison,
// and FSTYPE is compared case-insensitively. Missing keys and a nil map count
// as not read-only.
func removableTargetReadOnly(fields map[string]string) bool {
	if strings.TrimSpace(fields["RO"]) == "1" {
		return true
	}
	switch strings.ToLower(strings.TrimSpace(fields["FSTYPE"])) {
	case "iso9660", "squashfs":
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// ensureWritableMountpoint verifies that files can be created under mountpoint
// by creating, closing and removing a temporary probe file.
//
// It returns nil when the probe round-trip succeeds. A failure to create the
// probe is reported as "target filesystem is not writable". Failures while
// closing or removing the probe are wrapped with the step that failed. The
// underlying error is always wrapped with %w.
func ensureWritableMountpoint(mountpoint string) error {
	probe, err := os.CreateTemp(mountpoint, ".bee-write-test-*")
	if err != nil {
		return fmt.Errorf("target filesystem is not writable: %w", err)
	}
	name := probe.Name()

	// Always attempt the removal, even when Close fails, so no probe file is
	// left behind on the target. The close error takes precedence.
	closeErr := probe.Close()
	removeErr := os.Remove(name)
	if closeErr != nil {
		return fmt.Errorf("close write probe %s: %w", name, closeErr)
	}
	if removeErr != nil {
		return fmt.Errorf("remove write probe %s: %w", name, removeErr)
	}
	return nil
}
|
||||||
|
|
||||||
func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
|
func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
|
||||||
raw, err := exportExecCommand("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
|
raw, err := exportExecCommand("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,RO,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -36,7 +76,7 @@ func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !removable || fields["FSTYPE"] == "" {
|
if !removable || fields["FSTYPE"] == "" || removableTargetReadOnly(fields) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +112,7 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (dst str
|
|||||||
}
|
}
|
||||||
if raw, err := exportExecCommand("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
|
if raw, err := exportExecCommand("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
|
||||||
_ = os.Remove(mountpoint)
|
_ = os.Remove(mountpoint)
|
||||||
return string(raw), err
|
return "", formatMountTargetError(target, string(raw), err)
|
||||||
}
|
}
|
||||||
mountedHere = true
|
mountedHere = true
|
||||||
mounted = true
|
mounted = true
|
||||||
@@ -95,6 +135,10 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (dst str
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if err := ensureWritableMountpoint(mountpoint); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
filename := filepath.Base(src)
|
filename := filepath.Base(src)
|
||||||
dst = filepath.Join(mountpoint, filename)
|
dst = filepath.Join(mountpoint, filename)
|
||||||
data, err := os.ReadFile(src)
|
data, err := os.ReadFile(src)
|
||||||
|
|||||||
@@ -4,12 +4,11 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestExportFileToTargetUnmountsExistingMountpoint(t *testing.T) {
|
func TestExportFileToTargetUnmountsExistingMountpoint(t *testing.T) {
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
src := filepath.Join(tmp, "bundle.tar.gz")
|
src := filepath.Join(tmp, "bundle.tar.gz")
|
||||||
mountpoint := filepath.Join(tmp, "mnt")
|
mountpoint := filepath.Join(tmp, "mnt")
|
||||||
@@ -54,3 +53,60 @@ func TestExportFileToTargetUnmountsExistingMountpoint(t *testing.T) {
|
|||||||
t.Fatalf("expected umount %q call, got %#v", mountpoint, calls)
|
t.Fatalf("expected umount %q call, got %#v", mountpoint, calls)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExportFileToTargetRejectsNonWritableMountpoint(t *testing.T) {
|
||||||
|
tmp := t.TempDir()
|
||||||
|
src := filepath.Join(tmp, "bundle.tar.gz")
|
||||||
|
mountpoint := filepath.Join(tmp, "mnt")
|
||||||
|
if err := os.MkdirAll(mountpoint, 0755); err != nil {
|
||||||
|
t.Fatalf("mkdir mountpoint: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(src, []byte("bundle"), 0644); err != nil {
|
||||||
|
t.Fatalf("write src: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.Chmod(mountpoint, 0555); err != nil {
|
||||||
|
t.Fatalf("chmod mountpoint: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
oldExec := exportExecCommand
|
||||||
|
exportExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
return exec.Command("sh", "-c", "exit 0")
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { exportExecCommand = oldExec })
|
||||||
|
|
||||||
|
s := &System{}
|
||||||
|
_, err := s.ExportFileToTarget(src, RemovableTarget{
|
||||||
|
Device: "/dev/sdb1",
|
||||||
|
Mountpoint: mountpoint,
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for non-writable mountpoint")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "target filesystem is not writable") {
|
||||||
|
t.Fatalf("err=%q want writable message", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListRemovableTargetsSkipsReadOnlyMedia(t *testing.T) {
|
||||||
|
oldExec := exportExecCommand
|
||||||
|
lsblkOut := `NAME="sda1" TYPE="part" PKNAME="sda" RM="1" RO="1" FSTYPE="iso9660" MOUNTPOINT="/run/live/medium" SIZE="3.7G" LABEL="BEE" MODEL=""
|
||||||
|
NAME="sdb1" TYPE="part" PKNAME="sdb" RM="1" RO="0" FSTYPE="vfat" MOUNTPOINT="/media/bee/USB" SIZE="29.8G" LABEL="USB" MODEL=""`
|
||||||
|
exportExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
cmd := exec.Command("sh", "-c", "printf '%s\n' \"$LSBLK_OUT\"")
|
||||||
|
cmd.Env = append(os.Environ(), "LSBLK_OUT="+lsblkOut)
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { exportExecCommand = oldExec })
|
||||||
|
|
||||||
|
s := &System{}
|
||||||
|
targets, err := s.ListRemovableTargets()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListRemovableTargets error: %v", err)
|
||||||
|
}
|
||||||
|
if len(targets) != 1 {
|
||||||
|
t.Fatalf("len(targets)=%d want 1 (%+v)", len(targets), targets)
|
||||||
|
}
|
||||||
|
if got := targets[0].Device; got != "/dev/sdb1" {
|
||||||
|
t.Fatalf("device=%q want /dev/sdb1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -151,8 +151,10 @@ func (m model) confirmCancelTarget() screen {
|
|||||||
switch m.pendingAction {
|
switch m.pendingAction {
|
||||||
case actionExportBundle:
|
case actionExportBundle:
|
||||||
return screenExportTargets
|
return screenExportTargets
|
||||||
case actionRunAll, actionRunMemorySAT, actionRunStorageSAT, actionRunCPUSAT, actionRunAMDGPUSAT, actionRunFanStress:
|
case actionRunAll, actionRunMemorySAT, actionRunStorageSAT, actionRunCPUSAT, actionRunAMDGPUSAT:
|
||||||
return screenHealthCheck
|
return screenHealthCheck
|
||||||
|
case actionRunFanStress:
|
||||||
|
return screenBurnInTests
|
||||||
default:
|
default:
|
||||||
return screenMain
|
return screenMain
|
||||||
}
|
}
|
||||||
@@ -165,9 +167,9 @@ func hcFanStressOpts(hcMode int, application interface {
|
|||||||
// Phase durations per mode: [baseline, load1, pause, load2]
|
// Phase durations per mode: [baseline, load1, pause, load2]
|
||||||
type durations struct{ baseline, load1, pause, load2 int }
|
type durations struct{ baseline, load1, pause, load2 int }
|
||||||
modes := [3]durations{
|
modes := [3]durations{
|
||||||
{30, 120, 30, 120}, // Quick: ~5 min total
|
{30, 120, 30, 120}, // Quick: ~5 min total
|
||||||
{60, 300, 60, 300}, // Standard: ~12 min total
|
{60, 300, 60, 300}, // Standard: ~12 min total
|
||||||
{60, 600, 120, 600}, // Express: ~24 min total
|
{60, 600, 120, 600}, // Express: ~24 min total
|
||||||
}
|
}
|
||||||
if hcMode < 0 || hcMode >= len(modes) {
|
if hcMode < 0 || hcMode >= len(modes) {
|
||||||
hcMode = 0
|
hcMode = 0
|
||||||
|
|||||||
117
audit/internal/tui/screen_burn_in.go
Normal file
117
audit/internal/tui/screen_burn_in.go
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
package tui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
tea "github.com/charmbracelet/bubbletea"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Cursor positions on the Burn-in tests screen.
const (
	burnCurGPUStress = iota // GPU platform stress test entry
	burnCurModeQuick        // mode radio: Quick
	burnCurModeStd          // mode radio: Standard
	burnCurModeExpr         // mode radio: Express
	burnCurRun              // "run selected" entry
	burnCurTotal            // number of cursor positions
)
|
||||||
|
|
||||||
|
func (m model) enterBurnInTests() (tea.Model, tea.Cmd) {
|
||||||
|
m.screen = screenBurnInTests
|
||||||
|
m.cursor = 0
|
||||||
|
if !m.burnInitialized {
|
||||||
|
m.burnMode = 0
|
||||||
|
m.burnCursor = 0
|
||||||
|
m.burnInitialized = true
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m model) updateBurnInTests(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||||
|
switch msg.String() {
|
||||||
|
case "up", "k":
|
||||||
|
if m.burnCursor > 0 {
|
||||||
|
m.burnCursor--
|
||||||
|
}
|
||||||
|
case "down", "j":
|
||||||
|
if m.burnCursor < burnCurTotal-1 {
|
||||||
|
m.burnCursor++
|
||||||
|
}
|
||||||
|
case " ":
|
||||||
|
switch m.burnCursor {
|
||||||
|
case burnCurModeQuick, burnCurModeStd, burnCurModeExpr:
|
||||||
|
m.burnMode = m.burnCursor - burnCurModeQuick
|
||||||
|
}
|
||||||
|
case "enter":
|
||||||
|
switch m.burnCursor {
|
||||||
|
case burnCurGPUStress, burnCurRun:
|
||||||
|
return m.burnRunSelected()
|
||||||
|
case burnCurModeQuick, burnCurModeStd, burnCurModeExpr:
|
||||||
|
m.burnMode = m.burnCursor - burnCurModeQuick
|
||||||
|
}
|
||||||
|
case "f", "F", "r", "R":
|
||||||
|
return m.burnRunSelected()
|
||||||
|
case "1":
|
||||||
|
m.burnMode = 0
|
||||||
|
case "2":
|
||||||
|
m.burnMode = 1
|
||||||
|
case "3":
|
||||||
|
m.burnMode = 2
|
||||||
|
case "esc":
|
||||||
|
m.screen = screenMain
|
||||||
|
m.cursor = 1
|
||||||
|
case "q", "ctrl+c":
|
||||||
|
return m, tea.Quit
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m model) burnRunSelected() (tea.Model, tea.Cmd) {
|
||||||
|
return m.hcRunFanStress()
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderBurnInTests(m model) string {
|
||||||
|
var b strings.Builder
|
||||||
|
|
||||||
|
fmt.Fprintln(&b, "BURN-IN TESTS")
|
||||||
|
fmt.Fprintln(&b)
|
||||||
|
fmt.Fprintln(&b, " Stress tests:")
|
||||||
|
fmt.Fprintln(&b)
|
||||||
|
|
||||||
|
pfx := " "
|
||||||
|
if m.burnCursor == burnCurGPUStress {
|
||||||
|
pfx = "> "
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "%s[ GPU PLATFORM STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
|
||||||
|
|
||||||
|
fmt.Fprintln(&b)
|
||||||
|
fmt.Fprintln(&b, " Mode:")
|
||||||
|
modes := []struct{ label, key string }{
|
||||||
|
{"Quick", "1"},
|
||||||
|
{"Standard", "2"},
|
||||||
|
{"Express", "3"},
|
||||||
|
}
|
||||||
|
for i, mode := range modes {
|
||||||
|
pfx := " "
|
||||||
|
if m.burnCursor == burnCurModeQuick+i {
|
||||||
|
pfx = "> "
|
||||||
|
}
|
||||||
|
radio := "( )"
|
||||||
|
if m.burnMode == i {
|
||||||
|
radio = "(*)"
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "%s%s %-10s [%s]\n", pfx, radio, mode.label, mode.key)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintln(&b)
|
||||||
|
pfx = " "
|
||||||
|
if m.burnCursor == burnCurRun {
|
||||||
|
pfx = "> "
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "%s[ RUN SELECTED [R] ]\n", pfx)
|
||||||
|
|
||||||
|
fmt.Fprintln(&b)
|
||||||
|
fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
|
||||||
|
fmt.Fprint(&b, "[↑↓] move [space/enter] select [1/2/3] mode [R/F] run [Esc] back")
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
@@ -4,7 +4,12 @@ import tea "github.com/charmbracelet/bubbletea"
|
|||||||
|
|
||||||
func (m model) handleExportTargetsMenu() (tea.Model, tea.Cmd) {
|
func (m model) handleExportTargetsMenu() (tea.Model, tea.Cmd) {
|
||||||
if len(m.targets) == 0 {
|
if len(m.targets) == 0 {
|
||||||
return m, resultCmd("Export support bundle", "No removable filesystems found", nil, screenMain)
|
return m, resultCmd(
|
||||||
|
"Export support bundle",
|
||||||
|
"No writable removable filesystems found.\n\nRead-only or boot media are hidden from this list.",
|
||||||
|
nil,
|
||||||
|
screenMain,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
target := m.targets[m.cursor]
|
target := m.targets[m.cursor]
|
||||||
m.selectedTarget = &target
|
m.selectedTarget = &target
|
||||||
|
|||||||
@@ -21,17 +21,16 @@ const (
|
|||||||
|
|
||||||
// Cursor positions in Health Check screen.
|
// Cursor positions in Health Check screen.
|
||||||
const (
|
const (
|
||||||
hcCurGPU = 0
|
hcCurGPU = 0
|
||||||
hcCurMemory = 1
|
hcCurMemory = 1
|
||||||
hcCurStorage = 2
|
hcCurStorage = 2
|
||||||
hcCurCPU = 3
|
hcCurCPU = 3
|
||||||
hcCurSelectAll = 4
|
hcCurSelectAll = 4
|
||||||
hcCurModeQuick = 5
|
hcCurModeQuick = 5
|
||||||
hcCurModeStd = 6
|
hcCurModeStd = 6
|
||||||
hcCurModeExpr = 7
|
hcCurModeExpr = 7
|
||||||
hcCurRunAll = 8
|
hcCurRunAll = 8
|
||||||
hcCurFanStress = 9
|
hcCurTotal = 9
|
||||||
hcCurTotal = 10
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// hcModeDurations maps mode index (0=Quick,1=Standard,2=Express) to GPU stress seconds.
|
// hcModeDurations maps mode index (0=Quick,1=Standard,2=Express) to GPU stress seconds.
|
||||||
@@ -86,8 +85,6 @@ func (m model) updateHealthCheck(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
m.hcMode = m.hcCursor - hcCurModeQuick
|
m.hcMode = m.hcCursor - hcCurModeQuick
|
||||||
case hcCurRunAll:
|
case hcCurRunAll:
|
||||||
return m.hcRunAll()
|
return m.hcRunAll()
|
||||||
case hcCurFanStress:
|
|
||||||
return m.hcRunFanStress()
|
|
||||||
}
|
}
|
||||||
case "g", "G":
|
case "g", "G":
|
||||||
return m.hcRunSingle(hcGPU)
|
return m.hcRunSingle(hcGPU)
|
||||||
@@ -99,8 +96,6 @@ func (m model) updateHealthCheck(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
return m.hcRunSingle(hcCPU)
|
return m.hcRunSingle(hcCPU)
|
||||||
case "r", "R":
|
case "r", "R":
|
||||||
return m.hcRunAll()
|
return m.hcRunAll()
|
||||||
case "f", "F":
|
|
||||||
return m.hcRunFanStress()
|
|
||||||
case "a", "A":
|
case "a", "A":
|
||||||
allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
|
allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
|
||||||
for i := range m.hcSel {
|
for i := range m.hcSel {
|
||||||
@@ -160,7 +155,7 @@ func (m model) hcRunFanStress() (tea.Model, tea.Cmd) {
|
|||||||
|
|
||||||
// startGPUStressTest launches the GPU Platform Stress Test with a live in-TUI chart.
|
// startGPUStressTest launches the GPU Platform Stress Test with a live in-TUI chart.
|
||||||
func (m model) startGPUStressTest() (tea.Model, tea.Cmd) {
|
func (m model) startGPUStressTest() (tea.Model, tea.Cmd) {
|
||||||
opts := hcFanStressOpts(m.hcMode, m.app)
|
opts := hcFanStressOpts(m.burnMode, m.app)
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
m.gpuStressCancel = cancel
|
m.gpuStressCancel = cancel
|
||||||
@@ -197,7 +192,8 @@ func (m model) updateGPUStressRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
m.gpuStressCancel = nil
|
m.gpuStressCancel = nil
|
||||||
}
|
}
|
||||||
m.gpuStressAborted = true
|
m.gpuStressAborted = true
|
||||||
m.screen = screenHealthCheck
|
m.screen = screenBurnInTests
|
||||||
|
m.burnCursor = burnCurGPUStress
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
case "ctrl+c":
|
case "ctrl+c":
|
||||||
return m, tea.Quit
|
return m, tea.Quit
|
||||||
@@ -380,16 +376,8 @@ func renderHealthCheck(m model) string {
|
|||||||
fmt.Fprintf(&b, "%s[ RUN ALL [R] ]\n", pfx)
|
fmt.Fprintf(&b, "%s[ RUN ALL [R] ]\n", pfx)
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
pfx := " "
|
|
||||||
if m.hcCursor == hcCurFanStress {
|
|
||||||
pfx = "> "
|
|
||||||
}
|
|
||||||
fmt.Fprintf(&b, "%s[ GPU PLATFORM STRESS TEST [F] ] (thermal cycling, fan lag, throttle check)\n", pfx)
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Fprintln(&b)
|
fmt.Fprintln(&b)
|
||||||
fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
|
fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
|
||||||
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [F] gpu stress [Esc] back")
|
fmt.Fprint(&b, "[↑↓] move [space/enter] toggle [letter] single test [R] run all [Esc] back")
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,9 @@ func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
|
|||||||
switch m.cursor {
|
switch m.cursor {
|
||||||
case 0: // Health Check
|
case 0: // Health Check
|
||||||
return m.enterHealthCheck()
|
return m.enterHealthCheck()
|
||||||
case 1: // Export support bundle
|
case 1: // Burn-in tests
|
||||||
|
return m.enterBurnInTests()
|
||||||
|
case 2: // Export support bundle
|
||||||
m.pendingAction = actionExportBundle
|
m.pendingAction = actionExportBundle
|
||||||
m.busy = true
|
m.busy = true
|
||||||
m.busyTitle = "Export support bundle"
|
m.busyTitle = "Export support bundle"
|
||||||
@@ -16,11 +18,11 @@ func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
|
|||||||
targets, err := m.app.ListRemovableTargets()
|
targets, err := m.app.ListRemovableTargets()
|
||||||
return exportTargetsMsg{targets: targets, err: err}
|
return exportTargetsMsg{targets: targets, err: err}
|
||||||
}
|
}
|
||||||
case 2: // Settings
|
case 3: // Settings
|
||||||
m.screen = screenSettings
|
m.screen = screenSettings
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
return m, nil
|
return m, nil
|
||||||
case 3: // Exit
|
case 4: // Exit
|
||||||
return m, tea.Quit
|
return m, tea.Quit
|
||||||
}
|
}
|
||||||
return m, nil
|
return m, nil
|
||||||
|
|||||||
@@ -54,9 +54,10 @@ func TestUpdateMainMenuEnterActions(t *testing.T) {
|
|||||||
wantCmd bool
|
wantCmd bool
|
||||||
}{
|
}{
|
||||||
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck, wantCmd: true},
|
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck, wantCmd: true},
|
||||||
{name: "export", cursor: 1, wantScreen: screenMain, wantBusy: true, wantCmd: true},
|
{name: "burn_in_tests", cursor: 1, wantScreen: screenBurnInTests, wantCmd: true},
|
||||||
{name: "settings", cursor: 2, wantScreen: screenSettings, wantCmd: true},
|
{name: "export", cursor: 2, wantScreen: screenMain, wantBusy: true, wantCmd: true},
|
||||||
{name: "exit", cursor: 3, wantScreen: screenMain, wantCmd: true},
|
{name: "settings", cursor: 3, wantScreen: screenSettings, wantCmd: true},
|
||||||
|
{name: "exit", cursor: 4, wantScreen: screenMain, wantCmd: true},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
@@ -115,7 +116,8 @@ func TestMainMenuSimpleTransitions(t *testing.T) {
|
|||||||
wantScreen screen
|
wantScreen screen
|
||||||
}{
|
}{
|
||||||
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck},
|
{name: "health_check", cursor: 0, wantScreen: screenHealthCheck},
|
||||||
{name: "settings", cursor: 2, wantScreen: screenSettings},
|
{name: "burn_in_tests", cursor: 1, wantScreen: screenBurnInTests},
|
||||||
|
{name: "settings", cursor: 3, wantScreen: screenSettings},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
@@ -146,7 +148,7 @@ func TestMainMenuExportSetsBusy(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
m := newTestModel()
|
m := newTestModel()
|
||||||
m.cursor = 1 // Export support bundle
|
m.cursor = 2 // Export support bundle
|
||||||
|
|
||||||
next, cmd := m.handleMainMenu()
|
next, cmd := m.handleMainMenu()
|
||||||
got := next.(model)
|
got := next.(model)
|
||||||
@@ -163,12 +165,13 @@ func TestMainViewRendersTwoColumns(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
m := newTestModel()
|
m := newTestModel()
|
||||||
m.cursor = 1
|
m.cursor = 2
|
||||||
|
|
||||||
view := m.View()
|
view := m.View()
|
||||||
for _, want := range []string{
|
for _, want := range []string{
|
||||||
"bee",
|
"bee",
|
||||||
"Health Check",
|
"Health Check",
|
||||||
|
"Burn-in tests",
|
||||||
"> Export support bundle",
|
"> Export support bundle",
|
||||||
"Settings",
|
"Settings",
|
||||||
"Exit",
|
"Exit",
|
||||||
@@ -400,6 +403,11 @@ func TestConfirmCancelTarget(t *testing.T) {
|
|||||||
t.Fatalf("storage sat cancel target=%q want %q", got, screenHealthCheck)
|
t.Fatalf("storage sat cancel target=%q want %q", got, screenHealthCheck)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.pendingAction = actionRunFanStress
|
||||||
|
if got := m.confirmCancelTarget(); got != screenBurnInTests {
|
||||||
|
t.Fatalf("fan stress cancel target=%q want %q", got, screenBurnInTests)
|
||||||
|
}
|
||||||
|
|
||||||
m.pendingAction = actionNone
|
m.pendingAction = actionNone
|
||||||
if got := m.confirmCancelTarget(); got != screenMain {
|
if got := m.confirmCancelTarget(); got != screenMain {
|
||||||
t.Fatalf("default cancel target=%q want %q", got, screenMain)
|
t.Fatalf("default cancel target=%q want %q", got, screenMain)
|
||||||
@@ -439,6 +447,68 @@ func TestViewBusyStateUsesBusyTitle(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBurnInTestsEscReturnsToMain(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenBurnInTests
|
||||||
|
m.burnCursor = 3
|
||||||
|
|
||||||
|
next, _ := m.updateBurnInTests(tea.KeyMsg{Type: tea.KeyEsc})
|
||||||
|
got := next.(model)
|
||||||
|
|
||||||
|
if got.screen != screenMain {
|
||||||
|
t.Fatalf("screen=%q want %q", got.screen, screenMain)
|
||||||
|
}
|
||||||
|
if got.cursor != 1 {
|
||||||
|
t.Fatalf("cursor=%d want 1", got.cursor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBurnInTestsRunOpensConfirm(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenBurnInTests
|
||||||
|
m.burnInitialized = true
|
||||||
|
m.burnMode = 2
|
||||||
|
|
||||||
|
next, _ := m.burnRunSelected()
|
||||||
|
got := next.(model)
|
||||||
|
|
||||||
|
if got.screen != screenConfirm {
|
||||||
|
t.Fatalf("screen=%q want %q", got.screen, screenConfirm)
|
||||||
|
}
|
||||||
|
if got.pendingAction != actionRunFanStress {
|
||||||
|
t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionRunFanStress)
|
||||||
|
}
|
||||||
|
if got.cursor != 0 {
|
||||||
|
t.Fatalf("cursor=%d want 0", got.cursor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestViewBurnInTestsRendersGPUStressEntry(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.screen = screenBurnInTests
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"BURN-IN TESTS",
|
||||||
|
"GPU PLATFORM STRESS TEST",
|
||||||
|
"Quick",
|
||||||
|
"Standard",
|
||||||
|
"Express",
|
||||||
|
"[ RUN SELECTED [R] ]",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(view, want) {
|
||||||
|
t.Fatalf("view missing %q\nview:\n%s", want, view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestViewOutputScreenRendersBodyAndBackHint(t *testing.T) {
|
func TestViewOutputScreenRendersBodyAndBackHint(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -528,7 +598,7 @@ func TestViewExportTargetsRendersDeviceMetadata(t *testing.T) {
|
|||||||
|
|
||||||
for _, want := range []string{
|
for _, want := range []string{
|
||||||
"Export support bundle",
|
"Export support bundle",
|
||||||
"Select removable filesystem",
|
"Select writable removable filesystem (read-only/boot media hidden)",
|
||||||
"> /dev/sdb1 [vfat 29G] label=BEEUSB mounted=/media/bee",
|
"> /dev/sdb1 [vfat 29G] label=BEEUSB mounted=/media/bee",
|
||||||
} {
|
} {
|
||||||
if !strings.Contains(view, want) {
|
if !strings.Contains(view, want) {
|
||||||
@@ -537,6 +607,32 @@ func TestViewExportTargetsRendersDeviceMetadata(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExportTargetsMsgEmptyShowsHiddenBootMediaHint(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.busy = true
|
||||||
|
m.busyTitle = "Export support bundle"
|
||||||
|
|
||||||
|
next, _ := m.Update(exportTargetsMsg{})
|
||||||
|
got := next.(model)
|
||||||
|
|
||||||
|
if got.screen != screenOutput {
|
||||||
|
t.Fatalf("screen=%q want %q", got.screen, screenOutput)
|
||||||
|
}
|
||||||
|
if got.title != "Export support bundle" {
|
||||||
|
t.Fatalf("title=%q want %q", got.title, "Export support bundle")
|
||||||
|
}
|
||||||
|
for _, want := range []string{
|
||||||
|
"No writable removable filesystems found.",
|
||||||
|
"Read-only or boot media are hidden from this list.",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(got.body, want) {
|
||||||
|
t.Fatalf("body missing %q\nbody:\n%s", want, got.body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestViewStaticFormRendersFields(t *testing.T) {
|
func TestViewStaticFormRendersFields(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ type screen string
|
|||||||
const (
|
const (
|
||||||
screenMain screen = "main"
|
screenMain screen = "main"
|
||||||
screenHealthCheck screen = "health_check"
|
screenHealthCheck screen = "health_check"
|
||||||
|
screenBurnInTests screen = "burn_in_tests"
|
||||||
screenSettings screen = "settings"
|
screenSettings screen = "settings"
|
||||||
screenNetwork screen = "network"
|
screenNetwork screen = "network"
|
||||||
screenInterfacePick screen = "interface_pick"
|
screenInterfacePick screen = "interface_pick"
|
||||||
@@ -41,8 +42,8 @@ const (
|
|||||||
actionRunMemorySAT actionKind = "run_memory_sat"
|
actionRunMemorySAT actionKind = "run_memory_sat"
|
||||||
actionRunStorageSAT actionKind = "run_storage_sat"
|
actionRunStorageSAT actionKind = "run_storage_sat"
|
||||||
actionRunCPUSAT actionKind = "run_cpu_sat"
|
actionRunCPUSAT actionKind = "run_cpu_sat"
|
||||||
actionRunAMDGPUSAT actionKind = "run_amd_gpu_sat"
|
actionRunAMDGPUSAT actionKind = "run_amd_gpu_sat"
|
||||||
actionRunFanStress actionKind = "run_fan_stress"
|
actionRunFanStress actionKind = "run_fan_stress"
|
||||||
)
|
)
|
||||||
|
|
||||||
type model struct {
|
type model struct {
|
||||||
@@ -84,6 +85,11 @@ type model struct {
|
|||||||
hcCursor int
|
hcCursor int
|
||||||
hcInitialized bool
|
hcInitialized bool
|
||||||
|
|
||||||
|
// Burn-in tests screen
|
||||||
|
burnMode int
|
||||||
|
burnCursor int
|
||||||
|
burnInitialized bool
|
||||||
|
|
||||||
// NVIDIA SAT setup
|
// NVIDIA SAT setup
|
||||||
nvidiaGPUs []platform.NvidiaGPU
|
nvidiaGPUs []platform.NvidiaGPU
|
||||||
nvidiaGPUSel []bool
|
nvidiaGPUSel []bool
|
||||||
@@ -97,9 +103,9 @@ type model struct {
|
|||||||
// GPU Platform Stress Test running
|
// GPU Platform Stress Test running
|
||||||
gpuStressCancel func()
|
gpuStressCancel func()
|
||||||
gpuStressAborted bool
|
gpuStressAborted bool
|
||||||
gpuLiveRows []platform.GPUMetricRow
|
gpuLiveRows []platform.GPUMetricRow
|
||||||
gpuLiveIndices []int
|
gpuLiveIndices []int
|
||||||
gpuLiveStart time.Time
|
gpuLiveStart time.Time
|
||||||
|
|
||||||
// SAT verbose progress (CPU / Memory / Storage / AMD GPU)
|
// SAT verbose progress (CPU / Memory / Storage / AMD GPU)
|
||||||
progressLines []string
|
progressLines []string
|
||||||
@@ -132,6 +138,7 @@ func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
|
|||||||
screen: screenMain,
|
screen: screenMain,
|
||||||
mainMenu: []string{
|
mainMenu: []string{
|
||||||
"Health Check",
|
"Health Check",
|
||||||
|
"Burn-in tests",
|
||||||
"Export support bundle",
|
"Export support bundle",
|
||||||
"Settings",
|
"Settings",
|
||||||
"Exit",
|
"Exit",
|
||||||
@@ -201,7 +208,7 @@ func (m model) confirmBody() (string, string) {
|
|||||||
modes := []string{"Quick (2×2min)", "Standard (2×5min)", "Express (2×10min)"}
|
modes := []string{"Quick (2×2min)", "Standard (2×5min)", "Express (2×10min)"}
|
||||||
return "GPU Platform Stress Test", "Two-phase GPU thermal cycling test.\n" +
|
return "GPU Platform Stress Test", "Two-phase GPU thermal cycling test.\n" +
|
||||||
"Monitors fans, temps, power — detects throttling.\n" +
|
"Monitors fans, temps, power — detects throttling.\n" +
|
||||||
"Mode: " + modes[m.hcMode] + "\n\nAll NVIDIA GPUs will be stressed."
|
"Mode: " + modes[m.burnMode] + "\n\nAll NVIDIA GPUs will be stressed."
|
||||||
default:
|
default:
|
||||||
return "Confirm", "Proceed?"
|
return "Confirm", "Proceed?"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -101,6 +101,13 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
m.screen = screenOutput
|
m.screen = screenOutput
|
||||||
return m, m.refreshSnapshotCmd()
|
return m, m.refreshSnapshotCmd()
|
||||||
}
|
}
|
||||||
|
if len(msg.targets) == 0 {
|
||||||
|
m.title = "Export support bundle"
|
||||||
|
m.body = "No writable removable filesystems found.\n\nRead-only or boot media are hidden from this list."
|
||||||
|
m.prevScreen = screenMain
|
||||||
|
m.screen = screenOutput
|
||||||
|
return m, m.refreshSnapshotCmd()
|
||||||
|
}
|
||||||
m.targets = msg.targets
|
m.targets = msg.targets
|
||||||
m.screen = screenExportTargets
|
m.screen = screenExportTargets
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
@@ -117,7 +124,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
m.gpuStressCancel()
|
m.gpuStressCancel()
|
||||||
m.gpuStressCancel = nil
|
m.gpuStressCancel = nil
|
||||||
}
|
}
|
||||||
m.prevScreen = screenHealthCheck
|
m.prevScreen = screenBurnInTests
|
||||||
m.screen = screenOutput
|
m.screen = screenOutput
|
||||||
m.title = msg.title
|
m.title = msg.title
|
||||||
if msg.err != nil {
|
if msg.err != nil {
|
||||||
@@ -179,6 +186,8 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|||||||
return m.updateMain(msg)
|
return m.updateMain(msg)
|
||||||
case screenHealthCheck:
|
case screenHealthCheck:
|
||||||
return m.updateHealthCheck(msg)
|
return m.updateHealthCheck(msg)
|
||||||
|
case screenBurnInTests:
|
||||||
|
return m.updateBurnInTests(msg)
|
||||||
case screenSettings:
|
case screenSettings:
|
||||||
return m.updateMenu(msg, len(m.settingsMenu), m.handleSettingsMenu)
|
return m.updateMenu(msg, len(m.settingsMenu), m.handleSettingsMenu)
|
||||||
case screenNetwork:
|
case screenNetwork:
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ func (m model) View() string {
|
|||||||
body = renderTwoColumnMain(m)
|
body = renderTwoColumnMain(m)
|
||||||
case screenHealthCheck:
|
case screenHealthCheck:
|
||||||
body = renderHealthCheck(m)
|
body = renderHealthCheck(m)
|
||||||
|
case screenBurnInTests:
|
||||||
|
body = renderBurnInTests(m)
|
||||||
case screenSettings:
|
case screenSettings:
|
||||||
body = renderMenu("Settings", "Select action", m.settingsMenu, m.cursor)
|
body = renderMenu("Settings", "Select action", m.settingsMenu, m.cursor)
|
||||||
case screenNetwork:
|
case screenNetwork:
|
||||||
@@ -66,7 +68,12 @@ func (m model) View() string {
|
|||||||
case screenServiceAction:
|
case screenServiceAction:
|
||||||
body = renderMenu("Service: "+m.selectedService, "Select action", m.serviceMenu, m.cursor)
|
body = renderMenu("Service: "+m.selectedService, "Select action", m.serviceMenu, m.cursor)
|
||||||
case screenExportTargets:
|
case screenExportTargets:
|
||||||
body = renderMenu("Export support bundle", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
|
body = renderMenu(
|
||||||
|
"Export support bundle",
|
||||||
|
"Select writable removable filesystem (read-only/boot media hidden)",
|
||||||
|
renderTargetItems(m.targets),
|
||||||
|
m.cursor,
|
||||||
|
)
|
||||||
case screenInterfacePick:
|
case screenInterfacePick:
|
||||||
body = renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
|
body = renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
|
||||||
case screenStaticForm:
|
case screenStaticForm:
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ DHCP is used only for LAN (operator SSH access). Internet is NOT available.
|
|||||||
|
|
||||||
## Boot sequence (single ISO)
|
## Boot sequence (single ISO)
|
||||||
|
|
||||||
|
The live system is expected to boot with `toram`, so `live-boot` copies the full read-only medium into RAM before mounting the root filesystem. After that point, runtime must not depend on the original USB/BMC virtual media staying readable.
|
||||||
|
|
||||||
`systemd` boot order:
|
`systemd` boot order:
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -25,6 +27,7 @@ local-fs.target
|
|||||||
```
|
```
|
||||||
|
|
||||||
**Critical invariants:**
|
**Critical invariants:**
|
||||||
|
- The live ISO boots with `boot=live toram`. Runtime binaries must continue working even if the original boot media disappears after early boot.
|
||||||
- OpenSSH MUST start without network. `bee-sshsetup.service` runs before `ssh.service`.
|
- OpenSSH MUST start without network. `bee-sshsetup.service` runs before `ssh.service`.
|
||||||
- `bee-network.service` uses `dhclient -nw` (background) — network bring-up is best effort and non-blocking.
|
- `bee-network.service` uses `dhclient -nw` (background) — network bring-up is best effort and non-blocking.
|
||||||
- `bee-nvidia.service` loads modules via `insmod` with absolute paths — NOT `modprobe`.
|
- `bee-nvidia.service` loads modules via `insmod` with absolute paths — NOT `modprobe`.
|
||||||
@@ -71,24 +74,39 @@ build-in-container.sh [--authorized-keys /path/to/keys]
|
|||||||
d. build kernel modules against Debian headers
|
d. build kernel modules against Debian headers
|
||||||
e. create `libnvidia-ml.so.1` / `libcuda.so.1` symlinks in cache
|
e. create `libnvidia-ml.so.1` / `libcuda.so.1` symlinks in cache
|
||||||
f. cache in `dist/nvidia-<version>-<kver>/`
|
f. cache in `dist/nvidia-<version>-<kver>/`
|
||||||
7. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
|
7. `build-cublas.sh`:
|
||||||
8. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
|
a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
|
||||||
9. inject `libnvidia-ml` + `libcuda` → staged `/usr/lib/`
|
b. verify packages against repo `Packages.gz`
|
||||||
10. write staged `/etc/bee-release` (versions + git commit)
|
c. extract headers for `bee-gpu-stress` build
|
||||||
11. patch staged `motd` with build metadata
|
d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
|
||||||
12. copy `iso/builder/` into a temporary live-build workdir under `dist/`
|
8. build `bee-gpu-stress` against extracted cuBLASLt/cudart headers
|
||||||
13. sync staged overlay into workdir `config/includes.chroot/`
|
9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
|
||||||
14. run `lb config && lb build` inside the privileged builder container
|
10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
|
||||||
|
11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
|
||||||
|
12. write staged `/etc/bee-release` (versions + git commit)
|
||||||
|
13. patch staged `motd` with build metadata
|
||||||
|
14. copy `iso/builder/` into a temporary live-build workdir under `dist/`
|
||||||
|
15. sync staged overlay into workdir `config/includes.chroot/`
|
||||||
|
16. run `lb config && lb build` inside the privileged builder container
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Build host notes:
|
||||||
|
- `build-in-container.sh` targets `linux/amd64` builder containers by default, including Docker Desktop on macOS / Apple Silicon.
|
||||||
|
- Override with `BEE_BUILDER_PLATFORM=<os/arch>` only if you intentionally need a different container platform.
|
||||||
|
- If the local builder image under the same tag was previously built for the wrong architecture, the script rebuilds it automatically.
|
||||||
|
|
||||||
**Critical invariants:**
|
**Critical invariants:**
|
||||||
- `DEBIAN_KERNEL_ABI` in `iso/builder/VERSIONS` pins the exact kernel ABI used in BOTH places:
|
- `DEBIAN_KERNEL_ABI` in `iso/builder/VERSIONS` pins the exact kernel ABI used in BOTH places:
|
||||||
1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
|
1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
|
||||||
2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
|
2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
|
||||||
- NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
|
- NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
|
||||||
|
- `bee-gpu-stress` must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
|
||||||
|
- The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
|
||||||
- The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
|
- The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
|
||||||
- The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
|
- The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
|
||||||
- Container build requires `--privileged` because `live-build` uses mounts/chroots/loop devices during ISO assembly.
|
- Container build requires `--privileged` because `live-build` uses mounts/chroots/loop devices during ISO assembly.
|
||||||
|
- On macOS / Docker Desktop, the builder still must run as `linux/amd64` so the shipped ISO binaries remain `amd64`.
|
||||||
|
- Operators must provision enough RAM to hold the full compressed live medium plus normal runtime overhead, because `toram` copies the entire read-only ISO payload into memory before the system reaches steady state.
|
||||||
|
|
||||||
## Post-boot smoke test
|
## Post-boot smoke test
|
||||||
|
|
||||||
@@ -131,10 +149,15 @@ Current validation state:
|
|||||||
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
||||||
|
|
||||||
Acceptance flows:
|
Acceptance flows:
|
||||||
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-stress`
|
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + mixed-precision `bee-gpu-stress`
|
||||||
- `bee sat memory` → `memtester` archive
|
- `bee sat memory` → `memtester` archive
|
||||||
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
||||||
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
|
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
|
||||||
|
- `bee-gpu-stress` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
|
||||||
|
- Ampere: `fp16` + `fp32`/TF32 tensor-core load
|
||||||
|
- Ada / Hopper: add `fp8`
|
||||||
|
- Blackwell+: add `fp4`
|
||||||
|
- PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
|
||||||
- Runtime overrides:
|
- Runtime overrides:
|
||||||
- `BEE_GPU_STRESS_SECONDS`
|
- `BEE_GPU_STRESS_SECONDS`
|
||||||
- `BEE_GPU_STRESS_SIZE_MB`
|
- `BEE_GPU_STRESS_SIZE_MB`
|
||||||
|
|||||||
@@ -21,7 +21,8 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
||||||
- Machine-readable health summary derived from collector verdicts
|
- Machine-readable health summary derived from collector verdicts
|
||||||
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
|
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
|
||||||
- NVIDIA SAT includes both diagnostic collection and lightweight GPU stress via `bee-gpu-stress`
|
- NVIDIA SAT includes both diagnostic collection and mixed-precision GPU stress via `bee-gpu-stress`
|
||||||
|
- `bee-gpu-stress` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
|
||||||
- Automatic boot audit with operator-facing local console and SSH access
|
- Automatic boot audit with operator-facing local console and SSH access
|
||||||
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
||||||
- SSH access (OpenSSH) always available for inspection and debugging
|
- SSH access (OpenSSH) always available for inspection and debugging
|
||||||
@@ -69,6 +70,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
| SSH | OpenSSH server |
|
| SSH | OpenSSH server |
|
||||||
| NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
|
| NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
|
||||||
| NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
|
| NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
|
||||||
|
| GPU stress backend | `bee-gpu-stress` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
|
||||||
| Builder | Debian 12 host/VM or Debian 12 container image |
|
| Builder | Debian 12 host/VM or Debian 12 container image |
|
||||||
|
|
||||||
## Operator UX
|
## Operator UX
|
||||||
@@ -78,6 +80,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- The TUI itself executes privileged actions as `root` via `sudo -n`
|
- The TUI itself executes privileged actions as `root` via `sudo -n`
|
||||||
- SSH remains available independently of the local console path
|
- SSH remains available independently of the local console path
|
||||||
- VM-oriented builds also include `qemu-guest-agent` and serial console support for debugging
|
- VM-oriented builds also include `qemu-guest-agent` and serial console support for debugging
|
||||||
|
- The ISO boots with `toram`, so loss of the original USB/BMC virtual media after boot should not break already-installed runtime binaries
|
||||||
|
|
||||||
## Runtime split
|
## Runtime split
|
||||||
|
|
||||||
@@ -85,6 +88,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- Live-ISO-only responsibilities stay in `iso/` integration code
|
- Live-ISO-only responsibilities stay in `iso/` integration code
|
||||||
- Live ISO launches the Go CLI with `--runtime livecd`
|
- Live ISO launches the Go CLI with `--runtime livecd`
|
||||||
- Local/manual runs use `--runtime auto` or `--runtime local`
|
- Local/manual runs use `--runtime auto` or `--runtime local`
|
||||||
|
- Live ISO targets must have enough RAM for the full compressed live medium plus runtime working set because the boot medium is copied into memory at startup
|
||||||
|
|
||||||
## Key paths
|
## Key paths
|
||||||
|
|
||||||
|
|||||||
58
iso/README.md
Normal file
58
iso/README.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# ISO Build
|
||||||
|
|
||||||
|
`bee` ISO is built inside a Debian 12 builder container via `iso/builder/build-in-container.sh`.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Docker Desktop or another Docker-compatible container runtime
|
||||||
|
- Privileged containers enabled
|
||||||
|
- Enough free disk space for builder cache, Debian live-build artifacts, NVIDIA driver cache, and CUDA userspace packages
|
||||||
|
|
||||||
|
## Build On macOS
|
||||||
|
|
||||||
|
From the repository root:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
sh iso/builder/build-in-container.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
The script defaults to `linux/amd64` builder containers, so it works on:
|
||||||
|
|
||||||
|
- Intel Mac
|
||||||
|
- Apple Silicon (`M1` / `M2` / `M3` / `M4`) via Docker Desktop's Linux VM
|
||||||
|
|
||||||
|
You do not need to pass `--platform` manually for normal ISO builds.
|
||||||
|
|
||||||
|
## Useful Options
|
||||||
|
|
||||||
|
Build with explicit SSH keys baked into the ISO:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
sh iso/builder/build-in-container.sh --authorized-keys ~/.ssh/id_ed25519.pub
|
||||||
|
```
|
||||||
|
|
||||||
|
Rebuild the builder image:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
sh iso/builder/build-in-container.sh --rebuild-image
|
||||||
|
```
|
||||||
|
|
||||||
|
Use a custom cache directory:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
sh iso/builder/build-in-container.sh --cache-dir /path/to/cache
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
|
||||||
|
- The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
|
||||||
|
- Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
|
||||||
|
- Override the container platform only if you have a specific reason to; the default is already `linux/amd64`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
BEE_BUILDER_PLATFORM=linux/amd64 sh iso/builder/build-in-container.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
- The shipped ISO is still `amd64`.
|
||||||
|
- Output ISO artifacts are written under `dist/`.
|
||||||
@@ -4,5 +4,7 @@ NVIDIA_DRIVER_VERSION=590.48.01
|
|||||||
NCCL_VERSION=2.28.9-1
|
NCCL_VERSION=2.28.9-1
|
||||||
NCCL_CUDA_VERSION=13.0
|
NCCL_CUDA_VERSION=13.0
|
||||||
NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
|
NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
|
||||||
|
CUBLAS_VERSION=13.0.2.14-1
|
||||||
|
CUDA_USERSPACE_VERSION=13.0.96-1
|
||||||
GO_VERSION=1.24.0
|
GO_VERSION=1.24.0
|
||||||
AUDIT_VERSION=1.0.0
|
AUDIT_VERSION=1.0.0
|
||||||
|
|||||||
@@ -32,6 +32,6 @@ lb config noauto \
|
|||||||
--memtest none \
|
--memtest none \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY-BEE" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE" \
|
||||||
--bootappend-live "boot=live components console=tty2 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live toram components console=tty2 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
170
iso/builder/build-cublas.sh
Normal file
170
iso/builder/build-cublas.sh
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-stress.
|
||||||
|
#
|
||||||
|
# Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
|
||||||
|
# verifies them against Packages.gz, and extracts the small subset we need:
|
||||||
|
# - headers for compiling bee-gpu-stress against cuBLASLt
|
||||||
|
# - runtime libs for libcublas, libcublasLt, libcudart inside the ISO
|
||||||
|
|
||||||
|
set -e

# Positional arguments; all four are mandatory.
CUBLAS_VERSION="$1"
CUDA_USERSPACE_VERSION="$2"
CUDA_SERIES="$3"
DIST_DIR="$4"

USAGE="usage: $0 <cublas-version> <cuda-userspace-version> <cuda-series> <dist-dir>"

# Any missing argument prints the usage line and aborts with status 1.
if [ -z "$CUBLAS_VERSION" ] || [ -z "$CUDA_USERSPACE_VERSION" ] \
    || [ -z "$CUDA_SERIES" ] || [ -z "$DIST_DIR" ]; then
    echo "$USAGE"
    exit 1
fi

# Package names in the repo use dashes in the CUDA series (13.0 -> 13-0).
CUDA_SERIES_DASH=$(printf '%s' "$CUDA_SERIES" | tr '.' '-')
REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64"

# CACHE_DIR holds the extracted headers/libs; DOWNLOAD_CACHE_DIR holds the
# raw .deb files and the repo index.
CACHE_DIR="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${CUDA_SERIES}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/cublas-downloads"
PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz"

echo "=== cuBLAS ${CUBLAS_VERSION} / cudart ${CUDA_USERSPACE_VERSION} / CUDA ${CUDA_SERIES} ==="
|
||||||
|
|
||||||
|
# Reuse the extracted cache only when every artifact required by the final
# validation step is present: both headers and each of the three libraries.
# A cache left half-populated by an interrupted run must be refreshed, not
# silently accepted.
cache_complete=1
for required_header in cublasLt.h cuda_runtime_api.h; do
    [ -f "${CACHE_DIR}/include/${required_header}" ] || cache_complete=0
done
for required_lib in libcublas libcublasLt libcudart; do
    if [ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name "${required_lib}.so*" 2>/dev/null | wc -l)" -eq 0 ]; then
        cache_complete=0
    fi
done

if [ "$cache_complete" = "1" ]; then
    echo "=== cuBLAS cached, skipping download ==="
    echo "cache: $CACHE_DIR"
    exit 0
fi

mkdir -p "${DOWNLOAD_CACHE_DIR}" "${CACHE_DIR}/include" "${CACHE_DIR}/lib"

# The repo index is always re-downloaded; it supplies the filename and SHA256
# used to verify each package.
echo "=== downloading Packages.gz ==="
wget -q -O "${PACKAGES_GZ}" "${REPO_BASE}/Packages.gz"
|
||||||
|
|
||||||
|
# lookup_pkg <package> <version>
#
# Scans the index at $PACKAGES_GZ for the stanza whose Package and Version
# fields both match exactly and prints a single line "<Filename> <SHA256>".
# Prints nothing when no stanza matches.
lookup_pkg() {
    pkg="$1"
    ver="$2"
    gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" -v ver="$ver" '
        /^Package: /  { cur_pkg=$2 }
        /^Version: /  { cur_ver=$2 }
        /^Filename: / { cur_file=$2 }
        /^SHA256: /   { cur_sha=$2 }
        /^$/ {
            # A blank line terminates a stanza.
            if (cur_pkg == pkg && cur_ver == ver) {
                print cur_file " " cur_sha
                found = 1
                exit
            }
            cur_pkg=""; cur_ver=""; cur_file=""; cur_sha=""
        }
        END {
            # "exit" in a main rule still runs END, so only use this
            # fallback (index without a trailing blank line) when nothing
            # has been printed yet; otherwise the match is emitted twice.
            if (!found && cur_pkg == pkg && cur_ver == ver) {
                print cur_file " " cur_sha
            }
        }'
}
|
||||||
|
|
||||||
|
# download_verified_pkg <package> <version>
#
# Resolves the package in the repo index via lookup_pkg, reuses a previously
# downloaded .deb from $DOWNLOAD_CACHE_DIR when its SHA256 matches, otherwise
# downloads it from $REPO_BASE and verifies the checksum.
#
# stdout: exactly one line, the local path of the verified .deb. Callers
#         capture this with $(...), so all progress and error messages are
#         written to stderr instead.
# Exits with status 1 on missing metadata, download failure, or checksum
# mismatch.
download_verified_pkg() {
    pkg="$1"
    ver="$2"

    meta="$(lookup_pkg "$pkg" "$ver")"
    [ -n "$meta" ] || { echo "ERROR: package metadata not found for ${pkg} ${ver}" >&2; exit 1; }

    # Only the first metadata line is considered, so a repeated match line
    # cannot corrupt the filename or the expected checksum.
    repo_file="$(printf '%s\n' "$meta" | awk 'NR == 1 {print $1}')"
    repo_sha="$(printf '%s\n' "$meta" | awk 'NR == 1 {print $2}')"
    [ -n "$repo_file" ] || { echo "ERROR: package filename missing for ${pkg}" >&2; exit 1; }
    [ -n "$repo_sha" ] || { echo "ERROR: package sha missing for ${pkg}" >&2; exit 1; }

    deb_name="$(basename "$repo_file")"
    out="${DOWNLOAD_CACHE_DIR}/${deb_name}"

    if [ -f "$out" ]; then
        actual_sha="$(sha256sum "$out" | awk '{print $1}')"
        if [ "$actual_sha" = "$repo_sha" ]; then
            echo "=== using cached ${deb_name} ===" >&2
            printf '%s\n' "$out"
            return 0
        fi
        echo "=== removing stale ${deb_name} (sha256 mismatch) ===" >&2
        rm -f "$out"
    fi

    echo "=== downloading ${deb_name} ===" >&2
    # Remove a partial file on failure so it is never mistaken for a cache hit.
    wget --show-progress -O "$out" "${REPO_BASE}/${deb_name}" || {
        rm -f "$out"
        echo "ERROR: download failed for ${deb_name}" >&2
        exit 1
    }

    actual_sha="$(sha256sum "$out" | awk '{print $1}')"
    if [ "$actual_sha" != "$repo_sha" ]; then
        echo "ERROR: sha256 mismatch for ${deb_name}" >&2
        echo "  expected: $repo_sha" >&2
        echo "  actual:   $actual_sha" >&2
        rm -f "$out"
        exit 1
    fi
    echo "sha256 OK: ${deb_name}" >&2
    printf '%s\n' "$out"
}
|
||||||
|
|
||||||
|
# extract_deb <deb-path> <dest-dir>
#
# Unpacks the ar archive into <dest-dir> (created if needed) and then
# extracts its data.tar.* payload in place. Runs in a subshell so the
# caller's working directory is untouched.
extract_deb() {
    deb="$1"
    dst="$2"
    mkdir -p "$dst"
    (
        cd "$dst"
        ar x "$deb"
        # The payload compression suffix varies, so take the first match.
        data_tar=$(ls data.tar.* 2>/dev/null | head -1)
        if [ -z "$data_tar" ]; then
            echo "ERROR: data.tar.* not found in $deb"
            exit 1
        fi
        tar xf "$data_tar"
    )
}
|
||||||
|
|
||||||
|
# copy_headers <extracted-package-root>
#
# Copies header files from an unpacked package tree into
# ${CACHE_DIR}/include/. Two locations are searched:
#   - usr/include
#   - usr/local/cuda-*/targets/*/include
# NOTE(review): NVIDIA CUDA repo packages are expected to install headers
# under the second, toolkit-prefixed location; confirm against the actual
# extracted package contents.
# A location that does not exist is skipped without error.
copy_headers() {
    from="$1"
    if [ -d "${from}/usr/include" ]; then
        cp -a "${from}/usr/include/." "${CACHE_DIR}/include/"
    fi
    # An unmatched glob stays literal and is rejected by the -d test.
    for inc_dir in "${from}"/usr/local/cuda-*/targets/*/include; do
        [ -d "$inc_dir" ] || continue
        cp -a "${inc_dir}/." "${CACHE_DIR}/include/"
    done
    return 0
}
|
||||||
|
|
||||||
|
# copy_libs <extracted-package-root>
#
# Recursively collects libcublas / libcublasLt / libcudart shared objects
# (regular files and symlinks) from the unpacked tree and copies them flat
# into ${CACHE_DIR}/lib/.
copy_libs() {
    from="$1"
    find "$from" \
        \( -type f -o -type l \) \
        \( -name 'libcublas.so*' -o -name 'libcublasLt.so*' -o -name 'libcudart.so*' \) \
        -exec cp -a {} "${CACHE_DIR}/lib/" \;
}
|
||||||
|
|
||||||
|
# make_links <lib-base-name>
#
# For the first (sorted) regular file named <base>.so.<digits...> in
# ${CACHE_DIR}/lib, creates the symlink chain
#   <base>.so -> <base>.so.<major> -> <versioned file>.
# Does nothing when no versioned file exists; link failures are ignored.
make_links() {
    base="$1"
    lib_dir="${CACHE_DIR}/lib"

    versioned=$(find "$lib_dir" -maxdepth 1 -type f -name "${base}.so.[0-9]*" | sort | head -1)
    if [ -z "$versioned" ]; then
        return 0
    fi

    target=$(basename "$versioned")
    # Keep only the leading numeric component after ".so." for the link name.
    soname=$(printf '%s\n' "$versioned" | sed -E "s#.*/(${base}\.so\.[0-9]+).*#\\1#")

    ln -sf "$target" "${lib_dir}/${soname}" 2>/dev/null || true
    ln -sf "${soname}" "${lib_dir}/${base}.so" 2>/dev/null || true
}
|
||||||
|
|
||||||
|
# Scratch directory for unpacked packages; the trap removes it on exit and on
# INT/TERM.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT INT TERM

# Step 1: fetch (or reuse) all four packages before touching the cache.
CUBLAS_RT_DEB=$(download_verified_pkg "libcublas-${CUDA_SERIES_DASH}" "${CUBLAS_VERSION}")
CUBLAS_DEV_DEB=$(download_verified_pkg "libcublas-dev-${CUDA_SERIES_DASH}" "${CUBLAS_VERSION}")
CUDART_RT_DEB=$(download_verified_pkg "cuda-cudart-${CUDA_SERIES_DASH}" "${CUDA_USERSPACE_VERSION}")
CUDART_DEV_DEB=$(download_verified_pkg "cuda-cudart-dev-${CUDA_SERIES_DASH}" "${CUDA_USERSPACE_VERSION}")

# Step 2: unpack each package into its own scratch subdirectory.
extract_deb "$CUBLAS_RT_DEB" "${TMP_DIR}/cublas-rt"
extract_deb "$CUBLAS_DEV_DEB" "${TMP_DIR}/cublas-dev"
extract_deb "$CUDART_RT_DEB" "${TMP_DIR}/cudart-rt"
extract_deb "$CUDART_DEV_DEB" "${TMP_DIR}/cudart-dev"

# Step 3: headers come from the dev packages, shared objects from the
# runtime packages.
copy_headers "${TMP_DIR}/cublas-dev"
copy_headers "${TMP_DIR}/cudart-dev"
copy_libs "${TMP_DIR}/cublas-rt"
copy_libs "${TMP_DIR}/cudart-rt"

# Step 4: create the unversioned and major-version symlinks.
for lib_base in libcublas libcublasLt libcudart; do
    make_links "$lib_base"
done

# Step 5: fail the build if any expected artifact is missing from the cache.
for header in cublasLt.h cuda_runtime_api.h; do
    if [ ! -f "${CACHE_DIR}/include/${header}" ]; then
        echo "ERROR: ${header} not extracted"
        exit 1
    fi
done
for lib_base in libcublasLt libcublas libcudart; do
    if [ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name "${lib_base}.so*" | wc -l)" -eq 0 ]; then
        echo "ERROR: ${lib_base} not extracted"
        exit 1
    fi
done

echo "=== cuBLAS extraction complete ==="
echo "cache:   $CACHE_DIR"
echo "headers: $(find "${CACHE_DIR}/include" -type f | wc -l)"
echo "libs:    $(find "${CACHE_DIR}/lib" -maxdepth 1 \( -name 'libcublas*.so*' -o -name 'libcudart.so*' \) | wc -l)"
|
||||||
@@ -7,6 +7,7 @@ REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
|||||||
BUILDER_DIR="${REPO_ROOT}/iso/builder"
|
BUILDER_DIR="${REPO_ROOT}/iso/builder"
|
||||||
CONTAINER_TOOL="${CONTAINER_TOOL:-docker}"
|
CONTAINER_TOOL="${CONTAINER_TOOL:-docker}"
|
||||||
IMAGE_TAG="${BEE_BUILDER_IMAGE:-bee-iso-builder}"
|
IMAGE_TAG="${BEE_BUILDER_IMAGE:-bee-iso-builder}"
|
||||||
|
BUILDER_PLATFORM="${BEE_BUILDER_PLATFORM:-linux/amd64}"
|
||||||
CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
|
CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
|
||||||
AUTH_KEYS=""
|
AUTH_KEYS=""
|
||||||
REBUILD_IMAGE=0
|
REBUILD_IMAGE=0
|
||||||
@@ -40,6 +41,13 @@ if ! command -v "$CONTAINER_TOOL" >/dev/null 2>&1; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
PLATFORM_OS="${BUILDER_PLATFORM%/*}"
|
||||||
|
PLATFORM_ARCH="${BUILDER_PLATFORM#*/}"
|
||||||
|
if [ -z "$PLATFORM_OS" ] || [ -z "$PLATFORM_ARCH" ] || [ "$PLATFORM_OS" = "$BUILDER_PLATFORM" ]; then
|
||||||
|
echo "invalid BEE_BUILDER_PLATFORM: ${BUILDER_PLATFORM} (expected os/arch, e.g. linux/amd64)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -n "$AUTH_KEYS" ]; then
|
if [ -n "$AUTH_KEYS" ]; then
|
||||||
[ -f "$AUTH_KEYS" ] || { echo "authorized_keys not found: $AUTH_KEYS" >&2; exit 1; }
|
[ -f "$AUTH_KEYS" ] || { echo "authorized_keys not found: $AUTH_KEYS" >&2; exit 1; }
|
||||||
AUTH_KEYS_ABS="$(cd "$(dirname "$AUTH_KEYS")" && pwd)/$(basename "$AUTH_KEYS")"
|
AUTH_KEYS_ABS="$(cd "$(dirname "$AUTH_KEYS")" && pwd)/$(basename "$AUTH_KEYS")"
|
||||||
@@ -56,17 +64,35 @@ mkdir -p \
|
|||||||
|
|
||||||
IMAGE_REF="${IMAGE_TAG}:debian${DEBIAN_VERSION}"
|
IMAGE_REF="${IMAGE_TAG}:debian${DEBIAN_VERSION}"
|
||||||
|
|
||||||
if [ "$REBUILD_IMAGE" = "1" ] || ! "$CONTAINER_TOOL" image inspect "${IMAGE_REF}" >/dev/null 2>&1; then
|
# image_matches_platform
#
# Succeeds (status 0) when the local image ${IMAGE_REF} reports the same
# "<os>/<arch>" as ${BUILDER_PLATFORM}; fails otherwise, including when the
# inspect call itself fails and yields no output.
image_matches_platform() {
    actual_platform="$("$CONTAINER_TOOL" image inspect --format '{{.Os}}/{{.Architecture}}' "${IMAGE_REF}" 2>/dev/null || :)"
    if [ "$actual_platform" = "${BUILDER_PLATFORM}" ]; then
        return 0
    fi
    return 1
}
|
||||||
|
|
||||||
|
NEED_BUILD_IMAGE=0
|
||||||
|
if [ "$REBUILD_IMAGE" = "1" ]; then
|
||||||
|
NEED_BUILD_IMAGE=1
|
||||||
|
elif ! "$CONTAINER_TOOL" image inspect "${IMAGE_REF}" >/dev/null 2>&1; then
|
||||||
|
NEED_BUILD_IMAGE=1
|
||||||
|
elif ! image_matches_platform; then
|
||||||
|
actual_platform="$("$CONTAINER_TOOL" image inspect --format '{{.Os}}/{{.Architecture}}' "${IMAGE_REF}" 2>/dev/null || echo unknown)"
|
||||||
|
echo "=== rebuilding builder image ${IMAGE_REF}: platform mismatch (${actual_platform} != ${BUILDER_PLATFORM}) ==="
|
||||||
|
NEED_BUILD_IMAGE=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$NEED_BUILD_IMAGE" = "1" ]; then
|
||||||
"$CONTAINER_TOOL" build \
|
"$CONTAINER_TOOL" build \
|
||||||
|
--platform "${BUILDER_PLATFORM}" \
|
||||||
--build-arg GO_VERSION="${GO_VERSION}" \
|
--build-arg GO_VERSION="${GO_VERSION}" \
|
||||||
-t "${IMAGE_REF}" \
|
-t "${IMAGE_REF}" \
|
||||||
"${BUILDER_DIR}"
|
"${BUILDER_DIR}"
|
||||||
else
|
else
|
||||||
echo "=== using existing builder image ${IMAGE_REF} ==="
|
echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
|
||||||
fi
|
fi
|
||||||
|
|
||||||
set -- \
|
set -- \
|
||||||
run --rm --privileged \
|
run --rm --privileged \
|
||||||
|
--platform "${BUILDER_PLATFORM}" \
|
||||||
-v "${REPO_ROOT}:/work" \
|
-v "${REPO_ROOT}:/work" \
|
||||||
-v "${CACHE_DIR}:/cache" \
|
-v "${CACHE_DIR}:/cache" \
|
||||||
-e BEE_CONTAINER_BUILD=1 \
|
-e BEE_CONTAINER_BUILD=1 \
|
||||||
@@ -80,6 +106,7 @@ set -- \
|
|||||||
|
|
||||||
if [ -n "$AUTH_KEYS" ]; then
|
if [ -n "$AUTH_KEYS" ]; then
|
||||||
set -- run --rm --privileged \
|
set -- run --rm --privileged \
|
||||||
|
--platform "${BUILDER_PLATFORM}" \
|
||||||
-v "${REPO_ROOT}:/work" \
|
-v "${REPO_ROOT}:/work" \
|
||||||
-v "${CACHE_DIR}:/cache" \
|
-v "${CACHE_DIR}:/cache" \
|
||||||
-v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
|
-v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
|
||||||
|
|||||||
@@ -159,6 +159,16 @@ else
|
|||||||
echo "=== bee binary up to date, skipping build ==="
|
echo "=== bee binary up to date, skipping build ==="
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
|
||||||
|
sh "${BUILDER_DIR}/build-cublas.sh" \
|
||||||
|
"${CUBLAS_VERSION}" \
|
||||||
|
"${CUDA_USERSPACE_VERSION}" \
|
||||||
|
"${NCCL_CUDA_VERSION}" \
|
||||||
|
"${DIST_DIR}"
|
||||||
|
|
||||||
|
CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"
|
||||||
|
|
||||||
GPU_STRESS_NEED_BUILD=1
|
GPU_STRESS_NEED_BUILD=1
|
||||||
if [ -f "$GPU_STRESS_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_STRESS_BIN" ]; then
|
if [ -f "$GPU_STRESS_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_STRESS_BIN" ]; then
|
||||||
GPU_STRESS_NEED_BUILD=0
|
GPU_STRESS_NEED_BUILD=0
|
||||||
@@ -167,6 +177,7 @@ fi
|
|||||||
if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
|
if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
|
||||||
echo "=== building bee-gpu-stress ==="
|
echo "=== building bee-gpu-stress ==="
|
||||||
gcc -O2 -s -Wall -Wextra \
|
gcc -O2 -s -Wall -Wextra \
|
||||||
|
-I"${CUBLAS_CACHE}/include" \
|
||||||
-o "$GPU_STRESS_BIN" \
|
-o "$GPU_STRESS_BIN" \
|
||||||
"${BUILDER_DIR}/bee-gpu-stress.c" \
|
"${BUILDER_DIR}/bee-gpu-stress.c" \
|
||||||
-ldl
|
-ldl
|
||||||
@@ -283,6 +294,10 @@ NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
|
|||||||
cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
|
cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
|
||||||
echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
|
echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
|
||||||
|
|
||||||
|
# Inject cuBLAS/cuBLASLt/cudart runtime libs used by bee-gpu-stress tensor-core GEMM path
|
||||||
|
cp "${CUBLAS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
|
||||||
|
echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
|
||||||
|
|
||||||
# --- embed build metadata ---
|
# --- embed build metadata ---
|
||||||
mkdir -p "${OVERLAY_STAGE_DIR}/etc"
|
mkdir -p "${OVERLAY_STAGE_DIR}/etc"
|
||||||
BUILD_DATE="$(date +%Y-%m-%d)"
|
BUILD_DATE="$(date +%Y-%m-%d)"
|
||||||
@@ -297,6 +312,8 @@ DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
|
|||||||
NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
|
NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
|
||||||
NCCL_VERSION=${NCCL_VERSION}
|
NCCL_VERSION=${NCCL_VERSION}
|
||||||
NCCL_CUDA_VERSION=${NCCL_CUDA_VERSION}
|
NCCL_CUDA_VERSION=${NCCL_CUDA_VERSION}
|
||||||
|
CUBLAS_VERSION=${CUBLAS_VERSION}
|
||||||
|
CUDA_USERSPACE_VERSION=${CUDA_USERSPACE_VERSION}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Patch motd with build info
|
# Patch motd with build info
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ openssh-server
|
|||||||
|
|
||||||
# Filesystem support for USB export targets
|
# Filesystem support for USB export targets
|
||||||
exfatprogs
|
exfatprogs
|
||||||
|
exfat-fuse
|
||||||
ntfs-3g
|
ntfs-3g
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
|
|||||||
Reference in New Issue
Block a user