Unify live RAM runtime state

This commit is contained in:
Mikhail Chusavitin
2026-04-14 16:18:33 +03:00
parent 2be7ae6d28
commit 54338dbae5
9 changed files with 277 additions and 72 deletions

View File

@@ -84,6 +84,7 @@ type installer interface {
InstallToDisk(ctx context.Context, device string, logFile string) error InstallToDisk(ctx context.Context, device string, logFile string) error
IsLiveMediaInRAM() bool IsLiveMediaInRAM() bool
LiveBootSource() platform.LiveBootSource LiveBootSource() platform.LiveBootSource
LiveMediaRAMState() platform.LiveMediaRAMState
RunInstallToRAM(ctx context.Context, logFunc func(string)) error RunInstallToRAM(ctx context.Context, logFunc func(string)) error
} }
@@ -108,6 +109,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource {
return a.installer.LiveBootSource() return a.installer.LiveBootSource()
} }
func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState {
return a.installer.LiveMediaRAMState()
}
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error { func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
return a.installer.RunInstallToRAM(ctx, logFunc) return a.installer.RunInstallToRAM(ctx, logFunc)
} }

View File

@@ -11,20 +11,10 @@ import (
"strings" "strings"
) )
const installToRAMDir = "/dev/shm/bee-live"
func (s *System) IsLiveMediaInRAM() bool { func (s *System) IsLiveMediaInRAM() bool {
fsType := mountFSType("/run/live/medium") return s.LiveMediaRAMState().InRAM
if fsType == "" {
// No medium mount at all — fall back to toram kernel parameter.
return toramActive()
}
if strings.EqualFold(fsType, "tmpfs") {
return true
}
// When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind
// mount of /run/live/medium fails (common for CD-ROM boots), the medium
// fstype still shows the CD-ROM type. Check whether the RAM copy exists.
files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs")
return len(files) > 0
} }
func (s *System) LiveBootSource() LiveBootSource { func (s *System) LiveBootSource() LiveBootSource {
@@ -56,14 +46,95 @@ func (s *System) LiveBootSource() LiveBootSource {
return status return status
} }
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error { func (s *System) LiveMediaRAMState() LiveMediaRAMState {
return evaluateLiveMediaRAMState(
s.LiveBootSource(),
toramActive(),
globPaths("/run/live/medium/live/*.squashfs"),
globPaths(filepath.Join(installToRAMDir, "*.squashfs")),
)
}
func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
state := LiveMediaRAMState{
LiveBootSource: status,
ToramActive: toram,
CopyPresent: len(copiedSquashfs) > 0,
}
if status.InRAM {
state.State = "in_ram"
state.Status = "ok"
state.CopyComplete = true
state.Message = "Running from RAM — installation media can be safely disconnected."
return state
}
expected := pathBaseSet(sourceSquashfs)
copied := pathBaseSet(copiedSquashfs)
state.CopyComplete = len(expected) > 0 && setContainsAll(copied, expected)
switch {
case state.CopyComplete:
state.State = "partial"
state.Status = "partial"
state.CanStartCopy = true
state.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source."
case state.CopyPresent:
state.State = "partial"
state.Status = "partial"
state.CanStartCopy = true
state.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled."
case toram:
state.State = "toram_failed"
state.Status = "failed"
state.CanStartCopy = true
state.Message = "toram boot parameter is set but the live medium is not mounted from RAM."
default:
state.State = "not_in_ram"
state.Status = "warning"
state.CanStartCopy = true
state.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance."
}
return state
}
func globPaths(pattern string) []string {
matches, _ := filepath.Glob(pattern)
return matches
}
func pathBaseSet(paths []string) map[string]struct{} {
out := make(map[string]struct{}, len(paths))
for _, path := range paths {
base := strings.TrimSpace(filepath.Base(path))
if base != "" {
out[base] = struct{}{}
}
}
return out
}
func setContainsAll(have, want map[string]struct{}) bool {
if len(want) == 0 {
return false
}
for name := range want {
if _, ok := have[name]; !ok {
return false
}
}
return true
}
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
log := func(msg string) { log := func(msg string) {
if logFunc != nil { if logFunc != nil {
logFunc(msg) logFunc(msg)
} }
} }
if s.IsLiveMediaInRAM() { state := s.LiveMediaRAMState()
if state.InRAM {
log("Already running from RAM — installation media can be safely disconnected.") log("Already running from RAM — installation media can be safely disconnected.")
return nil return nil
} }
@@ -88,10 +159,21 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
humanBytes(needed+headroom), humanBytes(free)) humanBytes(needed+headroom), humanBytes(free))
} }
dstDir := "/dev/shm/bee-live" dstDir := installToRAMDir
if state.CopyPresent {
log("Removing stale partial RAM copy before retry...")
}
_ = os.RemoveAll(dstDir)
if err := os.MkdirAll(dstDir, 0755); err != nil { if err := os.MkdirAll(dstDir, 0755); err != nil {
return fmt.Errorf("create tmpfs dir: %v", err) return fmt.Errorf("create tmpfs dir: %v", err)
} }
defer func() {
if retErr == nil {
return
}
_ = os.RemoveAll(dstDir)
log("Removed incomplete RAM copy.")
}()
for _, sf := range squashfsFiles { for _, sf := range squashfsFiles {
if err := ctx.Err(); err != nil { if err := ctx.Err(); err != nil {

View File

@@ -58,3 +58,46 @@ func TestDescribeLiveBootSource(t *testing.T) {
t.Fatalf("got %q want /run/live/medium", got) t.Fatalf("got %q want /run/live/medium", got)
} }
} }
func TestEvaluateLiveMediaRAMState(t *testing.T) {
t.Parallel()
t.Run("in_ram", func(t *testing.T) {
state := evaluateLiveMediaRAMState(
LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"},
false,
nil,
nil,
)
if state.State != "in_ram" || state.Status != "ok" || state.CanStartCopy {
t.Fatalf("state=%+v", state)
}
})
t.Run("partial_copy_after_cancel", func(t *testing.T) {
state := evaluateLiveMediaRAMState(
LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
false,
[]string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"},
[]string{"/dev/shm/bee-live/filesystem.squashfs"},
)
if state.State != "partial" || state.Status != "partial" || !state.CanStartCopy {
t.Fatalf("state=%+v", state)
}
if state.CopyComplete {
t.Fatalf("CopyComplete=%v want false", state.CopyComplete)
}
})
t.Run("toram_failed", func(t *testing.T) {
state := evaluateLiveMediaRAMState(
LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
true,
nil,
nil,
)
if state.State != "toram_failed" || state.Status != "failed" || !state.CanStartCopy {
t.Fatalf("state=%+v", state)
}
})
}

View File

@@ -171,25 +171,28 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
return ToolStatus{Name: display} return ToolStatus{Name: display}
} }
// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM. // collectToRAMHealth evaluates whether the live system is fully running from RAM.
// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted), // Status values: "ok" = fully in RAM, "warning" = not copied, "partial" = stale or
// "failed" = toram was requested but medium is not in RAM (copy failed or in progress). // incomplete RAM copy exists but runtime still depends on the boot medium,
// "failed" = toram was requested but medium is not in RAM.
func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) { func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
inRAM := s.IsLiveMediaInRAM() state := s.LiveMediaRAMState()
active := toramActive() health.ToRAMStatus = state.Status
switch { switch state.Status {
case inRAM: case "ok":
health.ToRAMStatus = "ok" return
case active: case "failed":
// toram was requested but medium is not yet/no longer in RAM
health.ToRAMStatus = "failed"
health.Issues = append(health.Issues, schema.RuntimeIssue{ health.Issues = append(health.Issues, schema.RuntimeIssue{
Code: "toram_copy_failed", Code: "toram_copy_failed",
Severity: "warning", Severity: "warning",
Description: "toram boot parameter is set but the live medium is not mounted from RAM.", Description: state.Message,
})
case "partial":
health.Issues = append(health.Issues, schema.RuntimeIssue{
Code: "toram_copy_partial",
Severity: "warning",
Description: state.Message,
}) })
default:
health.ToRAMStatus = "warning"
} }
} }
@@ -211,13 +214,13 @@ func findUSBExportMount() string {
// fs types that are expected on USB export drives // fs types that are expected on USB export drives
exportFSTypes := map[string]bool{ exportFSTypes := map[string]bool{
"vfat": true, "vfat": true,
"exfat": true, "exfat": true,
"ext2": true, "ext2": true,
"ext3": true, "ext3": true,
"ext4": true, "ext4": true,
"ntfs": true, "ntfs": true,
"ntfs3": true, "ntfs3": true,
"fuseblk": true, "fuseblk": true,
} }

View File

@@ -9,6 +9,17 @@ type LiveBootSource struct {
Device string `json:"device,omitempty"` Device string `json:"device,omitempty"`
} }
type LiveMediaRAMState struct {
LiveBootSource
State string `json:"state"`
Status string `json:"status"`
ToramActive bool `json:"toram_active,omitempty"`
CopyPresent bool `json:"copy_present,omitempty"`
CopyComplete bool `json:"copy_complete,omitempty"`
CanStartCopy bool `json:"can_start_copy,omitempty"`
Message string `json:"message,omitempty"`
}
type InterfaceInfo struct { type InterfaceInfo struct {
Name string Name string
State string State string

View File

@@ -15,17 +15,17 @@ type HardwareIngestRequest struct {
} }
type RuntimeHealth struct { type RuntimeHealth struct {
Status string `json:"status"` Status string `json:"status"`
CheckedAt string `json:"checked_at"` CheckedAt string `json:"checked_at"`
ExportDir string `json:"export_dir,omitempty"` ExportDir string `json:"export_dir,omitempty"`
DriverReady bool `json:"driver_ready,omitempty"` DriverReady bool `json:"driver_ready,omitempty"`
CUDAReady bool `json:"cuda_ready,omitempty"` CUDAReady bool `json:"cuda_ready,omitempty"`
NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck" NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
NetworkStatus string `json:"network_status,omitempty"` NetworkStatus string `json:"network_status,omitempty"`
// ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed) // ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
ToRAMStatus string `json:"toram_status,omitempty"` ToRAMStatus string `json:"toram_status,omitempty"`
// USBExportPath: mount point of the first writable USB drive found, empty if none. // USBExportPath: mount point of the first writable USB drive found, empty if none.
USBExportPath string `json:"usb_export_path,omitempty"` USBExportPath string `json:"usb_export_path,omitempty"`
Issues []RuntimeIssue `json:"issues,omitempty"` Issues []RuntimeIssue `json:"issues,omitempty"`
Tools []RuntimeToolStatus `json:"tools,omitempty"` Tools []RuntimeToolStatus `json:"tools,omitempty"`
Services []RuntimeServiceStatus `json:"services,omitempty"` Services []RuntimeServiceStatus `json:"services,omitempty"`

View File

@@ -526,14 +526,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
return return
} }
var body struct { var body struct {
Duration int `json:"duration"` Duration int `json:"duration"`
StressMode bool `json:"stress_mode"` StressMode bool `json:"stress_mode"`
GPUIndices []int `json:"gpu_indices"` GPUIndices []int `json:"gpu_indices"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices"` ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
StaggerGPUStart bool `json:"stagger_gpu_start"` StaggerGPUStart bool `json:"stagger_gpu_start"`
ParallelGPUs bool `json:"parallel_gpus"` ParallelGPUs bool `json:"parallel_gpus"`
Loader string `json:"loader"` Loader string `json:"loader"`
Profile string `json:"profile"` Profile string `json:"profile"`
DisplayName string `json:"display_name"` DisplayName string `json:"display_name"`
PlatformComponents []string `json:"platform_components"` PlatformComponents []string `json:"platform_components"`
@@ -549,14 +549,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
if strings.TrimSpace(body.DisplayName) != "" { if strings.TrimSpace(body.DisplayName) != "" {
name = body.DisplayName name = body.DisplayName
} }
params := taskParams{ params := taskParams{
Duration: body.Duration, Duration: body.Duration,
StressMode: body.StressMode, StressMode: body.StressMode,
GPUIndices: body.GPUIndices, GPUIndices: body.GPUIndices,
ExcludeGPUIndices: body.ExcludeGPUIndices, ExcludeGPUIndices: body.ExcludeGPUIndices,
StaggerGPUStart: body.StaggerGPUStart, StaggerGPUStart: body.StaggerGPUStart,
ParallelGPUs: body.ParallelGPUs, ParallelGPUs: body.ParallelGPUs,
Loader: body.Loader, Loader: body.Loader,
BurnProfile: body.Profile, BurnProfile: body.Profile,
DisplayName: body.DisplayName, DisplayName: body.DisplayName,
PlatformComponents: body.PlatformComponents, PlatformComponents: body.PlatformComponents,
@@ -1072,18 +1072,55 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
writeError(w, http.StatusServiceUnavailable, "app not configured") writeError(w, http.StatusServiceUnavailable, "app not configured")
return return
} }
status := h.opts.App.LiveBootSource() status := h.currentRAMStatus()
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(status) _ = json.NewEncoder(w).Encode(status)
} }
type ramStatusResponse struct {
platform.LiveMediaRAMState
InstallTaskActive bool `json:"install_task_active,omitempty"`
CopyTaskActive bool `json:"copy_task_active,omitempty"`
CanStartTask bool `json:"can_start_task,omitempty"`
BlockedReason string `json:"blocked_reason,omitempty"`
}
func (h *handler) currentRAMStatus() ramStatusResponse {
state := h.opts.App.LiveMediaRAMState()
resp := ramStatusResponse{LiveMediaRAMState: state}
if globalQueue.hasActiveTarget("install") {
resp.InstallTaskActive = true
resp.BlockedReason = "install to disk is already running"
return resp
}
if globalQueue.hasActiveTarget("install-to-ram") {
resp.CopyTaskActive = true
resp.BlockedReason = "install to RAM task is already pending or running"
return resp
}
if state.InRAM {
resp.BlockedReason = "system is already running from RAM"
return resp
}
resp.CanStartTask = state.CanStartCopy
if !resp.CanStartTask && resp.BlockedReason == "" {
resp.BlockedReason = state.Message
}
return resp
}
func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil { if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured") writeError(w, http.StatusServiceUnavailable, "app not configured")
return return
} }
if globalQueue.hasActiveTarget("install") { status := h.currentRAMStatus()
writeError(w, http.StatusConflict, "install to disk is already running") if !status.CanStartTask {
msg := strings.TrimSpace(status.BlockedReason)
if msg == "" {
msg = "install to RAM is not available"
}
writeError(w, http.StatusConflict, msg)
return return
} }
t := &Task{ t := &Task{

View File

@@ -845,6 +845,13 @@ func buildRuntimeToRAMRow(health schema.RuntimeHealth) runtimeHealthRow {
Source: "live-boot / /proc/mounts", Source: "live-boot / /proc/mounts",
Issue: "", Issue: "",
} }
case "partial":
return runtimeHealthRow{
Title: "LiveCD in RAM",
Status: "WARNING",
Source: "live-boot / /proc/mounts / /dev/shm/bee-live",
Issue: "Partial or staged RAM copy detected. System is not fully running from RAM; Copy to RAM can be retried.",
}
case "failed": case "failed":
return runtimeHealthRow{ return runtimeHealthRow{
Title: "LiveCD in RAM", Title: "LiveCD in RAM",
@@ -2280,7 +2287,6 @@ func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun)
return maxGPUIndex, runs return maxGPUIndex, runs
} }
// ── Burn ────────────────────────────────────────────────────────────────────── // ── Burn ──────────────────────────────────────────────────────────────────────
func renderBurn() string { func renderBurn() string {
@@ -3245,12 +3251,19 @@ fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
else if (kind === 'disk') label = 'disk (' + source + ')'; else if (kind === 'disk') label = 'disk (' + source + ')';
else label = source; else label = source;
boot.textContent = 'Current boot source: ' + label + '.'; boot.textContent = 'Current boot source: ' + label + '.';
if (d.in_ram) { txt.textContent = d.message || 'Checking...';
txt.textContent = '✓ Running from RAM — installation media can be safely disconnected.'; if (d.status === 'ok' || d.in_ram) {
txt.style.color = 'var(--ok, green)'; txt.style.color = 'var(--ok, green)';
} else if (d.status === 'failed') {
txt.style.color = 'var(--err, #b91c1c)';
} else { } else {
txt.textContent = 'Live media is mounted from installation device. Copy to RAM to allow media removal.'; txt.style.color = 'var(--muted)';
}
if (d.can_start_task) {
btn.style.display = ''; btn.style.display = '';
btn.disabled = false;
} else {
btn.style.display = 'none';
} }
}); });
function installToRAM() { function installToRAM() {

View File

@@ -11,6 +11,7 @@ import (
"time" "time"
"bee/audit/internal/platform" "bee/audit/internal/platform"
"bee/audit/internal/schema"
) )
func TestChartLegendNumber(t *testing.T) { func TestChartLegendNumber(t *testing.T) {
@@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
} }
} }
func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) {
row := buildRuntimeToRAMRow(schema.RuntimeHealth{ToRAMStatus: "partial"})
if row.Status != "WARNING" {
t.Fatalf("status=%q want WARNING", row.Status)
}
if !strings.Contains(row.Issue, "Partial or staged RAM copy detected") {
t.Fatalf("issue=%q", row.Issue)
}
}
func TestChartDataFromSamplesUsesFullHistory(t *testing.T) { func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
samples := []platform.LiveMetricSample{ samples := []platform.LiveMetricSample{
{ {
@@ -1113,8 +1124,8 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
`>Storage<`, `>Storage<`,
`>GPU<`, `>GPU<`,
`>PSU<`, `>PSU<`,
`badge-warn`, // cpu Warning badge `badge-warn`, // cpu Warning badge
`badge-err`, // storage Critical badge `badge-err`, // storage Critical badge
} { } {
if !strings.Contains(body, needle) { if !strings.Contains(body, needle) {
t.Fatalf("dashboard missing %q: %s", needle, body) t.Fatalf("dashboard missing %q: %s", needle, body)