From 54338dbae56f8c001532876c9bcea7a5a66e9bad Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Tue, 14 Apr 2026 16:18:33 +0300 Subject: [PATCH] Unify live RAM runtime state --- audit/internal/app/app.go | 5 + audit/internal/platform/install_to_ram.go | 114 +++++++++++++++--- .../internal/platform/install_to_ram_test.go | 43 +++++++ audit/internal/platform/runtime.go | 45 +++---- audit/internal/platform/types.go | 11 ++ audit/internal/schema/hardware.go | 20 +-- audit/internal/webui/api.go | 75 +++++++++--- audit/internal/webui/pages.go | 21 +++- audit/internal/webui/server_test.go | 15 ++- 9 files changed, 277 insertions(+), 72 deletions(-) diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go index 56e1ff3..ca1c94d 100644 --- a/audit/internal/app/app.go +++ b/audit/internal/app/app.go @@ -84,6 +84,7 @@ type installer interface { InstallToDisk(ctx context.Context, device string, logFile string) error IsLiveMediaInRAM() bool LiveBootSource() platform.LiveBootSource + LiveMediaRAMState() platform.LiveMediaRAMState RunInstallToRAM(ctx context.Context, logFunc func(string)) error } @@ -108,6 +109,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource { return a.installer.LiveBootSource() } +func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState { + return a.installer.LiveMediaRAMState() +} + func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error { return a.installer.RunInstallToRAM(ctx, logFunc) } diff --git a/audit/internal/platform/install_to_ram.go b/audit/internal/platform/install_to_ram.go index 4f6b399..a01ccd5 100644 --- a/audit/internal/platform/install_to_ram.go +++ b/audit/internal/platform/install_to_ram.go @@ -11,20 +11,10 @@ import ( "strings" ) +const installToRAMDir = "/dev/shm/bee-live" + func (s *System) IsLiveMediaInRAM() bool { - fsType := mountFSType("/run/live/medium") - if fsType == "" { - // No medium mount at all — fall back to toram kernel parameter. - return toramActive() - } - if strings.EqualFold(fsType, "tmpfs") { - return true - } - // When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind - // mount of /run/live/medium fails (common for CD-ROM boots), the medium - // fstype still shows the CD-ROM type. Check whether the RAM copy exists. - files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs") - return len(files) > 0 + return s.LiveMediaRAMState().InRAM } func (s *System) LiveBootSource() LiveBootSource { @@ -56,14 +46,95 @@ func (s *System) LiveBootSource() LiveBootSource { return status } -func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error { +func (s *System) LiveMediaRAMState() LiveMediaRAMState { + return evaluateLiveMediaRAMState( + s.LiveBootSource(), + toramActive(), + globPaths("/run/live/medium/live/*.squashfs"), + globPaths(filepath.Join(installToRAMDir, "*.squashfs")), + ) +} + +func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState { + state := LiveMediaRAMState{ + LiveBootSource: status, + ToramActive: toram, + CopyPresent: len(copiedSquashfs) > 0, + } + if status.InRAM { + state.State = "in_ram" + state.Status = "ok" + state.CopyComplete = true + state.Message = "Running from RAM — installation media can be safely disconnected." + return state + } + + expected := pathBaseSet(sourceSquashfs) + copied := pathBaseSet(copiedSquashfs) + state.CopyComplete = len(expected) > 0 && setContainsAll(copied, expected) + + switch { + case state.CopyComplete: + state.State = "partial" + state.Status = "partial" + state.CanStartCopy = true + state.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source." + case state.CopyPresent: + state.State = "partial" + state.Status = "partial" + state.CanStartCopy = true + state.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled." + case toram: + state.State = "toram_failed" + state.Status = "failed" + state.CanStartCopy = true + state.Message = "toram boot parameter is set but the live medium is not mounted from RAM." + default: + state.State = "not_in_ram" + state.Status = "warning" + state.CanStartCopy = true + state.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance." + } + return state +} + +func globPaths(pattern string) []string { + matches, _ := filepath.Glob(pattern) + return matches +} + +func pathBaseSet(paths []string) map[string]struct{} { + out := make(map[string]struct{}, len(paths)) + for _, path := range paths { + base := strings.TrimSpace(filepath.Base(path)) + if base != "" { + out[base] = struct{}{} + } + } + return out +} + +func setContainsAll(have, want map[string]struct{}) bool { + if len(want) == 0 { + return false + } + for name := range want { + if _, ok := have[name]; !ok { + return false + } + } + return true +} + +func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) { log := func(msg string) { if logFunc != nil { logFunc(msg) } } - if s.IsLiveMediaInRAM() { + state := s.LiveMediaRAMState() + if state.InRAM { log("Already running from RAM — installation media can be safely disconnected.") return nil } @@ -88,10 +159,21 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro humanBytes(needed+headroom), humanBytes(free)) } - dstDir := "/dev/shm/bee-live" + dstDir := installToRAMDir + if state.CopyPresent { + log("Removing stale partial RAM copy before retry...") + } + _ = os.RemoveAll(dstDir) if err := os.MkdirAll(dstDir, 0755); err != nil { return fmt.Errorf("create tmpfs dir: %v", err) } + defer func() { + if retErr == nil { + return + } + _ = os.RemoveAll(dstDir) + log("Removed incomplete RAM copy.") + }() for _, sf := range squashfsFiles { if err := ctx.Err(); err != nil { diff --git a/audit/internal/platform/install_to_ram_test.go b/audit/internal/platform/install_to_ram_test.go index fb8475e..2ad458f 100644 --- a/audit/internal/platform/install_to_ram_test.go +++ b/audit/internal/platform/install_to_ram_test.go @@ -58,3 +58,46 @@ func TestDescribeLiveBootSource(t *testing.T) { t.Fatalf("got %q want /run/live/medium", got) } } + +func TestEvaluateLiveMediaRAMState(t *testing.T) { + t.Parallel() + + t.Run("in_ram", func(t *testing.T) { + state := evaluateLiveMediaRAMState( + LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}, + false, + nil, + nil, + ) + if state.State != "in_ram" || state.Status != "ok" || state.CanStartCopy { + t.Fatalf("state=%+v", state) + } + }) + + t.Run("partial_copy_after_cancel", func(t *testing.T) { + state := evaluateLiveMediaRAMState( + LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}, + false, + []string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"}, + []string{"/dev/shm/bee-live/filesystem.squashfs"}, + ) + if state.State != "partial" || state.Status != "partial" || !state.CanStartCopy { + t.Fatalf("state=%+v", state) + } + if state.CopyComplete { + t.Fatalf("CopyComplete=%v want false", state.CopyComplete) + } + }) + + t.Run("toram_failed", func(t *testing.T) { + state := evaluateLiveMediaRAMState( + LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}, + true, + nil, + nil, + ) + if state.State != "toram_failed" || state.Status != "failed" || !state.CanStartCopy { + t.Fatalf("state=%+v", state) + } + }) +} diff --git a/audit/internal/platform/runtime.go b/audit/internal/platform/runtime.go index 538869f..601e676 100644 --- a/audit/internal/platform/runtime.go +++ b/audit/internal/platform/runtime.go @@ -171,25 +171,28 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus { return ToolStatus{Name: display} } -// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM. -// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted), -// "failed" = toram was requested but medium is not in RAM (copy failed or in progress). +// collectToRAMHealth evaluates whether the live system is fully running from RAM. +// Status values: "ok" = fully in RAM, "warning" = not copied, "partial" = stale or +// incomplete RAM copy exists but runtime still depends on the boot medium, +// "failed" = toram was requested but medium is not in RAM. func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) { - inRAM := s.IsLiveMediaInRAM() - active := toramActive() - switch { - case inRAM: - health.ToRAMStatus = "ok" - case active: - // toram was requested but medium is not yet/no longer in RAM - health.ToRAMStatus = "failed" + state := s.LiveMediaRAMState() + health.ToRAMStatus = state.Status + switch state.Status { + case "ok": + return + case "failed": health.Issues = append(health.Issues, schema.RuntimeIssue{ Code: "toram_copy_failed", Severity: "warning", - Description: "toram boot parameter is set but the live medium is not mounted from RAM.", + Description: state.Message, + }) + case "partial": + health.Issues = append(health.Issues, schema.RuntimeIssue{ + Code: "toram_copy_partial", + Severity: "warning", + Description: state.Message, }) - default: - health.ToRAMStatus = "warning" } } @@ -211,13 +214,13 @@ func findUSBExportMount() string { // fs types that are expected on USB export drives exportFSTypes := map[string]bool{ - "vfat": true, - "exfat": true, - "ext2": true, - "ext3": true, - "ext4": true, - "ntfs": true, - "ntfs3": true, + "vfat": true, + "exfat": true, + "ext2": true, + "ext3": true, + "ext4": true, + "ntfs": true, + "ntfs3": true, "fuseblk": true, } diff --git a/audit/internal/platform/types.go b/audit/internal/platform/types.go index 1c95465..3ca31f8 100644 --- a/audit/internal/platform/types.go +++ b/audit/internal/platform/types.go @@ -9,6 +9,17 @@ type LiveBootSource struct { Device string `json:"device,omitempty"` } +type LiveMediaRAMState struct { + LiveBootSource + State string `json:"state"` + Status string `json:"status"` + ToramActive bool `json:"toram_active,omitempty"` + CopyPresent bool `json:"copy_present,omitempty"` + CopyComplete bool `json:"copy_complete,omitempty"` + CanStartCopy bool `json:"can_start_copy,omitempty"` + Message string `json:"message,omitempty"` +} + type InterfaceInfo struct { Name string State string diff --git a/audit/internal/schema/hardware.go b/audit/internal/schema/hardware.go index 82d5b15..fa88f5e 100644 --- a/audit/internal/schema/hardware.go +++ b/audit/internal/schema/hardware.go @@ -15,17 +15,17 @@ type HardwareIngestRequest struct { } type RuntimeHealth struct { - Status string `json:"status"` - CheckedAt string `json:"checked_at"` - ExportDir string `json:"export_dir,omitempty"` - DriverReady bool `json:"driver_ready,omitempty"` - CUDAReady bool `json:"cuda_ready,omitempty"` - NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck" - NetworkStatus string `json:"network_status,omitempty"` - // ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed) - ToRAMStatus string `json:"toram_status,omitempty"` + Status string `json:"status"` + CheckedAt string `json:"checked_at"` + ExportDir string `json:"export_dir,omitempty"` + DriverReady bool `json:"driver_ready,omitempty"` + CUDAReady bool `json:"cuda_ready,omitempty"` + NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck" + NetworkStatus string `json:"network_status,omitempty"` + // ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed) + ToRAMStatus string `json:"toram_status,omitempty"` // USBExportPath: mount point of the first writable USB drive found, empty if none. - USBExportPath string `json:"usb_export_path,omitempty"` + USBExportPath string `json:"usb_export_path,omitempty"` Issues []RuntimeIssue `json:"issues,omitempty"` Tools []RuntimeToolStatus `json:"tools,omitempty"` Services []RuntimeServiceStatus `json:"services,omitempty"` diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go index 129cdca..3216bb6 100644 --- a/audit/internal/webui/api.go +++ b/audit/internal/webui/api.go @@ -526,14 +526,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc { return } - var body struct { - Duration int `json:"duration"` - StressMode bool `json:"stress_mode"` - GPUIndices []int `json:"gpu_indices"` - ExcludeGPUIndices []int `json:"exclude_gpu_indices"` - StaggerGPUStart bool `json:"stagger_gpu_start"` - ParallelGPUs bool `json:"parallel_gpus"` - Loader string `json:"loader"` + var body struct { + Duration int `json:"duration"` + StressMode bool `json:"stress_mode"` + GPUIndices []int `json:"gpu_indices"` + ExcludeGPUIndices []int `json:"exclude_gpu_indices"` + StaggerGPUStart bool `json:"stagger_gpu_start"` + ParallelGPUs bool `json:"parallel_gpus"` + Loader string `json:"loader"` Profile string `json:"profile"` DisplayName string `json:"display_name"` PlatformComponents []string `json:"platform_components"` @@ -549,14 +549,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc { if strings.TrimSpace(body.DisplayName) != "" { name = body.DisplayName } - params := taskParams{ - Duration: body.Duration, - StressMode: body.StressMode, - GPUIndices: body.GPUIndices, - ExcludeGPUIndices: body.ExcludeGPUIndices, - StaggerGPUStart: body.StaggerGPUStart, - ParallelGPUs: body.ParallelGPUs, - Loader: body.Loader, + params := taskParams{ + Duration: body.Duration, + StressMode: body.StressMode, + GPUIndices: body.GPUIndices, + ExcludeGPUIndices: body.ExcludeGPUIndices, + StaggerGPUStart: body.StaggerGPUStart, + ParallelGPUs: body.ParallelGPUs, + Loader: body.Loader, BurnProfile: body.Profile, DisplayName: body.DisplayName, PlatformComponents: body.PlatformComponents, @@ -1072,18 +1072,55 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) { writeError(w, http.StatusServiceUnavailable, "app not configured") return } - status := h.opts.App.LiveBootSource() + status := h.currentRAMStatus() w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(status) } +type ramStatusResponse struct { + platform.LiveMediaRAMState + InstallTaskActive bool `json:"install_task_active,omitempty"` + CopyTaskActive bool `json:"copy_task_active,omitempty"` + CanStartTask bool `json:"can_start_task,omitempty"` + BlockedReason string `json:"blocked_reason,omitempty"` +} + +func (h *handler) currentRAMStatus() ramStatusResponse { + state := h.opts.App.LiveMediaRAMState() + resp := ramStatusResponse{LiveMediaRAMState: state} + if globalQueue.hasActiveTarget("install") { + resp.InstallTaskActive = true + resp.BlockedReason = "install to disk is already running" + return resp + } + if globalQueue.hasActiveTarget("install-to-ram") { + resp.CopyTaskActive = true + resp.BlockedReason = "install to RAM task is already pending or running" + return resp + } + if state.InRAM { + resp.BlockedReason = "system is already running from RAM" + return resp + } + resp.CanStartTask = state.CanStartCopy + if !resp.CanStartTask && resp.BlockedReason == "" { + resp.BlockedReason = state.Message + } + return resp +} + func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) { if h.opts.App == nil { writeError(w, http.StatusServiceUnavailable, "app not configured") return } - if globalQueue.hasActiveTarget("install") { - writeError(w, http.StatusConflict, "install to disk is already running") + status := h.currentRAMStatus() + if !status.CanStartTask { + msg := strings.TrimSpace(status.BlockedReason) + if msg == "" { + msg = "install to RAM is not available" + } + writeError(w, http.StatusConflict, msg) return } t := &Task{ diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index 6181fb0..6e24398 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -845,6 +845,13 @@ func buildRuntimeToRAMRow(health schema.RuntimeHealth) runtimeHealthRow { Source: "live-boot / /proc/mounts", Issue: "", } + case "partial": + return runtimeHealthRow{ + Title: "LiveCD in RAM", + Status: "WARNING", + Source: "live-boot / /proc/mounts / /dev/shm/bee-live", + Issue: "Partial or staged RAM copy detected. System is not fully running from RAM; Copy to RAM can be retried.", + } case "failed": return runtimeHealthRow{ Title: "LiveCD in RAM", @@ -2280,7 +2287,6 @@ func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) return maxGPUIndex, runs } - // ── Burn ────────────────────────────────────────────────────────────────────── func renderBurn() string { @@ -3245,12 +3251,19 @@ fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{ else if (kind === 'disk') label = 'disk (' + source + ')'; else label = source; boot.textContent = 'Current boot source: ' + label + '.'; - if (d.in_ram) { - txt.textContent = '✓ Running from RAM — installation media can be safely disconnected.'; + txt.textContent = d.message || 'Checking...'; + if (d.status === 'ok' || d.in_ram) { txt.style.color = 'var(--ok, green)'; + } else if (d.status === 'failed') { + txt.style.color = 'var(--err, #b91c1c)'; } else { - txt.textContent = 'Live media is mounted from installation device. Copy to RAM to allow media removal.'; + txt.style.color = 'var(--muted)'; + } + if (d.can_start_task) { btn.style.display = ''; + btn.disabled = false; + } else { + btn.style.display = 'none'; } }); function installToRAM() { diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go index e868731..3bbb57b 100644 --- a/audit/internal/webui/server_test.go +++ b/audit/internal/webui/server_test.go @@ -11,6 +11,7 @@ import ( "time" "bee/audit/internal/platform" + "bee/audit/internal/schema" ) func TestChartLegendNumber(t *testing.T) { @@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) { } } +func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) { + row := buildRuntimeToRAMRow(schema.RuntimeHealth{ToRAMStatus: "partial"}) + if row.Status != "WARNING" { + t.Fatalf("status=%q want WARNING", row.Status) + } + if !strings.Contains(row.Issue, "Partial or staged RAM copy detected") { + t.Fatalf("issue=%q", row.Issue) + } +} + func TestChartDataFromSamplesUsesFullHistory(t *testing.T) { samples := []platform.LiveMetricSample{ { @@ -1113,8 +1124,8 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) { `>Storage<`, `>GPU<`, `>PSU<`, - `badge-warn`, // cpu Warning badge - `badge-err`, // storage Critical badge + `badge-warn`, // cpu Warning badge + `badge-err`, // storage Critical badge } { if !strings.Contains(body, needle) { t.Fatalf("dashboard missing %q: %s", needle, body)