From 29179917c3717f7bf125618e7ebdffc4115c8e45 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Fri, 24 Apr 2026 10:20:12 +0300 Subject: [PATCH] Add USB blackbox log mirroring service --- audit/cmd/bee/main.go | 33 + audit/internal/app/blackbox.go | 779 ++++++++++++++++++ audit/internal/app/blackbox_test.go | 52 ++ audit/internal/app/support_bundle.go | 18 +- audit/internal/webui/api.go | 75 ++ audit/internal/webui/api_test.go | 41 + audit/internal/webui/page_export_tools.go | 112 ++- audit/internal/webui/server.go | 5 +- audit/internal/webui/server_test.go | 8 +- .../etc/systemd/system/bee-audit.service | 2 +- .../etc/systemd/system/bee-blackbox.service | 18 + .../etc/systemd/system/bee-network.service | 2 +- .../etc/systemd/system/bee-nvidia.service | 2 +- .../etc/systemd/system/bee-preflight.service | 2 +- .../etc/systemd/system/bee-web.service | 1 + 15 files changed, 1116 insertions(+), 34 deletions(-) create mode 100644 audit/internal/app/blackbox.go create mode 100644 audit/internal/app/blackbox_test.go create mode 100644 iso/overlay/etc/systemd/system/bee-blackbox.service diff --git a/audit/cmd/bee/main.go b/audit/cmd/bee/main.go index a839bd3..2e80ab5 100644 --- a/audit/cmd/bee/main.go +++ b/audit/cmd/bee/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "errors" "flag" "fmt" "io" @@ -67,6 +68,8 @@ func run(args []string, stdout, stderr io.Writer) (exitCode int) { return runSupportBundle(args[1:], stdout, stderr) case "web": return runWeb(args[1:], stdout, stderr) + case "blackbox": + return runBlackbox(args[1:], stdout, stderr) case "sat": return runSAT(args[1:], stdout, stderr) case "benchmark": @@ -90,6 +93,7 @@ func printRootUsage(w io.Writer) { bee export --target bee support-bundle --output stdout|file: bee web --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`] + bee blackbox --export-dir `+app.DefaultExportDir+` [--state-file `+app.DefaultBlackboxStatePath+`] bee sat nvidia|memory|storage|cpu [--duration ] bee benchmark nvidia 
[--profile standard|stability|overnight] bee bee-worker --export-dir `+app.DefaultExportDir+` --task-id TASK-001 @@ -109,6 +113,8 @@ func runHelp(args []string, stdout, stderr io.Writer) int { return runSupportBundle([]string{"--help"}, stdout, stdout) case "web": return runWeb([]string{"--help"}, stdout, stdout) + case "blackbox": + return runBlackbox([]string{"--help"}, stdout, stdout) case "sat": return runSAT([]string{"--help"}, stdout, stderr) case "benchmark": @@ -340,6 +346,33 @@ func runWeb(args []string, stdout, stderr io.Writer) int { return 0 } +func runBlackbox(args []string, stdout, stderr io.Writer) int { + fs := flag.NewFlagSet("blackbox", flag.ContinueOnError) + fs.SetOutput(stderr) + exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles") + statePath := fs.String("state-file", app.DefaultBlackboxStatePath, "blackbox state file") + fs.Usage = func() { + fmt.Fprintf(stderr, "usage: bee blackbox [--export-dir %s] [--state-file %s]\n", app.DefaultExportDir, app.DefaultBlackboxStatePath) + fs.PrintDefaults() + } + if err := fs.Parse(args); err != nil { + if err == flag.ErrHelp { + return 0 + } + return 2 + } + if fs.NArg() != 0 { + fs.Usage() + return 2 + } + slog.Info("starting bee blackbox", "export_dir", *exportDir, "state_file", *statePath) + if err := app.RunBlackbox(context.Background(), *exportDir, *statePath, platform.New()); err != nil && !errors.Is(err, context.Canceled) { + slog.Error("run blackbox", "err", err) + return 1 + } + return 0 +} + func runSAT(args []string, stdout, stderr io.Writer) int { if len(args) == 0 { fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration ]") diff --git a/audit/internal/app/blackbox.go b/audit/internal/app/blackbox.go new file mode 100644 index 0000000..80f65a9 --- /dev/null +++ b/audit/internal/app/blackbox.go @@ -0,0 +1,779 @@ +package app + +import ( + "bytes" + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + 
"errors" + "fmt" + "io/fs" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "bee/audit/internal/platform" +) + +const ( + blackboxMarkerName = ".bee-blackbox" + blackboxDiscoverInterval = 2 * time.Second + blackboxMinFlushPeriod = 1 * time.Second + blackboxMaxFlushPeriod = 30 * time.Second + blackboxRecoveryFastCount = 5 +) + +var DefaultBlackboxStatePath = DefaultExportDir + "/blackbox-state.json" + +var ( + blackboxExecCommand = exec.Command + blackboxNow = func() time.Time { return time.Now().UTC() } +) + +type BlackboxMarker struct { + Version int `json:"version"` + EnrollmentID string `json:"enrollment_id"` + CreatedAtUTC string `json:"created_at_utc"` + Host string `json:"host,omitempty"` +} + +type BlackboxTargetStatus struct { + EnrollmentID string `json:"enrollment_id"` + Device string `json:"device"` + FS platform.RemovableTarget `json:"fs"` + BootFolder string `json:"boot_folder"` + Status string `json:"status"` + LastSyncAtUTC string `json:"last_sync_at_utc,omitempty"` + LastCycleDuration string `json:"last_cycle_duration,omitempty"` + FlushPeriod string `json:"flush_period"` + LastError string `json:"last_error,omitempty"` + Mountpoint string `json:"mountpoint,omitempty"` +} + +type BlackboxState struct { + Status string `json:"status"` + BootStartedAtUTC string `json:"boot_started_at_utc"` + BootFolder string `json:"boot_folder"` + UpdatedAtUTC string `json:"updated_at_utc"` + Targets []BlackboxTargetStatus `json:"targets"` +} + +type blackboxRuntime struct { + exportDir string + statePath string + system *platform.System + bootStarted time.Time + bootFolder string + + mu sync.Mutex + workers map[string]*blackboxWorker +} + +type discoveredBlackboxTarget struct { + marker BlackboxMarker + target platform.RemovableTarget + seenMount string + mountedByBee bool +} + +type blackboxWorker struct { + runtime *blackboxRuntime + enrollmentID string + + mu sync.Mutex + target platform.RemovableTarget + marker BlackboxMarker + 
mountpoint string + mountedByBee bool + status string + lastSyncAt time.Time + lastDuration time.Duration + flushPeriod time.Duration + lastError string + fastCycles int + stopCh chan struct{} + stoppedCh chan struct{} +} + +func RunBlackbox(ctx context.Context, exportDir, statePath string, system *platform.System) error { + exportDir = strings.TrimSpace(exportDir) + if exportDir == "" { + exportDir = DefaultExportDir + } + statePath = strings.TrimSpace(statePath) + if statePath == "" { + statePath = DefaultBlackboxStatePath + } + if system == nil { + system = platform.New() + } + bootStarted, err := bootStartedAtUTC() + if err != nil { + bootStarted = blackboxNow() + } + rt := &blackboxRuntime{ + exportDir: exportDir, + statePath: statePath, + system: system, + bootStarted: bootStarted, + bootFolder: SupportBundleBaseName(bootStarted), + workers: make(map[string]*blackboxWorker), + } + _ = os.MkdirAll(filepath.Dir(statePath), 0755) + rt.persistState() + ticker := time.NewTicker(blackboxDiscoverInterval) + defer ticker.Stop() + for { + rt.reconcile() + select { + case <-ctx.Done(): + rt.stopAll() + return ctx.Err() + case <-ticker.C: + } + } +} + +func ReadBlackboxState(path string) (BlackboxState, error) { + path = strings.TrimSpace(path) + if path == "" { + path = DefaultBlackboxStatePath + } + raw, err := os.ReadFile(path) + if err != nil { + return BlackboxState{}, err + } + var state BlackboxState + if err := json.Unmarshal(raw, &state); err != nil { + return BlackboxState{}, err + } + return state, nil +} + +func EnableBlackboxTarget(target platform.RemovableTarget) (BlackboxMarker, error) { + target = sanitizeRemovableTarget(target) + if target.Device == "" { + return BlackboxMarker{}, fmt.Errorf("device is required") + } + mountpoint, mountedByBee, err := ensureMountedTarget(target, "marker") + if err != nil { + return BlackboxMarker{}, err + } + defer func() { + if mountedByBee { + _ = unmountTarget(mountpoint) + } + }() + + marker, _, err := 
readBlackboxMarker(mountpoint)
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		return BlackboxMarker{}, err
+	}
+	if marker.EnrollmentID == "" {
+		marker = BlackboxMarker{
+			Version:      1,
+			EnrollmentID: newBlackboxEnrollmentID(),
+			CreatedAtUTC: blackboxNow().Format(time.RFC3339),
+			Host:         hostnameOr("unknown"),
+		}
+	}
+	if err := writeBlackboxMarker(mountpoint, marker); err != nil {
+		return BlackboxMarker{}, err
+	}
+	return marker, nil
+}
+
+// DisableBlackboxTarget removes the black-box marker file from the first
+// removable target whose marker matches enrollmentID or whose device path
+// matches device. At least one of the two selectors must be non-empty.
+// It returns os.ErrNotExist when no marked target matches.
+func DisableBlackboxTarget(device, enrollmentID string) error {
+	device = strings.TrimSpace(device)
+	enrollmentID = strings.TrimSpace(enrollmentID)
+	if device == "" && enrollmentID == "" {
+		return fmt.Errorf("device or enrollment_id is required")
+	}
+	system := platform.New()
+	targets, err := system.ListRemovableTargets()
+	if err != nil {
+		return err
+	}
+	for _, target := range targets {
+		target = sanitizeRemovableTarget(target)
+		mountpoint, mountedByBee, mountErr := ensureMountedTarget(target, "marker")
+		if mountErr != nil {
+			// Targets that cannot be mounted cannot carry a readable marker.
+			continue
+		}
+		remove := false
+		marker, _, err := readBlackboxMarker(mountpoint)
+		if err == nil {
+			if enrollmentID != "" && marker.EnrollmentID == enrollmentID {
+				remove = true
+			}
+			if device != "" && target.Device == device {
+				remove = true
+			}
+		}
+		if remove {
+			err = os.Remove(filepath.Join(mountpoint, blackboxMarkerName))
+		}
+		// Release the temporary mount before returning or moving on.
+		if mountedByBee {
+			_ = unmountTarget(mountpoint)
+		}
+		if remove {
+			return err
+		}
+	}
+	return os.ErrNotExist
+}
+
+// reconcile brings the set of running workers in line with the marked
+// targets that are currently attached: new targets get a worker, known
+// targets get their device details refreshed, and workers whose target
+// disappeared are stopped.
+func (rt *blackboxRuntime) reconcile() {
+	discovered, err := rt.discoverMarkedTargets()
+	if err != nil {
+		// A failed listing says nothing about which targets are attached.
+		// Keep the current workers instead of treating it as "no targets",
+		// which would stop every mirror on a transient error.
+		rt.persistState()
+		return
+	}
+
+	rt.mu.Lock()
+	seen := make(map[string]struct{}, len(discovered))
+	for _, found := range discovered {
+		seen[found.marker.EnrollmentID] = struct{}{}
+		worker, ok := rt.workers[found.marker.EnrollmentID]
+		if !ok {
+			worker = newBlackboxWorker(rt, found)
+			rt.workers[found.marker.EnrollmentID] = worker
+			go worker.run()
+			continue
+		}
+		worker.update(found)
+	}
+	var stale []*blackboxWorker
+	for id, worker := range rt.workers {
+		if _, ok := seen[id]; ok {
+			continue
+		}
+		stale = append(stale, worker)
+		delete(rt.workers, id)
+	}
+	rt.persistStateLocked()
+	rt.mu.Unlock()
+
+	// stop waits for the worker goroutine to exit, and that goroutine takes
+	// rt.mu when it persists state. Stopping while rt.mu is held would
+	// deadlock, so stale workers are stopped only after the unlock, the same
+	// way stopAll does it.
+	for _, worker := range stale {
+		worker.stop()
+	}
+}
+
+// stopAll detaches every worker from the runtime, persists the now-empty
+// state, and then stops the workers outside the runtime lock.
+func (rt *blackboxRuntime) stopAll() {
+	rt.mu.Lock()
+	workers := make([]*blackboxWorker, 0, len(rt.workers))
+	for _, worker := range rt.workers {
+		workers = append(workers, worker)
+	}
+	rt.workers = map[string]*blackboxWorker{}
+	rt.persistStateLocked()
+	rt.mu.Unlock()
+	for _, worker := range workers {
+		worker.stop()
+	}
+}
+
+// discoverMarkedTargets lists removable targets and returns those that carry
+// a readable marker with a non-empty enrollment ID, sorted by enrollment ID.
+// Targets that cannot be mounted or have no valid marker are skipped.
+func (rt *blackboxRuntime) discoverMarkedTargets() ([]discoveredBlackboxTarget, error) {
+	targets, err := rt.system.ListRemovableTargets()
+	if err != nil {
+		return nil, err
+	}
+	var out []discoveredBlackboxTarget
+	for _, rawTarget := range targets {
+		target := sanitizeRemovableTarget(rawTarget)
+		if target.Device == "" {
+			continue
+		}
+		mountpoint, mountedByBee, err := ensureMountedTarget(target, "probe")
+		if err != nil {
+			continue
+		}
+		marker, ok, err := readBlackboxMarker(mountpoint)
+		// The probe mount is only needed to read the marker. Release it on
+		// every path, including a marker that parses but has an empty
+		// enrollment ID, so the shared probe mountpoint never stays mounted.
+		if mountedByBee {
+			_ = unmountTarget(mountpoint)
+		}
+		if err != nil || !ok || marker.EnrollmentID == "" {
+			continue
+		}
+		out = append(out, discoveredBlackboxTarget{
+			marker:       marker,
+			target:       target,
+			seenMount:    mountpoint,
+			mountedByBee: mountedByBee,
+		})
+	}
+	sort.Slice(out, func(i, j int) bool {
+		return out[i].marker.EnrollmentID < out[j].marker.EnrollmentID
+	})
+	return out, nil
+}
+
+// newBlackboxWorker builds a worker for a discovered target. The worker
+// starts at the minimum flush period with status "running"; the caller is
+// responsible for starting run.
+func newBlackboxWorker(rt *blackboxRuntime, found discoveredBlackboxTarget) *blackboxWorker {
+	return &blackboxWorker{
+		runtime:      rt,
+		enrollmentID: found.marker.EnrollmentID,
+		target:       found.target,
+		marker:       found.marker,
+		flushPeriod:  blackboxMinFlushPeriod,
+		status:       "running",
+		stopCh:       make(chan struct{}),
+		stoppedCh:    make(chan struct{}),
+	}
+}
+
+// run is the worker loop: one sync cycle, then a wait of the current flush
+// period, until stopCh is closed. stoppedCh is closed when the loop exits.
+func (w *blackboxWorker) run() {
+	defer close(w.stoppedCh)
+	for {
+		start := time.Now()
+		err := w.syncCycle()
+		duration := time.Since(start)
+		w.finishCycle(duration, err)
+
+		wait := w.currentFlushPeriod()
+		timer := 
time.NewTimer(wait)
+		select {
+		case <-w.stopCh:
+			timer.Stop()
+			w.cleanup()
+			return
+		case <-timer.C:
+		}
+	}
+}
+
+// update replaces the worker's target and marker with freshly discovered
+// values; the next sync cycle picks them up via snapshotTarget.
+func (w *blackboxWorker) update(found discoveredBlackboxTarget) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	w.target = found.target
+	w.marker = found.marker
+}
+
+// stop closes stopCh if it is still open and blocks until run has exited.
+func (w *blackboxWorker) stop() {
+	select {
+	case <-w.stopCh:
+	default:
+		close(w.stopCh)
+	}
+	<-w.stoppedCh
+}
+
+// currentFlushPeriod returns the wait between sync cycles.
+func (w *blackboxWorker) currentFlushPeriod() time.Duration {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	return w.flushPeriod
+}
+
+// finishCycle records the outcome of one sync cycle (duration, status, last
+// error, fast-cycle streak), recomputes the flush period, and persists the
+// runtime state.
+func (w *blackboxWorker) finishCycle(duration time.Duration, err error) {
+	w.mu.Lock()
+	w.lastDuration = duration
+	if err != nil {
+		w.status = "degraded"
+		w.lastError = err.Error()
+		w.fastCycles = 0
+		w.flushPeriod = adjustFlushPeriod(w.flushPeriod, duration, false, 0)
+	} else {
+		w.status = "running"
+		w.lastSyncAt = blackboxNow()
+		w.lastError = ""
+		// A cycle counts as fast when it used at most half of the period.
+		if duration <= w.flushPeriod/2 {
+			w.fastCycles++
+		} else {
+			w.fastCycles = 0
+		}
+		w.flushPeriod = adjustFlushPeriod(w.flushPeriod, duration, true, w.fastCycles)
+	}
+	w.mu.Unlock()
+
+	// persistState locks the runtime and then every worker's mu, including
+	// this one. sync.Mutex is not reentrant, so w.mu must be released before
+	// the call; holding it here would block this goroutine forever with the
+	// runtime lock held.
+	w.runtime.persistState()
+}
+
+// adjustFlushPeriod computes the next flush period. When the cycle took
+// longer than the current period, the period grows to 1.25x the larger of
+// the two. After at least blackboxRecoveryFastCount consecutive fast
+// successful cycles it shrinks to 0.9x the current period. The result is
+// clamped to [blackboxMinFlushPeriod, blackboxMaxFlushPeriod]. Non-positive
+// inputs fall back to the minimum period and the current period respectively.
+func adjustFlushPeriod(current, duration time.Duration, success bool, fastCycles int) time.Duration {
+	if current <= 0 {
+		current = blackboxMinFlushPeriod
+	}
+	if duration <= 0 {
+		duration = current
+	}
+	next := current
+	if duration > current {
+		growA := time.Duration(float64(current) * 1.25)
+		growB := time.Duration(float64(duration) * 1.25)
+		if growB > growA {
+			next = growB
+		} else {
+			next = growA
+		}
+	}
+	if success && fastCycles >= blackboxRecoveryFastCount {
+		next = time.Duration(float64(current) * 0.9)
+	}
+	if next < blackboxMinFlushPeriod {
+		next = blackboxMinFlushPeriod
+	}
+	if next > blackboxMaxFlushPeriod {
+		next = blackboxMaxFlushPeriod
+	}
+	return next
+}
+
+func (w *blackboxWorker) syncCycle() error {
+	target, marker := w.snapshotTarget()
+	mountpoint, mountedByBee, err := ensureMountedTarget(target, marker.EnrollmentID)
+	if err != nil {
+		
return err + } + w.recordMountpoint(mountpoint, mountedByBee) + + root := filepath.Join(mountpoint, w.runtime.bootFolder) + if err := os.MkdirAll(filepath.Join(root, "export"), 0755); err != nil { + return err + } + if err := syncDirectoryTree(w.runtime.exportDir, filepath.Join(root, "export")); err != nil { + return err + } + if err := w.captureSnapshots(root); err != nil { + return err + } + return syncFilesystem(root) +} + +func (w *blackboxWorker) cleanup() { + w.mu.Lock() + mountpoint := w.mountpoint + mountedByBee := w.mountedByBee + w.mu.Unlock() + if mountedByBee && mountpoint != "" { + _ = unmountTarget(mountpoint) + } +} + +func (w *blackboxWorker) snapshotTarget() (platform.RemovableTarget, BlackboxMarker) { + w.mu.Lock() + defer w.mu.Unlock() + return w.target, w.marker +} + +func (w *blackboxWorker) recordMountpoint(mountpoint string, mountedByBee bool) { + w.mu.Lock() + defer w.mu.Unlock() + w.mountpoint = mountpoint + w.mountedByBee = mountedByBee +} + +func (w *blackboxWorker) captureSnapshots(root string) error { + if err := captureCommandAtomic(filepath.Join(root, "systemd", "combined.journal.log"), "journalctl", "--no-pager", "--since", w.runtime.bootStarted.Format(time.RFC3339)); err != nil { + return err + } + for _, svc := range supportBundleServices { + if err := captureCommandAtomic(filepath.Join(root, "systemd", svc+".journal.log"), "journalctl", "--no-pager", "-u", svc, "--since", w.runtime.bootStarted.Format(time.RFC3339)); err != nil { + return err + } + if err := captureCommandAtomic(filepath.Join(root, "systemd", svc+".status.txt"), "systemctl", "status", svc, "--no-pager"); err != nil { + return err + } + } + if err := captureCommandAtomic(filepath.Join(root, "system", "dmesg.txt"), "dmesg"); err != nil { + return err + } + for _, item := range supportBundleOptionalFiles { + if err := copyFileIfChanged(item.src, filepath.Join(root, item.name)); err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + } + return nil +} + 
+func (rt *blackboxRuntime) persistState() { + rt.mu.Lock() + defer rt.mu.Unlock() + rt.persistStateLocked() +} + +func (rt *blackboxRuntime) persistStateLocked() { + state := BlackboxState{ + Status: "disabled", + BootStartedAtUTC: rt.bootStarted.Format(time.RFC3339), + BootFolder: rt.bootFolder, + UpdatedAtUTC: blackboxNow().Format(time.RFC3339), + Targets: make([]BlackboxTargetStatus, 0, len(rt.workers)), + } + if len(rt.workers) > 0 { + state.Status = "running" + } + for _, worker := range rt.workers { + worker.mu.Lock() + targetState := BlackboxTargetStatus{ + EnrollmentID: worker.enrollmentID, + Device: worker.target.Device, + FS: worker.target, + BootFolder: rt.bootFolder, + Status: worker.status, + FlushPeriod: worker.flushPeriod.String(), + LastError: worker.lastError, + Mountpoint: worker.mountpoint, + } + if !worker.lastSyncAt.IsZero() { + targetState.LastSyncAtUTC = worker.lastSyncAt.Format(time.RFC3339) + } + if worker.lastDuration > 0 { + targetState.LastCycleDuration = worker.lastDuration.String() + } + if worker.status == "degraded" { + state.Status = "degraded" + } + worker.mu.Unlock() + state.Targets = append(state.Targets, targetState) + } + sort.Slice(state.Targets, func(i, j int) bool { + return state.Targets[i].EnrollmentID < state.Targets[j].EnrollmentID + }) + _ = writeJSONAtomic(rt.statePath, state) +} + +func bootStartedAtUTC() (time.Time, error) { + raw, err := os.ReadFile("/proc/stat") + if err != nil { + return time.Time{}, err + } + for _, line := range strings.Split(string(raw), "\n") { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "btime ") { + continue + } + parts := strings.Fields(line) + if len(parts) != 2 { + break + } + sec, err := time.ParseDuration(parts[1] + "s") + if err != nil { + break + } + return time.Unix(int64(sec/time.Second), 0).UTC(), nil + } + return time.Time{}, fmt.Errorf("boot time not found") +} + +func newBlackboxEnrollmentID() string { + var buf [8]byte + if _, err := rand.Read(buf[:]); err 
!= nil { + return fmt.Sprintf("bb-%d", time.Now().UnixNano()) + } + return "bb-" + hex.EncodeToString(buf[:]) +} + +func sanitizeRemovableTarget(target platform.RemovableTarget) platform.RemovableTarget { + target.Device = strings.TrimSpace(target.Device) + target.FSType = strings.TrimSpace(target.FSType) + target.Size = strings.TrimSpace(target.Size) + target.Label = strings.TrimSpace(target.Label) + target.Model = strings.TrimSpace(target.Model) + target.Mountpoint = strings.TrimSpace(target.Mountpoint) + return target +} + +func ensureMountedTarget(target platform.RemovableTarget, suffix string) (mountpoint string, mountedByBee bool, retErr error) { + target = sanitizeRemovableTarget(target) + if target.Mountpoint != "" { + if err := ensureWritableBlackboxMountpoint(target.Mountpoint); err == nil { + return target.Mountpoint, false, nil + } + } + mountpoint = filepath.Join("/tmp", "bee-blackbox-"+sanitizeFilename(suffix)) + if err := os.MkdirAll(mountpoint, 0755); err != nil { + return "", false, err + } + if raw, err := blackboxExecCommand("mount", target.Device, mountpoint).CombinedOutput(); err != nil { + return "", false, formatBlackboxMountTargetError(target, string(raw), err) + } + if err := ensureWritableBlackboxMountpoint(mountpoint); err != nil { + _ = unmountTarget(mountpoint) + return "", false, err + } + return mountpoint, true, nil +} + +func unmountTarget(mountpoint string) error { + _ = blackboxExecCommand("sync").Run() + raw, err := blackboxExecCommand("umount", mountpoint).CombinedOutput() + if err != nil { + msg := strings.TrimSpace(string(raw)) + if msg == "" { + return err + } + return fmt.Errorf("%s: %w", msg, err) + } + return nil +} + +func readBlackboxMarker(mountpoint string) (BlackboxMarker, bool, error) { + raw, err := os.ReadFile(filepath.Join(mountpoint, blackboxMarkerName)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return BlackboxMarker{}, false, os.ErrNotExist + } + return BlackboxMarker{}, false, err + } + var 
marker BlackboxMarker
+	if err := json.Unmarshal(raw, &marker); err != nil {
+		return BlackboxMarker{}, false, err
+	}
+	return marker, true, nil
+}
+
+// writeBlackboxMarker writes marker as JSON to the marker file under
+// mountpoint, defaulting an unset Version to 1.
+func writeBlackboxMarker(mountpoint string, marker BlackboxMarker) error {
+	if marker.Version == 0 {
+		marker.Version = 1
+	}
+	return writeJSONAtomic(filepath.Join(mountpoint, blackboxMarkerName), marker)
+}
+
+// syncDirectoryTree mirrors srcDir into dstDir: directories are created,
+// files are copied when their content differs, and anything under dstDir
+// that no longer exists under srcDir is removed.
+func syncDirectoryTree(srcDir, dstDir string) error {
+	seen := make(map[string]struct{})
+	err := filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		rel, err := filepath.Rel(srcDir, path)
+		if err != nil {
+			return err
+		}
+		rel = filepath.Clean(rel)
+		if rel == "." {
+			seen["."] = struct{}{}
+			return os.MkdirAll(dstDir, 0755)
+		}
+		seen[rel] = struct{}{}
+		dstPath := filepath.Join(dstDir, rel)
+		if d.IsDir() {
+			info, err := d.Info()
+			if err != nil {
+				return err
+			}
+			return os.MkdirAll(dstPath, info.Mode().Perm())
+		}
+		return copyFileIfChanged(path, dstPath)
+	})
+	if err != nil {
+		return err
+	}
+	return removeMissingPaths(dstDir, seen)
+}
+
+// removeMissingPaths deletes every entry under dstDir whose path relative to
+// dstDir is not a key of seen. dstDir itself is never removed.
+func removeMissingPaths(dstDir string, seen map[string]struct{}) error {
+	return filepath.WalkDir(dstDir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		rel, err := filepath.Rel(dstDir, path)
+		if err != nil {
+			return err
+		}
+		rel = filepath.Clean(rel)
+		if rel == "." {
+			return nil
+		}
+		if _, ok := seen[rel]; ok {
+			return nil
+		}
+		if err := os.RemoveAll(path); err != nil {
+			return err
+		}
+		if d.IsDir() {
+			// The directory is gone. Without SkipDir, WalkDir would try to
+			// read it next and abort the walk with a not-exist error.
+			return filepath.SkipDir
+		}
+		return nil
+	})
+}
+
+// copyFileIfChanged copies src to dst unless dst already holds identical
+// bytes. When src is a directory it only ensures dst exists as a directory.
+func copyFileIfChanged(src, dst string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+	if info.IsDir() {
+		return os.MkdirAll(dst, info.Mode().Perm())
+	}
+	srcData, err := os.ReadFile(src)
+	if err != nil {
+		return err
+	}
+	if dstData, err := os.ReadFile(dst); err == nil && bytes.Equal(dstData, srcData) {
+		return nil
+	}
+	return writeFileAtomic(dst, srcData, info.Mode().Perm())
+}
+
+// captureCommandAtomic runs the command and writes its combined output to
+// dst. A command error is not returned: when there is no output, the error
+// text (or "no output") is written instead, so the snapshot file always
+// records what happened.
+func captureCommandAtomic(dst string, name string, args ...string) error {
+	raw, err := blackboxExecCommand(name, args...).CombinedOutput()
+	if len(raw) == 0 {
+		if err != nil {
+			raw = []byte(err.Error() + "\n")
+		} else {
+			raw = []byte("no output\n")
+		}
+	}
+	return writeFileAtomic(dst, raw, 0644)
+}
+
+// writeJSONAtomic marshals v as indented JSON with a trailing newline and
+// writes it to path via writeFileAtomic.
+func writeJSONAtomic(path string, v any) error {
+	raw, err := json.MarshalIndent(v, "", " ")
+	if err != nil {
+		return err
+	}
+	raw = append(raw, '\n')
+	return writeFileAtomic(path, raw, 0644)
+}
+
+// writeFileAtomic writes data to path by filling path+".tmp", syncing it,
+// and renaming it over path. It creates the parent directory if needed and
+// does nothing when path already holds identical bytes. On failure the
+// temporary file is removed so no stray ".tmp" files accumulate on the
+// target.
+func writeFileAtomic(path string, data []byte, perm os.FileMode) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return err
+	}
+	if existing, err := os.ReadFile(path); err == nil && bytes.Equal(existing, data) {
+		return nil
+	}
+	tmp := path + ".tmp"
+	f, err := os.OpenFile(tmp, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, perm)
+	if err != nil {
+		return err
+	}
+	if _, err := f.Write(data); err != nil {
+		_ = f.Close()
+		_ = os.Remove(tmp) // best effort: the write error is what matters
+		return err
+	}
+	if err := f.Sync(); err != nil {
+		_ = f.Close()
+		_ = os.Remove(tmp) // best effort
+		return err
+	}
+	if err := f.Close(); err != nil {
+		_ = os.Remove(tmp) // best effort
+		return err
+	}
+	if err := os.Rename(tmp, path); err != nil {
+		_ = os.Remove(tmp) // best effort
+		return err
+	}
+	return syncFilesystem(filepath.Dir(path))
+}
+
+// syncFilesystem runs the external "sync" command. The path argument is
+// currently unused.
+func syncFilesystem(path string) error {
+	return blackboxExecCommand("sync").Run()
+}
+
+func ensureWritableBlackboxMountpoint(mountpoint string) error {
+	probe, err := os.CreateTemp(mountpoint, ".bee-blackbox-write-test-*")
+	if err != nil {
+		return 
fmt.Errorf("target filesystem is not writable: %w", err) + } + name := probe.Name() + if closeErr := probe.Close(); closeErr != nil { + _ = os.Remove(name) + return closeErr + } + if err := os.Remove(name); err != nil { + return err + } + return nil +} + +func formatBlackboxMountTargetError(target platform.RemovableTarget, raw string, err error) error { + msg := strings.TrimSpace(raw) + fstype := strings.ToLower(strings.TrimSpace(target.FSType)) + if fstype == "exfat" && strings.Contains(strings.ToLower(msg), "unknown filesystem type 'exfat'") { + return fmt.Errorf("mount %s: exFAT support is missing in this ISO build: %w", target.Device, err) + } + if msg == "" { + return err + } + return fmt.Errorf("%s: %w", msg, err) +} diff --git a/audit/internal/app/blackbox_test.go b/audit/internal/app/blackbox_test.go new file mode 100644 index 0000000..e7c2b8e --- /dev/null +++ b/audit/internal/app/blackbox_test.go @@ -0,0 +1,52 @@ +package app + +import ( + "path/filepath" + "testing" + "time" +) + +func TestAdjustFlushPeriodGrowsOnSlowCycle(t *testing.T) { + current := 2 * time.Second + got := adjustFlushPeriod(current, 4*time.Second, false, 0) + if got <= current { + t.Fatalf("adjustFlushPeriod=%s want > %s", got, current) + } +} + +func TestAdjustFlushPeriodShrinksAfterFastCycles(t *testing.T) { + current := 10 * time.Second + got := adjustFlushPeriod(current, 2*time.Second, true, blackboxRecoveryFastCount) + if got >= current { + t.Fatalf("adjustFlushPeriod=%s want < %s", got, current) + } + if got < blackboxMinFlushPeriod { + t.Fatalf("adjustFlushPeriod=%s below min %s", got, blackboxMinFlushPeriod) + } +} + +func TestReadBlackboxState(t *testing.T) { + path := filepath.Join(t.TempDir(), "blackbox-state.json") + want := BlackboxState{ + Status: "running", + BootStartedAtUTC: "2026-04-24T00:00:00Z", + BootFolder: "boot-folder", + UpdatedAtUTC: "2026-04-24T00:00:01Z", + Targets: []BlackboxTargetStatus{{ + EnrollmentID: "bb-1", + Device: "/dev/sdb1", + Status: "running", 
+ FlushPeriod: "1s", + }}, + } + if err := writeJSONAtomic(path, want); err != nil { + t.Fatalf("writeJSONAtomic: %v", err) + } + got, err := ReadBlackboxState(path) + if err != nil { + t.Fatalf("ReadBlackboxState: %v", err) + } + if got.Status != want.Status || got.BootFolder != want.BootFolder || len(got.Targets) != 1 || got.Targets[0].EnrollmentID != "bb-1" { + t.Fatalf("state=%+v", got) + } +} diff --git a/audit/internal/app/support_bundle.go b/audit/internal/app/support_bundle.go index d9f5158..fc95334 100644 --- a/audit/internal/app/support_bundle.go +++ b/audit/internal/app/support_bundle.go @@ -15,6 +15,7 @@ import ( ) var supportBundleServices = []string{ + "bee-blackbox.service", "bee-audit.service", "bee-web.service", "bee-network.service", @@ -256,11 +257,6 @@ func BuildSupportBundle(exportDir string) (string, error) { } now := time.Now().UTC() - date := now.Format("2006-01-02") - tod := now.Format("150405") - ver := bundleVersion() - model := serverModelForBundle() - sn := serverSerialForBundle() stageRoot := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-stage-%s-%s", sanitizeFilename(hostnameOr("unknown")), now.Format("20060102-150405"))) if err := os.MkdirAll(stageRoot, 0755); err != nil { @@ -294,7 +290,7 @@ func BuildSupportBundle(exportDir string) (string, error) { return "", err } - archiveName := fmt.Sprintf("%s (BEE-SP v%s) %s %s %s.tar.gz", date, ver, model, sn, tod) + archiveName := SupportBundleBaseName(now) + ".tar.gz" archivePath := filepath.Join(os.TempDir(), archiveName) if err := createSupportTarGz(archivePath, stageRoot); err != nil { return "", err @@ -302,6 +298,16 @@ func BuildSupportBundle(exportDir string) (string, error) { return archivePath, nil } +func SupportBundleBaseName(at time.Time) string { + at = at.UTC() + date := at.Format("2006-01-02") + tod := at.Format("150405") + ver := bundleVersion() + model := serverModelForBundle() + sn := serverSerialForBundle() + return fmt.Sprintf("%s (BEE-SP v%s) %s %s %s", date, 
ver, model, sn, tod) +} + func LatestSupportBundlePath() (string, error) { return latestSupportBundlePath(os.TempDir()) } diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go index 036c6e7..cd1d367 100644 --- a/audit/internal/webui/api.go +++ b/audit/internal/webui/api.go @@ -1038,6 +1038,81 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques writeJSON(w, map[string]string{"status": "ok", "message": result.Body}) } +func (h *handler) handleAPIBlackboxStatus(w http.ResponseWriter, _ *http.Request) { + state, err := app.ReadBlackboxState(filepath.Join(h.opts.ExportDir, "blackbox-state.json")) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + writeJSON(w, app.BlackboxState{Status: "disabled", Targets: []app.BlackboxTargetStatus{}}) + return + } + writeError(w, http.StatusInternalServerError, err.Error()) + return + } + if state.Targets == nil { + state.Targets = []app.BlackboxTargetStatus{} + } + writeJSON(w, state) +} + +func (h *handler) handleAPIBlackboxEnable(w http.ResponseWriter, r *http.Request) { + if h.opts.App == nil { + writeError(w, http.StatusServiceUnavailable, "app not configured") + return + } + var target platform.RemovableTarget + if err := json.NewDecoder(r.Body).Decode(&target); err != nil || strings.TrimSpace(target.Device) == "" { + writeError(w, http.StatusBadRequest, "device is required") + return + } + targets, err := h.opts.App.ListRemovableTargets() + if err != nil { + writeError(w, http.StatusInternalServerError, err.Error()) + return + } + allowed := false + for _, candidate := range targets { + if candidate.Device == target.Device { + target = candidate + allowed = true + break + } + } + if !allowed { + writeError(w, http.StatusBadRequest, "device not in removable target list") + return + } + marker, err := app.EnableBlackboxTarget(target) + if err != nil { + writeError(w, http.StatusInternalServerError, err.Error()) + return + } + writeJSON(w, map[string]any{ + "status": "ok", + 
"message": "Black-box marker written.", + "enrollment_id": marker.EnrollmentID, + }) +} + +func (h *handler) handleAPIBlackboxDisable(w http.ResponseWriter, r *http.Request) { + var req struct { + Device string `json:"device"` + EnrollmentID string `json:"enrollment_id"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid request body") + return + } + if err := app.DisableBlackboxTarget(req.Device, req.EnrollmentID); err != nil { + if errors.Is(err, os.ErrNotExist) { + writeError(w, http.StatusNotFound, "black-box target not found") + return + } + writeError(w, http.StatusInternalServerError, err.Error()) + return + } + writeJSON(w, map[string]string{"status": "ok", "message": "Black-box marker removed."}) +} + // ── GPU presence ────────────────────────────────────────────────────────────── func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) { diff --git a/audit/internal/webui/api_test.go b/audit/internal/webui/api_test.go index d132ad7..f43cea4 100644 --- a/audit/internal/webui/api_test.go +++ b/audit/internal/webui/api_test.go @@ -3,6 +3,8 @@ package webui import ( "encoding/json" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" @@ -44,6 +46,45 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) { } } +func TestHandleAPIBlackboxStatusReturnsDisabledWhenStateMissing(t *testing.T) { + h := &handler{opts: HandlerOptions{ExportDir: t.TempDir()}} + rec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/api/blackbox/status", nil) + + h.handleAPIBlackboxStatus(rec, req) + + if rec.Code != 200 { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + var state app.BlackboxState + if err := json.Unmarshal(rec.Body.Bytes(), &state); err != nil { + t.Fatalf("decode state: %v", err) + } + if state.Status != "disabled" { + t.Fatalf("status=%q want disabled", state.Status) + } +} + +func 
TestHandleAPIBlackboxStatusReturnsPersistedState(t *testing.T) { + exportDir := t.TempDir() + statePath := filepath.Join(exportDir, "blackbox-state.json") + if err := os.WriteFile(statePath, []byte(`{"status":"running","boot_folder":"boot-folder","targets":[{"enrollment_id":"bb-1","device":"/dev/sdb1","status":"running","flush_period":"1s"}]}`), 0644); err != nil { + t.Fatalf("write state: %v", err) + } + h := &handler{opts: HandlerOptions{ExportDir: exportDir}} + rec := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/api/blackbox/status", nil) + + h.handleAPIBlackboxStatus(rec, req) + + if rec.Code != 200 { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + if !strings.Contains(rec.Body.String(), `"boot_folder":"boot-folder"`) { + t.Fatalf("body=%s", rec.Body.String()) + } +} + func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) { globalQueue.mu.Lock() originalTasks := globalQueue.tasks diff --git a/audit/internal/webui/page_export_tools.go b/audit/internal/webui/page_export_tools.go index f79ac53..910ead6 100644 --- a/audit/internal/webui/page_export_tools.go +++ b/audit/internal/webui/page_export_tools.go @@ -102,47 +102,69 @@ window.supportBundleDownload = function() { func renderUSBExportCard() string { return `
-
Export to USB - +
USB Black-Box +
` + renderUSBExportInline() + `
` } func renderUSBExportInline() string { - return `

Write audit JSON or support bundle directly to a removable USB drive.

+ return `

Marks removable USB devices as black-box targets. The dedicated bee-blackbox service mirrors export files and system logs into a boot-scoped folder and resumes automatically after restart.

Scanning for USB devices...
+
Loading black-box status...
` } @@ -382,7 +458,7 @@ function installToRAM() {

Downloads a tar.gz archive of all audit files, SAT results, and logs.

` + renderSupportBundleInline() + `
-
Export to USB
+
USB Black-Box
` + renderUSBExportInline() + `
diff --git a/audit/internal/webui/server.go b/audit/internal/webui/server.go index 9b523d8..a7ba4c7 100644 --- a/audit/internal/webui/server.go +++ b/audit/internal/webui/server.go @@ -301,8 +301,9 @@ func NewHandler(opts HandlerOptions) http.Handler { // Export mux.HandleFunc("GET /api/export/list", h.handleAPIExportList) mux.HandleFunc("GET /api/export/usb", h.handleAPIExportUSBTargets) - mux.HandleFunc("POST /api/export/usb/audit", h.handleAPIExportUSBAudit) - mux.HandleFunc("POST /api/export/usb/bundle", h.handleAPIExportUSBBundle) + mux.HandleFunc("GET /api/blackbox/status", h.handleAPIBlackboxStatus) + mux.HandleFunc("POST /api/blackbox/enable", h.handleAPIBlackboxEnable) + mux.HandleFunc("POST /api/blackbox/disable", h.handleAPIBlackboxDisable) // Tools mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck) diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go index 80c6524..37ee239 100644 --- a/audit/internal/webui/server_test.go +++ b/audit/internal/webui/server_test.go @@ -671,11 +671,11 @@ func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) { if !strings.Contains(body, `id="boot-source-text"`) { t.Fatalf("tools page missing boot source field: %s", body) } - if !strings.Contains(body, `Export to USB`) { - t.Fatalf("tools page missing export to usb section: %s", body) + if !strings.Contains(body, `USB Black-Box`) { + t.Fatalf("tools page missing usb black-box section: %s", body) } - if !strings.Contains(body, `Support Bundle`) { - t.Fatalf("tools page missing support bundle usb button: %s", body) + if !strings.Contains(body, `/api/blackbox/status`) { + t.Fatalf("tools page missing black-box status api usage: %s", body) } } diff --git a/iso/overlay/etc/systemd/system/bee-audit.service b/iso/overlay/etc/systemd/system/bee-audit.service index ec19395..08e989b 100644 --- a/iso/overlay/etc/systemd/system/bee-audit.service +++ b/iso/overlay/etc/systemd/system/bee-audit.service @@ -1,6 +1,6 @@ [Unit] 
Description=Bee: hardware audit -After=bee-preflight.service bee-network.service bee-nvidia.service +After=bee-preflight.service bee-network.service bee-nvidia.service bee-blackbox.service [Service] Type=oneshot diff --git a/iso/overlay/etc/systemd/system/bee-blackbox.service b/iso/overlay/etc/systemd/system/bee-blackbox.service new file mode 100644 index 0000000..997fdaa --- /dev/null +++ b/iso/overlay/etc/systemd/system/bee-blackbox.service @@ -0,0 +1,18 @@ +[Unit] +Description=Bee: USB black-box log mirror +After=local-fs.target +Before=bee-network.service bee-nvidia.service bee-preflight.service bee-audit.service bee-web.service +StartLimitIntervalSec=0 + +[Service] +Type=simple +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-blackbox.log /usr/local/bin/bee blackbox --export-dir /appdata/bee/export --state-file /appdata/bee/export/blackbox-state.json +Restart=always +RestartSec=1 +StandardOutput=journal +StandardError=journal +OOMScoreAdjust=-900 +Nice=0 + +[Install] +WantedBy=multi-user.target diff --git a/iso/overlay/etc/systemd/system/bee-network.service b/iso/overlay/etc/systemd/system/bee-network.service index 080ad3a..37aba2f 100644 --- a/iso/overlay/etc/systemd/system/bee-network.service +++ b/iso/overlay/etc/systemd/system/bee-network.service @@ -1,6 +1,6 @@ [Unit] Description=Bee: bring up network interfaces via DHCP -After=local-fs.target +After=local-fs.target bee-blackbox.service Before=network-online.target bee-audit.service [Service] diff --git a/iso/overlay/etc/systemd/system/bee-nvidia.service b/iso/overlay/etc/systemd/system/bee-nvidia.service index 1bddb2f..8d31ea1 100644 --- a/iso/overlay/etc/systemd/system/bee-nvidia.service +++ b/iso/overlay/etc/systemd/system/bee-nvidia.service @@ -1,6 +1,6 @@ [Unit] Description=Bee: load NVIDIA kernel modules and create device nodes -After=local-fs.target udev.service +After=local-fs.target udev.service bee-blackbox.service Before=bee-audit.service [Service] diff --git 
a/iso/overlay/etc/systemd/system/bee-preflight.service b/iso/overlay/etc/systemd/system/bee-preflight.service index b6c73da..66f4d58 100644 --- a/iso/overlay/etc/systemd/system/bee-preflight.service +++ b/iso/overlay/etc/systemd/system/bee-preflight.service @@ -1,6 +1,6 @@ [Unit] Description=Bee: runtime preflight self-check -After=bee-network.service bee-nvidia.service +After=bee-network.service bee-nvidia.service bee-blackbox.service Before=bee-audit.service [Service] diff --git a/iso/overlay/etc/systemd/system/bee-web.service b/iso/overlay/etc/systemd/system/bee-web.service index e9f8925..adcc2ff 100644 --- a/iso/overlay/etc/systemd/system/bee-web.service +++ b/iso/overlay/etc/systemd/system/bee-web.service @@ -1,5 +1,6 @@ [Unit] Description=Bee: hardware audit web viewer +After=bee-blackbox.service StartLimitIntervalSec=0 [Service]