package main import ( "context" "errors" "flag" "fmt" "io" "log/slog" "os" "runtime/debug" "strconv" "strings" "bee/audit/internal/app" "bee/audit/internal/platform" "bee/audit/internal/runtimeenv" "bee/audit/internal/webui" ) var Version = "dev" func buildLabel() string { label := strings.TrimSpace(Version) if label == "" { return "dev" } return label } func main() { os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) } func run(args []string, stdout, stderr io.Writer) (exitCode int) { slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ Level: slog.LevelInfo, }))) defer func() { if rec := recover(); rec != nil { slog.Error("fatal panic", "panic", fmt.Sprint(rec), "stack", string(debug.Stack()), ) exitCode = 1 } }() if len(args) == 0 { printRootUsage(stderr) return 2 } switch args[0] { case "help", "--help", "-h": if len(args) > 1 { return runHelp(args[1:], stdout, stderr) } printRootUsage(stdout) return 0 case "audit": return runAudit(args[1:], stdout, stderr) case "export": return runExport(args[1:], stdout, stderr) case "preflight": return runPreflight(args[1:], stdout, stderr) case "install-to-ram": return runInstallToRAM(args[1:], stdout, stderr) case "support-bundle": return runSupportBundle(args[1:], stdout, stderr) case "web": return runWeb(args[1:], stdout, stderr) case "blackbox": return runBlackbox(args[1:], stdout, stderr) case "sat": return runSAT(args[1:], stdout, stderr) case "benchmark": return runBenchmark(args[1:], stdout, stderr) case "bee-worker": return runBeeWorker(args[1:], stdout, stderr) case "version", "--version", "-version": fmt.Fprintln(stdout, Version) return 0 default: fmt.Fprintf(stderr, "bee: unknown command %q\n\n", args[0]) printRootUsage(stderr) return 2 } } func printRootUsage(w io.Writer) { fmt.Fprintln(w, `bee commands: bee audit --runtime auto|local|livecd --output stdout|file: bee preflight --output stdout|file: bee install-to-ram bee export --target bee support-bundle --output stdout|file: bee web --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`] bee blackbox --export-dir `+app.DefaultExportDir+` [--state-file `+app.DefaultBlackboxStatePath+`] bee sat nvidia|memory|storage|cpu [--duration ] bee benchmark nvidia [--profile standard|stability|overnight] bee bee-worker --export-dir `+app.DefaultExportDir+` --task-id TASK-001 bee version bee help [command]`) } func runHelp(args []string, stdout, stderr io.Writer) int { switch args[0] { case "audit": return runAudit([]string{"--help"}, stdout, stdout) case "export": return runExport([]string{"--help"}, stdout, stdout) case "preflight": return runPreflight([]string{"--help"}, stdout, stdout) case "install-to-ram": return runInstallToRAM([]string{"--help"}, stdout, stdout) case "support-bundle": return runSupportBundle([]string{"--help"}, stdout, stdout) case "web": return runWeb([]string{"--help"}, stdout, stdout) case "blackbox": return runBlackbox([]string{"--help"}, stdout, stdout) case "sat": return runSAT([]string{"--help"}, stdout, stderr) case "benchmark": return runBenchmark([]string{"--help"}, stdout, stderr) case "bee-worker": return runBeeWorker([]string{"--help"}, stdout, stderr) case "version": fmt.Fprintln(stdout, "usage: bee version") return 0 default: fmt.Fprintf(stderr, "bee help: unknown command %q\n\n", args[0]) printRootUsage(stderr) return 2 } } func runAudit(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("audit", flag.ContinueOnError) fs.SetOutput(stderr) output := fs.String("output", "stdout", "output destination: stdout or file:") runtimeFlag := fs.String("runtime", "auto", "runtime environment: auto, local, livecd") showVersion := fs.Bool("version", false, "print version and exit") fs.Usage = func() { fmt.Fprintln(stderr, "usage: bee audit [--runtime auto|local|livecd] [--output stdout|file:]") fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } if *showVersion { fmt.Fprintln(stdout, Version) return 0 } runtimeInfo, err := runtimeenv.Detect(*runtimeFlag) if err != nil { slog.Error("resolve runtime", "err", err) return 1 } slog.Info("runtime resolved", "mode", runtimeInfo.Mode, "reason", runtimeInfo.Reason) application := app.New(platform.New()) path, err := application.RunAudit(runtimeInfo.Mode, *output) if err != nil { slog.Error("run audit", "err", err) return 1 } if path != "stdout" { slog.Info("audit output written", "path", path) } return 0 } func runExport(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("export", flag.ContinueOnError) fs.SetOutput(stderr) targetDevice := fs.String("target", "", "removable device path, e.g. /dev/sdb1") fs.Usage = func() { fmt.Fprintln(stderr, "usage: bee export --target ") fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } if strings.TrimSpace(*targetDevice) == "" { fmt.Fprintln(stderr, "bee export: --target is required") fs.Usage() return 2 } application := app.New(platform.New()) targets, err := application.ListRemovableTargets() if err != nil { slog.Error("list removable targets", "err", err) return 1 } for _, target := range targets { if target.Device == *targetDevice { path, err := application.ExportLatestAudit(target) if err != nil { slog.Error("export latest audit", "err", err) return 1 } slog.Info("audit exported", "path", path) return 0 } } slog.Error("target device not found among removable filesystems", "device", *targetDevice) return 1 } func runPreflight(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("preflight", flag.ContinueOnError) fs.SetOutput(stderr) output := fs.String("output", "stdout", "output destination: stdout or file:") fs.Usage = func() { fmt.Fprintf(stderr, "usage: bee preflight [--output stdout|file:%s]\n", app.DefaultRuntimeJSONPath) fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } application := app.New(platform.New()) path, err := application.RunRuntimePreflight(*output) if err != nil { slog.Error("run preflight", "err", err) return 1 } if path != "stdout" { slog.Info("runtime health written", "path", path) } return 0 } func runInstallToRAM(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("install-to-ram", flag.ContinueOnError) fs.SetOutput(stderr) fs.Usage = func() { fmt.Fprintln(stderr, "usage: bee install-to-ram") } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } application := app.New(platform.New()) logLine := func(s string) { fmt.Fprintln(stdout, s) } if err := application.RunInstallToRAM(context.Background(), logLine); err != nil { slog.Error("run install-to-ram", "err", err) return 1 } return 0 } func runSupportBundle(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("support-bundle", flag.ContinueOnError) fs.SetOutput(stderr) output := fs.String("output", "stdout", "output destination: stdout or file:") fs.Usage = func() { fmt.Fprintln(stderr, "usage: bee support-bundle [--output stdout|file:]") fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } path, err := app.BuildSupportBundle(app.DefaultExportDir) if err != nil { slog.Error("build support bundle", "err", err) return 1 } defer os.Remove(path) raw, err := os.ReadFile(path) if err != nil { slog.Error("read support bundle", "err", err) return 1 } switch { case *output == "stdout": if _, err := stdout.Write(raw); err != nil { slog.Error("write support bundle stdout", "err", err) return 1 } case strings.HasPrefix(*output, "file:"): dst := strings.TrimPrefix(*output, "file:") if err := os.WriteFile(dst, raw, 0644); err != nil { slog.Error("write support bundle", "err", err) return 1 } slog.Info("support bundle written", "path", dst) default: fmt.Fprintln(stderr, "bee support-bundle: unknown output destination") fs.Usage() return 2 } return 0 } func runWeb(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("web", flag.ContinueOnError) fs.SetOutput(stderr) listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80") auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot") exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles") title := fs.String("title", "Bee Hardware Audit", "page title") fs.Usage = func() { fmt.Fprintf(stderr, "usage: bee web [--listen :80] [--audit-path %s] [--export-dir %s] [--title \"Bee Hardware Audit\"]\n", app.DefaultAuditJSONPath, app.DefaultExportDir) fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } slog.Info("starting bee web", "listen", *listenAddr, "audit_path", *auditPath) runtimeInfo, err := runtimeenv.Detect("auto") if err != nil { slog.Warn("resolve runtime for web", "err", err) } if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{ Title: *title, BuildLabel: buildLabel(), AuditPath: *auditPath, ExportDir: *exportDir, App: app.New(platform.New()), RuntimeMode: runtimeInfo.Mode, }); err != nil { slog.Error("run web", "err", err) return 1 } return 0 } func runBlackbox(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("blackbox", flag.ContinueOnError) fs.SetOutput(stderr) exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles") statePath := fs.String("state-file", app.DefaultBlackboxStatePath, "blackbox state file") fs.Usage = func() { fmt.Fprintf(stderr, "usage: bee blackbox [--export-dir %s] [--state-file %s]\n", app.DefaultExportDir, app.DefaultBlackboxStatePath) fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } slog.Info("starting bee blackbox", "export_dir", *exportDir, "state_file", *statePath) if err := app.RunBlackbox(context.Background(), *exportDir, *statePath, platform.New()); err != nil && !errors.Is(err, context.Canceled) { slog.Error("run blackbox", "err", err) return 1 } return 0 } func runSAT(args []string, stdout, stderr io.Writer) int { if len(args) == 0 { fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration ]") return 2 } if args[0] == "help" || args[0] == "--help" || args[0] == "-h" { fmt.Fprintln(stdout, "usage: bee sat nvidia|memory|storage|cpu [--duration ]") return 0 } fs := flag.NewFlagSet("sat", flag.ContinueOnError) fs.SetOutput(stderr) duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)") diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)") if err := fs.Parse(args[1:]); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fmt.Fprintf(stderr, "bee sat: unexpected arguments\n") return 2 } target := args[0] if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" { fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target) fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration ] [--diag-level <1-4>]") return 2 } application := app.New(platform.New()) var ( archive string err error ) logLine := func(s string) { fmt.Fprintln(os.Stderr, s) } switch target { case "nvidia": level := *diagLevel if level > 0 { _, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine) } else { archive, err = application.RunNvidiaAcceptancePack("", logLine) } case "memory": archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", 256, 1, logLine) case "storage": archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", false, logLine) case "cpu": dur := *duration if dur <= 0 { dur = 60 } archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine) } if err != nil { slog.Error("run sat", "target", target, "err", err) return 1 } slog.Info("sat archive written", "target", target, "path", archive) return 0 } func runBenchmark(args []string, stdout, stderr io.Writer) int { if len(args) == 0 { fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]") return 2 } if args[0] == "help" || args[0] == "--help" || args[0] == "-h" { fmt.Fprintln(stdout, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]") return 0 } target := args[0] if target != "nvidia" { fmt.Fprintf(stderr, "bee benchmark: unknown target %q\n", target) fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]") return 2 } fs := flag.NewFlagSet("benchmark", flag.ContinueOnError) fs.SetOutput(stderr) profile := fs.String("profile", platform.NvidiaBenchmarkProfileStandard, "benchmark profile: standard, stability, overnight") devices := fs.String("devices", "", "comma-separated GPU indices to include") exclude := fs.String("exclude", "", "comma-separated GPU indices to exclude") sizeMB := fs.Int("size-mb", 0, "per-GPU benchmark buffer size in MB (0 = auto)") skipNCCL := fs.Bool("skip-nccl", false, "skip multi-GPU NCCL interconnect benchmark") if err := fs.Parse(args[1:]); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fmt.Fprintf(stderr, "bee benchmark: unexpected arguments\n") return 2 } includeIndices, err := parseBenchmarkIndexCSV(*devices) if err != nil { fmt.Fprintf(stderr, "bee benchmark: invalid --devices: %v\n", err) return 2 } excludeIndices, err := parseBenchmarkIndexCSV(*exclude) if err != nil { fmt.Fprintf(stderr, "bee benchmark: invalid --exclude: %v\n", err) return 2 } application := app.New(platform.New()) logLine := func(s string) { fmt.Fprintln(os.Stderr, s) } archive, err := application.RunNvidiaBenchmark("", platform.NvidiaBenchmarkOptions{ Profile: *profile, SizeMB: *sizeMB, GPUIndices: includeIndices, ExcludeGPUIndices: excludeIndices, RunNCCL: !*skipNCCL, }, logLine) if err != nil { slog.Error("run benchmark", "target", target, "err", err) return 1 } slog.Info("benchmark archive written", "target", target, "path", archive) return 0 } func runBeeWorker(args []string, stdout, stderr io.Writer) int { fs := flag.NewFlagSet("bee-worker", flag.ContinueOnError) fs.SetOutput(stderr) exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with task state and artifacts") taskID := fs.String("task-id", "", "task identifier, e.g. TASK-001") fs.Usage = func() { fmt.Fprintf(stderr, "usage: bee bee-worker --export-dir %s --task-id TASK-001\n", app.DefaultExportDir) fs.PrintDefaults() } if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return 0 } return 2 } if fs.NArg() != 0 { fs.Usage() return 2 } return webui.RunPersistedTask(*exportDir, *taskID, stdout, stderr) } func parseBenchmarkIndexCSV(raw string) ([]int, error) { raw = strings.TrimSpace(raw) if raw == "" { return nil, nil } var indices []int for _, part := range strings.Split(raw, ",") { part = strings.TrimSpace(part) if part == "" { continue } value, err := strconv.Atoi(part) if err != nil || value < 0 { return nil, fmt.Errorf("bad gpu index %q", part) } indices = append(indices, value) } return indices, nil }