diff --git a/audit/internal/platform/runtime.go b/audit/internal/platform/runtime.go index 659f603..c9fc4f8 100644 --- a/audit/internal/platform/runtime.go +++ b/audit/internal/platform/runtime.go @@ -173,6 +173,22 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe switch vendor { case "nvidia": + if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil { + health.NvidiaGSPMode = strings.TrimSpace(string(raw)) + if health.NvidiaGSPMode == "gsp-stuck" { + health.Issues = append(health.Issues, schema.RuntimeIssue{ + Code: "nvidia_gsp_stuck", + Severity: "critical", + Description: "NVIDIA GSP firmware init timed out and the kernel module is stuck. Reboot and select 'GSP=off' in the boot menu.", + }) + } else if health.NvidiaGSPMode == "gsp-off" { + health.Issues = append(health.Issues, schema.RuntimeIssue{ + Code: "nvidia_gsp_disabled", + Severity: "warning", + Description: "NVIDIA GSP firmware disabled (fallback). Power management runs via CPU path — power draw readings may differ from reference hardware.", + }) + } + } health.DriverReady = strings.Contains(lsmodText, "nvidia ") if !health.DriverReady { health.Issues = append(health.Issues, schema.RuntimeIssue{ diff --git a/audit/internal/schema/hardware.go b/audit/internal/schema/hardware.go index 0ced83a..f644f3f 100644 --- a/audit/internal/schema/hardware.go +++ b/audit/internal/schema/hardware.go @@ -20,6 +20,7 @@ type RuntimeHealth struct { ExportDir string `json:"export_dir,omitempty"` DriverReady bool `json:"driver_ready,omitempty"` CUDAReady bool `json:"cuda_ready,omitempty"` + NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck" NetworkStatus string `json:"network_status,omitempty"` Issues []RuntimeIssue `json:"issues,omitempty"` Tools []RuntimeToolStatus `json:"tools,omitempty"` diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index 3080fc6..18ec4d3 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -33,6 +33,9 @@ a{color:var(--accent);text-decoration:none} .sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px} .sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px} .sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)} +.sidebar-badge{margin:0 12px 12px;padding:5px 8px;border-radius:4px;font-size:11px;font-weight:600;text-align:center} +.sidebar-badge-warn{background:#7a4f00;color:#f6c90e} +.sidebar-badge-crit{background:#5c1a1a;color:#ff6b6b} .nav{flex:1} .nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s} .nav-item:hover{color:#fff;background:rgba(255,255,255,.08)} @@ -107,6 +110,15 @@ func layoutNav(active string, buildLabel string) string { buildLabel = "dev" } b.WriteString(``) + if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil { + gspMode := strings.TrimSpace(string(raw)) + switch gspMode { + case "gsp-off": + b.WriteString(``) + case "gsp-stuck": + b.WriteString(``) + } + } b.WriteString(`