Handle NVIDIA GSP firmware init hang with timeout fallback
- bee-nvidia-load: run insmod in background, poll /proc/devices for nvidiactl; if GSP init doesn't complete in 90s, kill insmod and retry with NVreg_EnableGpuFirmware=0. Handles EBUSY case with clear error. - Write /run/bee-nvidia-mode (gsp-on/gsp-off/gsp-stuck) for audit layer - Show GSP mode badge in sidebar: yellow for gsp-off, red for gsp-stuck - Report NvidiaGSPMode in RuntimeHealth with issue entries - Simplify GRUB menu: default (KMS+GSP), advanced submenu (GSP=off, nomodeset, fail-safe), remove load-to-RAM entry - Add pcmanfm, ristretto, mupdf, mousepad to desktop packages Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -173,6 +173,22 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe
|
||||
|
||||
switch vendor {
|
||||
case "nvidia":
|
||||
if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
|
||||
health.NvidiaGSPMode = strings.TrimSpace(string(raw))
|
||||
if health.NvidiaGSPMode == "gsp-stuck" {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_gsp_stuck",
|
||||
Severity: "critical",
|
||||
Description: "NVIDIA GSP firmware init timed out and the kernel module is stuck. Reboot and select 'GSP=off' in the boot menu.",
|
||||
})
|
||||
} else if health.NvidiaGSPMode == "gsp-off" {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "nvidia_gsp_disabled",
|
||||
Severity: "warning",
|
||||
Description: "NVIDIA GSP firmware disabled (fallback). Power management runs via CPU path — power draw readings may differ from reference hardware.",
|
||||
})
|
||||
}
|
||||
}
|
||||
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
|
||||
if !health.DriverReady {
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
|
||||
Reference in New Issue
Block a user