Fix Runtime Health criteria: network, services, nvidia-fabricmanager
Network: report green (OK) if at least one interface has an IPv4 address; the PARTIAL state is dropped. Bee Services: treat `inactive` as OK — oneshot services (bee-sshsetup, bee-preflight, bee-network, bee-audit, etc.) complete successfully and then exit to `inactive`; only the `failed` state is a real problem. nvidia-fabricmanager: add an `ExecCondition=bee-check-nvswitch` drop-in so that the service is silently skipped (`inactive`, not `failed`) on systems without NVSwitch hardware (e.g. H200 NVL with direct NVLink and no NVSwitch chips). bee-check-nvswitch detects NVSwitch via lspci (vendor 10de, class 0680). bee-nvidia.service: add `ConditionPathExists=/usr/local/bin/bee-nvidia-load` so that the unit is a no-op if it is somehow present in a non-nvidia build. bee-boot-status: read /etc/bee-gpu-vendor and exclude bee-nvidia from CRITICAL/ALL on non-nvidia builds, preventing a boot hang if the unit is unexpectedly present. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -55,7 +55,6 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
|
||||
if err == nil {
|
||||
health.Interfaces = make([]schema.RuntimeInterface, 0, len(interfaces))
|
||||
hasIPv4 := false
|
||||
missingIPv4 := false
|
||||
for _, iface := range interfaces {
|
||||
outcome := "no_offer"
|
||||
if len(iface.IPv4) > 0 {
|
||||
@@ -63,8 +62,6 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
|
||||
hasIPv4 = true
|
||||
} else if strings.EqualFold(iface.State, "DOWN") {
|
||||
outcome = "link_down"
|
||||
} else {
|
||||
missingIPv4 = true
|
||||
}
|
||||
health.Interfaces = append(health.Interfaces, schema.RuntimeInterface{
|
||||
Name: iface.Name,
|
||||
@@ -73,17 +70,9 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
|
||||
Outcome: outcome,
|
||||
})
|
||||
}
|
||||
switch {
|
||||
case hasIPv4 && !missingIPv4:
|
||||
if hasIPv4 {
|
||||
health.NetworkStatus = "OK"
|
||||
case hasIPv4:
|
||||
health.NetworkStatus = "PARTIAL"
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "dhcp_partial",
|
||||
Severity: "warning",
|
||||
Description: "At least one interface did not obtain IPv4 connectivity.",
|
||||
})
|
||||
default:
|
||||
} else {
|
||||
health.NetworkStatus = "FAILED"
|
||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||
Code: "dhcp_failed",
|
||||
|
||||
Reference in New Issue
Block a user