From 2320925433f7021df37423a1c6ae1287ef3eac2b Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Fri, 12 Jun 2026 03:42:19 +0300 Subject: [PATCH] Skip PCIe link-speed warnings for disabled devices Disabled PCIe devices (sysfs enable==0) carry no data traffic; their link state has no operational impact. Switchtec PCIe switch management endpoints on NVIDIA HGX H100 baseboards (and similar fabric controllers) train at reduced speed intentionally and were producing spurious warnings. Check is vendor-agnostic: reads enable attribute via existing helper, no vendor/device ID hardcoding. Documented in bible-local/decisions/2026-06-12-pcie-disabled-device-link-warning.md. Co-Authored-By: Claude Sonnet 4.6 --- audit/internal/collector/pcie.go | 10 +++++ ...06-12-pcie-disabled-device-link-warning.md | 41 +++++++++++++++++++ bible-local/decisions/README.md | 1 + 3 files changed, 52 insertions(+) create mode 100644 bible-local/decisions/2026-06-12-pcie-disabled-device-link-warning.md diff --git a/audit/internal/collector/pcie.go b/audit/internal/collector/pcie.go index b766cd9..d2384b5 100644 --- a/audit/internal/collector/pcie.go +++ b/audit/internal/collector/pcie.go @@ -278,6 +278,11 @@ func readPCIStringAttribute(bdf, attribute string) (string, bool) { // below the device maximum. Regular PCIe slots get Warning; NVLink bridge cards // get Critical because they are fixed internal connectors that must always train // to max speed — any downgrade signals a hardware fault. +// +// Disabled devices (sysfs enable==0) are skipped: they carry no data traffic and +// their link state has no operational impact. This covers management endpoints +// (e.g. PCIe switch fabric controllers on HGX baseboards) that the kernel never +// activates but that lspci still reports with link stats. 
func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) { if dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil { return @@ -285,6 +290,11 @@ func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) { if pcieLinkSpeedRank(*dev.LinkSpeed) >= pcieLinkSpeedRank(*dev.MaxLinkSpeed) { return } + if dev.BDF != nil { + if enabled, ok := readPCIIntAttribute(*dev.BDF, "enable"); ok && enabled == 0 { + return + } + } desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed) dev.ErrorDescription = &desc diff --git a/bible-local/decisions/2026-06-12-pcie-disabled-device-link-warning.md b/bible-local/decisions/2026-06-12-pcie-disabled-device-link-warning.md new file mode 100644 index 0000000..72f5fb5 --- /dev/null +++ b/bible-local/decisions/2026-06-12-pcie-disabled-device-link-warning.md @@ -0,0 +1,41 @@ +# Decision: Skip PCIe link-speed warnings for disabled devices + +**Date:** 2026-06-12 +**Status:** active + +## Context + +On HGX H100 SXM5 baseboards, the Microchip Switchtec PM41028 PSX PCIe switch +(vendor 11F8, device 4128, NVIDIA subsystem 10DE:1643) appears in `lspci` as a +"Memory controller". Its upstream link trains at Gen3 x2 while the device is +capable of Gen4 x16. The device is permanently in a disabled state: memory access +and bus-mastering are both off (Mem-, BusMaster-); `/sys/bus/pci/devices/<BDF>/enable` +reads `0`. + +This chip is the PCIe fabric management endpoint for the NVSwitch interconnect — it +carries only management traffic at low bandwidth and is intentionally not activated +by any Linux driver. The bee audit was reporting a `statusWarning` with message +"PCIe link speed degraded" for this device, which is misleading because the device +is not in the data path. + +## Decision + +`applyPCIeLinkSpeedWarning` reads `/sys/bus/pci/devices/<BDF>/enable` via the +existing `readPCIIntAttribute` helper. If the value is `0` the function returns +early without setting any warning status. 
+ +The check is vendor-agnostic: it applies to any PCIe device that Linux has not +activated, regardless of make or model. This is consistent with the +`no-hardcoded-vendors` contract — no vendor ID, device ID, or name string is +used as a condition. + +## Consequences + +- PCIe fabric management endpoints, IPMI virtual devices, and other permanently + disabled PCIe functions no longer produce spurious link-degradation warnings. +- Real link degradation on active devices (GPUs, NICs, NVMe, NVLink bridges) + continues to be detected and reported as before. +- NVLink bridge cards retain their existing `statusCritical` path (they are always + enabled, so the early return is never taken for them). +- The Switchtec device on HGX H100 boards shows `statusOK` with no + `error_description` in the audit JSON. diff --git a/bible-local/decisions/README.md b/bible-local/decisions/README.md index a9251e0..98a0c0a 100644 --- a/bible-local/decisions/README.md +++ b/bible-local/decisions/README.md @@ -7,3 +7,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`. | 2026-03-05 | Use NVIDIA proprietary driver | active | | 2026-04-01 | Treat memtest as explicit ISO content | active | | 2026-04-29 | Treat embedded submodules as read-only | active | +| 2026-06-12 | Skip PCIe link-speed warnings for disabled devices | active |