- collector/pcie: add applyPCIeLinkSpeedWarning that sets status=Warning and ErrorDescription when current link speed is below maximum negotiated speed (e.g. Gen1 running on a Gen5 slot) - collector/pcie: add pcieLinkSpeedRank helper for Gen string comparison - collector/pcie_filter_test: cover degraded and healthy link speed cases - platform/techdump: collect lspci -vvv → lspci-vvv.txt for LnkCap/LnkSta Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
302 lines
7.3 KiB
Go
302 lines
7.3 KiB
Go
package collector
|
|
|
|
import (
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"strconv"
	"strings"

	"bee/audit/internal/schema"
)
|
|
|
|
func collectPCIe() []schema.HardwarePCIeDevice {
|
|
out, err := exec.Command("lspci", "-vmm", "-D").Output()
|
|
if err != nil {
|
|
slog.Warn("pcie: lspci failed", "err", err)
|
|
return nil
|
|
}
|
|
devs := parseLspci(string(out))
|
|
slog.Info("pcie: collected", "count", len(devs))
|
|
return devs
|
|
}
|
|
|
|
func parseLspci(output string) []schema.HardwarePCIeDevice {
|
|
// lspci -vmm -D outputs blank-line separated records, each field is "Key:\tValue"
|
|
var devs []schema.HardwarePCIeDevice
|
|
for _, block := range strings.Split(output, "\n\n") {
|
|
block = strings.TrimSpace(block)
|
|
if block == "" {
|
|
continue
|
|
}
|
|
fields := map[string]string{}
|
|
for _, line := range strings.Split(block, "\n") {
|
|
idx := strings.Index(line, ":\t")
|
|
if idx < 0 {
|
|
continue
|
|
}
|
|
key := strings.TrimSpace(line[:idx])
|
|
val := strings.TrimSpace(line[idx+2:])
|
|
fields[key] = val
|
|
}
|
|
if !shouldIncludePCIeDevice(fields["Class"], fields["Vendor"], fields["Device"]) {
|
|
continue
|
|
}
|
|
dev := parseLspciDevice(fields)
|
|
devs = append(devs, dev)
|
|
}
|
|
return devs
|
|
}
|
|
|
|
// shouldIncludePCIeDevice reports whether a device described by its lspci
// class, vendor and device strings belongs in the inventory.
//
// All three inputs are compared case-insensitively after trimming whitespace.
// A device with an empty class is always kept. Otherwise it is dropped when:
//   - its class contains one of the chipset/virtual class names below,
//   - it is a VGA/display/3D class whose device name looks like a BMC video chip,
//   - its vendor is AMD and the device name matches an internal AMD function.
func shouldIncludePCIeDevice(class, vendor, device string) bool {
	normalize := func(s string) string {
		return strings.ToLower(strings.TrimSpace(s))
	}
	containsAny := func(haystack string, needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(haystack, needle) {
				return true
			}
		}
		return false
	}

	cls, vnd, dvc := normalize(class), normalize(vendor), normalize(device)
	if cls == "" {
		return true
	}

	// Keep inventory focused on useful replaceable components, not chipset/virtual noise.
	if containsAny(cls,
		"host bridge",
		"isa bridge",
		"pci bridge",
		"co-processor",
		"performance counter",
		"performance counters",
		"ram memory",
		"system peripheral",
		"communication controller",
		"signal processing controller",
		"usb controller",
		"smbus",
		"audio device",
		"serial bus controller",
		"unassigned class",
		"non-essential instrumentation",
	) {
		return false
	}

	// Exclude BMC/management virtual VGA adapters — these are firmware video chips,
	// not real GPUs, and pollute the GPU inventory (e.g. iBMC, iDRAC, iLO VGA).
	isDisplayClass := containsAny(cls, "vga", "display", "3d")
	if isDisplayClass && containsAny(dvc,
		"management system chip",
		"management controller",
		"ibmc",
		"idrac",
		"ilo vga",
		"aspeed",
		"matrox",
	) {
		return false
	}

	// AMD exposes several internal functions as PCI devices; drop those too.
	isAMD := containsAny(vnd, "advanced micro devices", "[amd]")
	if isAMD && containsAny(dvc,
		"dummy function",
		"reserved spp",
		"ptdma",
		"cryptographic coprocessor pspcpp",
		"pspcpp",
	) {
		return false
	}

	return true
}
|
|
|
|
func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
|
|
dev := schema.HardwarePCIeDevice{}
|
|
present := true
|
|
dev.Present = &present
|
|
status := statusOK
|
|
dev.Status = &status
|
|
|
|
// Slot is the BDF: "0000:00:02.0"
|
|
if bdf := fields["Slot"]; bdf != "" {
|
|
dev.Slot = &bdf
|
|
dev.BDF = &bdf
|
|
// parse vendor_id and device_id from sysfs
|
|
vendorID, deviceID := readPCIIDs(bdf)
|
|
if vendorID != 0 {
|
|
dev.VendorID = &vendorID
|
|
}
|
|
if deviceID != 0 {
|
|
dev.DeviceID = &deviceID
|
|
}
|
|
if numaNode, ok := readPCINumaNode(bdf); ok {
|
|
dev.NUMANode = &numaNode
|
|
} else if numaNode, ok := parsePCINumaNode(fields["NUMANode"]); ok {
|
|
dev.NUMANode = &numaNode
|
|
}
|
|
if width, ok := readPCIIntAttribute(bdf, "current_link_width"); ok {
|
|
dev.LinkWidth = &width
|
|
}
|
|
if width, ok := readPCIIntAttribute(bdf, "max_link_width"); ok {
|
|
dev.MaxLinkWidth = &width
|
|
}
|
|
if speed, ok := readPCIStringAttribute(bdf, "current_link_speed"); ok {
|
|
linkSpeed := normalizePCILinkSpeed(speed)
|
|
if linkSpeed != "" {
|
|
dev.LinkSpeed = &linkSpeed
|
|
}
|
|
}
|
|
if speed, ok := readPCIStringAttribute(bdf, "max_link_speed"); ok {
|
|
linkSpeed := normalizePCILinkSpeed(speed)
|
|
if linkSpeed != "" {
|
|
dev.MaxLinkSpeed = &linkSpeed
|
|
}
|
|
}
|
|
}
|
|
|
|
if v := fields["Class"]; v != "" {
|
|
class := mapPCIeDeviceClass(v)
|
|
dev.DeviceClass = &class
|
|
}
|
|
if v := fields["Vendor"]; v != "" {
|
|
dev.Manufacturer = &v
|
|
}
|
|
if v := fields["Device"]; v != "" {
|
|
dev.Model = &v
|
|
}
|
|
|
|
// SVendor/SDevice available but not in schema — skip
|
|
|
|
// Warn if PCIe link is running below its maximum negotiated speed.
|
|
applyPCIeLinkSpeedWarning(&dev)
|
|
|
|
return dev
|
|
}
|
|
|
|
// readPCIIDs reads vendor and device IDs from sysfs for a given BDF.
|
|
func readPCIIDs(bdf string) (vendorID, deviceID int) {
|
|
base := "/sys/bus/pci/devices/" + bdf
|
|
if v, err := readHexFile(base + "/vendor"); err == nil {
|
|
vendorID = v
|
|
}
|
|
if v, err := readHexFile(base + "/device"); err == nil {
|
|
deviceID = v
|
|
}
|
|
return
|
|
}
|
|
|
|
// readHexFile reads the file at path and parses its contents as a hexadecimal
// integer. Surrounding whitespace and an optional "0x"/"0X" prefix are ignored.
//
// It returns 0 and a non-nil error when the file cannot be read or its contents
// are not a hexadecimal number that fits in an int.
func readHexFile(path string) (int, error) {
	// Read the file directly rather than forking `cat` for every attribute.
	data, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	// Trim whitespace before looking for the prefix so padded values still parse.
	text := strings.TrimSpace(string(data))
	if len(text) >= 2 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X') {
		text = text[2:]
	}

	// Bound the parse to the platform int size so the conversion below cannot truncate.
	n, err := strconv.ParseInt(text, 16, strconv.IntSize)
	if err != nil {
		return 0, fmt.Errorf("parsing hex value from %s: %w", path, err)
	}
	return int(n), nil
}
|
|
|
|
func readPCINumaNode(bdf string) (int, bool) {
|
|
value, ok := readPCIIntAttribute(bdf, "numa_node")
|
|
if !ok || value < 0 {
|
|
return 0, false
|
|
}
|
|
return value, true
|
|
}
|
|
|
|
// parsePCINumaNode parses a NUMA node number from text. Surrounding whitespace
// is ignored. The boolean is false when the text is empty, is not a decimal
// integer, or is negative.
func parsePCINumaNode(raw string) (int, bool) {
	trimmed := strings.TrimSpace(raw)
	if len(trimmed) == 0 {
		return 0, false
	}

	node, err := strconv.Atoi(trimmed)
	if err == nil && node >= 0 {
		return node, true
	}
	return 0, false
}
|
|
|
|
// readPCIIntAttribute reads the sysfs attribute
// /sys/bus/pci/devices/<bdf>/<attribute> and parses it as a decimal integer.
//
// The boolean is false when the file cannot be read, the content is not an
// integer, or the value is negative.
func readPCIIntAttribute(bdf, attribute string) (int, bool) {
	// Read the file directly rather than forking `cat` for every attribute.
	data, err := os.ReadFile("/sys/bus/pci/devices/" + bdf + "/" + attribute)
	if err != nil {
		return 0, false
	}

	value, err := strconv.Atoi(strings.TrimSpace(string(data)))
	if err != nil || value < 0 {
		return 0, false
	}
	return value, true
}
|
|
|
|
// readPCIStringAttribute reads the sysfs attribute
// /sys/bus/pci/devices/<bdf>/<attribute> and returns its content with
// surrounding whitespace removed.
//
// The boolean is false when the file cannot be read or is empty after trimming.
func readPCIStringAttribute(bdf, attribute string) (string, bool) {
	// Read the file directly rather than forking `cat` for every attribute.
	data, err := os.ReadFile("/sys/bus/pci/devices/" + bdf + "/" + attribute)
	if err != nil {
		return "", false
	}

	value := strings.TrimSpace(string(data))
	if value == "" {
		return "", false
	}
	return value, true
}
|
|
|
|
// applyPCIeLinkSpeedWarning sets the device status to Warning if the current PCIe link
|
|
// speed is below the maximum negotiated speed supported by both ends.
|
|
func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
|
|
if dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
|
|
return
|
|
}
|
|
if pcieLinkSpeedRank(*dev.LinkSpeed) < pcieLinkSpeedRank(*dev.MaxLinkSpeed) {
|
|
warn := statusWarning
|
|
dev.Status = &warn
|
|
desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
|
|
dev.ErrorDescription = &desc
|
|
}
|
|
}
|
|
|
|
// pcieLinkSpeedRank maps a normalized generation string to its number:
// "Gen1" → 1 through "Gen6" → 6. The match is exact (case-sensitive, no
// trimming); any other string yields 0.
func pcieLinkSpeedRank(gen string) int {
	generations := [...]string{"Gen1", "Gen2", "Gen3", "Gen4", "Gen5", "Gen6"}
	for i, name := range generations {
		if gen == name {
			return i + 1
		}
	}
	return 0
}
|
|
|
|
// normalizePCILinkSpeed converts a raw PCIe link speed string, such as the
// content of the sysfs current_link_speed attribute, into a generation label:
// 2.5 GT/s → "Gen1", 5 → "Gen2", 8 → "Gen3", 16 → "Gen4", 32 → "Gen5",
// 64 → "Gen6". It returns "" when no known transfer rate is found.
//
// The rate is taken from the first numeric token that equals a known rate, so
// both "8.0 GT/s PCIe" and "8 GT/s" are accepted, while a number that merely
// contains a known rate (e.g. "12.5") is not. The "8 GT/s" form (no ".0") is
// believed to be what some older kernels report — TODO confirm against the
// kernels this tool targets.
func normalizePCILinkSpeed(raw string) string {
	// Split on everything that is not a digit or a dot, leaving numeric tokens.
	tokens := strings.FieldsFunc(raw, func(r rune) bool {
		return r != '.' && (r < '0' || r > '9')
	})

	for _, token := range tokens {
		rate, err := strconv.ParseFloat(token, 64)
		if err != nil {
			continue
		}
		// All known rates are exactly representable, so direct comparison is safe.
		switch rate {
		case 2.5:
			return "Gen1"
		case 5:
			return "Gen2"
		case 8:
			return "Gen3"
		case 16:
			return "Gen4"
		case 32:
			return "Gen5"
		case 64:
			return "Gen6"
		}
	}
	return ""
}
|