Improve PCIe inventory and hardware identity collection

This commit is contained in:
Mikhail Chusavitin
2026-03-25 20:00:38 +03:00
parent 9a1df9b1ba
commit 614b7cad61
10 changed files with 367 additions and 22 deletions

View File

@@ -36,6 +36,7 @@ func Run(_ runtimeenv.Mode) schema.HardwareIngestRequest {
snap.Memory = enrichMemoryWithTelemetry(snap.Memory, sensorDoc)
snap.Storage = collectStorage()
snap.PCIeDevices = collectPCIe()
snap.PCIeDevices = enrichPCIeWithPCISerials(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)

View File

@@ -41,7 +41,18 @@ func filterStorage(disks []schema.HardwareStorage) []schema.HardwareStorage {
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
out := make([]schema.HardwarePowerSupply, 0, len(psus))
for _, psu := range psus {
if psu.SerialNumber == nil || *psu.SerialNumber == "" {
hasIdentity := false
switch {
case psu.SerialNumber != nil && *psu.SerialNumber != "":
hasIdentity = true
case psu.Slot != nil && *psu.Slot != "":
hasIdentity = true
case psu.Model != nil && *psu.Model != "":
hasIdentity = true
case psu.Vendor != nil && *psu.Vendor != "":
hasIdentity = true
}
if !hasIdentity {
continue
}
out = append(out, psu)

View File

@@ -61,3 +61,20 @@ func TestFinalizeSnapshotPreservesDuplicateSerials(t *testing.T) {
t.Fatalf("duplicate serial should stay unchanged: %q", got)
}
}
func TestFilterPSUsKeepsSlotOnlyEntries(t *testing.T) {
slot := "0"
status := statusOK
got := filterPSUs([]schema.HardwarePowerSupply{
{Slot: &slot, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
})
if len(got) != 1 {
t.Fatalf("len(got)=%d want 1", len(got))
}
if got[0].Slot == nil || *got[0].Slot != "0" {
t.Fatalf("unexpected kept PSU: %+v", got[0])
}
}

View File

@@ -44,6 +44,11 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
}
iface := ifaces[0]
devs[i].MacAddresses = collectInterfaceMACs(ifaces)
if devs[i].SerialNumber == nil {
if serial := queryPCIDeviceSerial(bdf); serial != "" {
devs[i].SerialNumber = &serial
}
}
if devs[i].Firmware == nil {
if out, err := ethtoolInfoQuery(iface); err == nil {

View File

@@ -1,6 +1,10 @@
package collector
import "testing"
import (
"bee/audit/internal/schema"
"fmt"
"testing"
)
func TestParseSFPDOM(t *testing.T) {
raw := `
@@ -29,6 +33,74 @@ func TestParseSFPDOM(t *testing.T) {
}
}
func TestParseLSPCIDetailSerial(t *testing.T) {
raw := `
05:00.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6]
Serial number: NIC-SN-12345
`
if got := parseLSPCIDetailSerial(raw); got != "NIC-SN-12345" {
t.Fatalf("serial=%q want %q", got, "NIC-SN-12345")
}
}
func TestParsePCIVPDSerial(t *testing.T) {
raw := []byte{0x82, 0x05, 0x00, 'M', 'L', 'X', '5', 0x90, 0x08, 0x00, 'S', 'N', 0x08, 'M', 'T', '1', '2', '3', '4', '5', '6'}
if got := parsePCIVPDSerial(raw); got != "MT123456" {
t.Fatalf("serial=%q want %q", got, "MT123456")
}
}
func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
origDetail := queryPCILSPCIDetail
origVPD := readPCIVPDFile
origIfaces := netIfacesByBDF
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
t.Cleanup(func() {
queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD
netIfacesByBDF = origIfaces
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
})
queryPCILSPCIDetail = func(bdf string) (string, error) {
if bdf != "0000:18:00.0" {
t.Fatalf("unexpected bdf: %s", bdf)
}
return "Serial number: NIC-SN-98765\n", nil
}
readPCIVPDFile = func(string) ([]byte, error) {
return nil, fmt.Errorf("no vpd needed")
}
netIfacesByBDF = func(string) []string { return []string{"eth0"} }
readNetAddressFile = func(iface string) (string, error) {
if iface != "eth0" {
t.Fatalf("unexpected iface: %s", iface)
}
return "aa:bb:cc:dd:ee:ff", nil
}
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
class := "EthernetController"
bdf := "0000:18:00.0"
devs := []schema.HardwarePCIeDevice{{
DeviceClass: &class,
BDF: &bdf,
}}
out := enrichPCIeWithNICTelemetry(devs)
if out[0].SerialNumber == nil || *out[0].SerialNumber != "NIC-SN-98765" {
t.Fatalf("serial=%v want NIC-SN-98765", out[0].SerialNumber)
}
if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
}
}
func TestDBMValue(t *testing.T) {
tests := []struct {
in string

View File

@@ -37,7 +37,7 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
val := strings.TrimSpace(line[idx+2:])
fields[key] = val
}
if !shouldIncludePCIeDevice(fields["Class"]) {
if !shouldIncludePCIeDevice(fields["Class"], fields["Vendor"], fields["Device"]) {
continue
}
dev := parseLspciDevice(fields)
@@ -46,8 +46,10 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
return devs
}
func shouldIncludePCIeDevice(class string) bool {
func shouldIncludePCIeDevice(class, vendor, device string) bool {
c := strings.ToLower(strings.TrimSpace(class))
v := strings.ToLower(strings.TrimSpace(vendor))
d := strings.ToLower(strings.TrimSpace(device))
if c == "" {
return true
}
@@ -68,12 +70,28 @@ func shouldIncludePCIeDevice(class string) bool {
"audio device",
"serial bus controller",
"unassigned class",
"non-essential instrumentation",
}
for _, bad := range excluded {
if strings.Contains(c, bad) {
return false
}
}
if strings.Contains(v, "advanced micro devices") || strings.Contains(v, "[amd]") {
internalAMDPatterns := []string{
"dummy function",
"reserved spp",
"ptdma",
"cryptographic coprocessor pspcpp",
"pspcpp",
}
for _, bad := range internalAMDPatterns {
if strings.Contains(d, bad) {
return false
}
}
}
return true
}
@@ -98,6 +116,8 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
}
if numaNode, ok := readPCINumaNode(bdf); ok {
dev.NUMANode = &numaNode
} else if numaNode, ok := parsePCINumaNode(fields["NUMANode"]); ok {
dev.NUMANode = &numaNode
}
if width, ok := readPCIIntAttribute(bdf, "current_link_width"); ok {
dev.LinkWidth = &width
@@ -165,6 +185,18 @@ func readPCINumaNode(bdf string) (int, bool) {
return value, true
}
func parsePCINumaNode(raw string) (int, bool) {
raw = strings.TrimSpace(raw)
if raw == "" {
return 0, false
}
value, err := strconv.Atoi(raw)
if err != nil || value < 0 {
return 0, false
}
return value, true
}
func readPCIIntAttribute(bdf, attribute string) (int, bool) {
out, err := exec.Command("cat", "/sys/bus/pci/devices/"+bdf+"/"+attribute).Output()
if err != nil {

View File

@@ -8,32 +8,42 @@ import (
func TestShouldIncludePCIeDevice(t *testing.T) {
tests := []struct {
class string
want bool
name string
class string
vendor string
device string
want bool
}{
{"USB controller", false},
{"System peripheral", false},
{"Audio device", false},
{"Host bridge", false},
{"PCI bridge", false},
{"SMBus", false},
{"Performance counters", false},
{"Ethernet controller", true},
{"RAID bus controller", true},
{"Non-Volatile memory controller", true},
{"VGA compatible controller", true},
{name: "usb", class: "USB controller", want: false},
{name: "system peripheral", class: "System peripheral", want: false},
{name: "audio", class: "Audio device", want: false},
{name: "host bridge", class: "Host bridge", want: false},
{name: "pci bridge", class: "PCI bridge", want: false},
{name: "smbus", class: "SMBus", want: false},
{name: "perf", class: "Performance counters", want: false},
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
{name: "amd dummy function", class: "Encryption controller", vendor: "Advanced Micro Devices, Inc. [AMD]", device: "Starship/Matisse PTDMA", want: false},
{name: "amd pspcpp", class: "Encryption controller", vendor: "Advanced Micro Devices, Inc. [AMD]", device: "Starship/Matisse Cryptographic Coprocessor PSPCPP", want: false},
{name: "ethernet", class: "Ethernet controller", want: true},
{name: "raid", class: "RAID bus controller", want: true},
{name: "nvme", class: "Non-Volatile memory controller", want: true},
{name: "vga", class: "VGA compatible controller", want: true},
{name: "other encryption controller", class: "Encryption controller", vendor: "Intel Corporation", device: "QuickAssist", want: true},
}
for _, tt := range tests {
got := shouldIncludePCIeDevice(tt.class)
if got != tt.want {
t.Fatalf("class %q include=%v want %v", tt.class, got, tt.want)
}
t.Run(tt.name, func(t *testing.T) {
got := shouldIncludePCIeDevice(tt.class, tt.vendor, tt.device)
if got != tt.want {
t.Fatalf("class=%q vendor=%q device=%q include=%v want %v", tt.class, tt.vendor, tt.device, got, tt.want)
}
})
}
}
func TestParseLspci_filtersExcludedClasses(t *testing.T) {
input := "Slot:\t0000:00:14.0\nClass:\tUSB controller\nVendor:\tIntel Corporation\nDevice:\tUSB 3.0\n\n" +
"Slot:\t0000:00:18.0\nClass:\tNon-Essential Instrumentation\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PCIe Dummy Function\n\n" +
"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
devs := parseLspci(input)
@@ -51,6 +61,21 @@ func TestParseLspci_filtersExcludedClasses(t *testing.T) {
}
}
func TestParseLspci_filtersAMDChipsetNoise(t *testing.T) {
input := "" +
"Slot:\t0000:1a:00.0\nClass:\tNon-Essential Instrumentation\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PCIe Dummy Function\n\n" +
"Slot:\t0000:1a:00.2\nClass:\tEncryption controller\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PTDMA\n\n" +
"Slot:\t0000:05:00.0\nClass:\tEthernet controller\nVendor:\tMellanox Technologies\nDevice:\tMT28908 Family [ConnectX-6]\n\n"
devs := parseLspci(input)
if len(devs) != 1 {
t.Fatalf("expected 1 remaining device, got %d", len(devs))
}
if devs[0].Model == nil || *devs[0].Model != "MT28908 Family [ConnectX-6]" {
t.Fatalf("unexpected remaining device: %+v", devs[0])
}
}
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
@@ -68,6 +93,18 @@ func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
}
}
func TestParseLspciUsesNUMANodeFieldWhenSysfsUnavailable(t *testing.T) {
input := "Slot:\t0000:65:00.0\nClass:\tEthernet controller\nVendor:\tIntel Corporation\nDevice:\tX710\nNUMANode:\t1\n\n"
devs := parseLspci(input)
if len(devs) != 1 {
t.Fatalf("expected 1 device, got %d", len(devs))
}
if devs[0].NUMANode == nil || *devs[0].NUMANode != 1 {
t.Fatalf("numa_node=%v want 1", devs[0].NUMANode)
}
}
func TestNormalizePCILinkSpeed(t *testing.T) {
tests := []struct {
raw string

View File

@@ -0,0 +1,123 @@
package collector
import (
"bee/audit/internal/schema"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
)
var (
queryPCILSPCIDetail = func(bdf string) (string, error) {
out, err := exec.Command("lspci", "-vv", "-s", bdf).Output()
if err != nil {
return "", err
}
return string(out), nil
}
readPCIVPDFile = func(bdf string) ([]byte, error) {
return os.ReadFile(filepath.Join("/sys/bus/pci/devices", bdf, "vpd"))
}
)
func enrichPCIeWithPCISerials(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
enriched := 0
for i := range devs {
if !shouldProbePCIeSerial(devs[i]) {
continue
}
bdf := normalizePCIeBDF(*devs[i].BDF)
if bdf == "" {
continue
}
if serial := queryPCIDeviceSerial(bdf); serial != "" {
devs[i].SerialNumber = &serial
enriched++
}
}
if enriched > 0 {
slog.Info("pcie: serials enriched", "count", enriched)
}
return devs
}
func shouldProbePCIeSerial(dev schema.HardwarePCIeDevice) bool {
if dev.BDF == nil || dev.SerialNumber != nil {
return false
}
if dev.DeviceClass == nil {
return false
}
class := strings.TrimSpace(*dev.DeviceClass)
return isNICClass(class) || isGPUClass(class)
}
func queryPCIDeviceSerial(bdf string) string {
if out, err := queryPCILSPCIDetail(bdf); err == nil {
if serial := parseLSPCIDetailSerial(out); serial != "" {
return serial
}
}
if raw, err := readPCIVPDFile(bdf); err == nil {
return parsePCIVPDSerial(raw)
}
return ""
}
func parseLSPCIDetailSerial(raw string) string {
for _, line := range strings.Split(raw, "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
lower := strings.ToLower(line)
if !strings.Contains(lower, "serial number:") {
continue
}
idx := strings.Index(line, ":")
if idx < 0 {
continue
}
if serial := strings.TrimSpace(line[idx+1:]); serial != "" {
return serial
}
}
return ""
}
func parsePCIVPDSerial(raw []byte) string {
for i := 0; i+3 < len(raw); i++ {
if raw[i] != 'S' || raw[i+1] != 'N' {
continue
}
length := int(raw[i+2])
if length <= 0 || length > 64 || i+3+length > len(raw) {
continue
}
value := strings.TrimSpace(strings.Trim(string(raw[i+3:i+3+length]), "\x00"))
if !looksLikeSerial(value) {
continue
}
return value
}
return ""
}
func looksLikeSerial(value string) bool {
if len(value) < 4 {
return false
}
hasAlphaNum := false
for _, r := range value {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
hasAlphaNum = true
case strings.ContainsRune(" -_./:", r):
default:
return false
}
}
return hasAlphaNum
}

View File

@@ -0,0 +1,47 @@
package collector
import (
"bee/audit/internal/schema"
"fmt"
"testing"
)
func TestEnrichPCIeWithPCISerialsAddsGPUFallback(t *testing.T) {
origDetail := queryPCILSPCIDetail
origVPD := readPCIVPDFile
t.Cleanup(func() {
queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD
})
queryPCILSPCIDetail = func(bdf string) (string, error) {
if bdf != "0000:11:00.0" {
t.Fatalf("unexpected bdf: %s", bdf)
}
return "Serial number: GPU-SN-12345\n", nil
}
readPCIVPDFile = func(string) ([]byte, error) {
return nil, fmt.Errorf("no vpd needed")
}
class := "DisplayController"
bdf := "0000:11:00.0"
devs := []schema.HardwarePCIeDevice{{
DeviceClass: &class,
BDF: &bdf,
}}
out := enrichPCIeWithPCISerials(devs)
if out[0].SerialNumber == nil || *out[0].SerialNumber != "GPU-SN-12345" {
t.Fatalf("serial=%v want GPU-SN-12345", out[0].SerialNumber)
}
}
func TestShouldProbePCIeSerialSkipsNonGPUOrNIC(t *testing.T) {
class := "StorageController"
bdf := "0000:19:00.0"
dev := schema.HardwarePCIeDevice{DeviceClass: &class, BDF: &bdf}
if shouldProbePCIeSerial(dev) {
t.Fatal("unexpected probe for storage controller")
}
}