Harden NIC probing for empty SFP ports

This commit is contained in:
Mikhail Chusavitin
2026-04-04 15:23:15 +03:00
parent 728270dc8e
commit f3c14cd893
3 changed files with 73 additions and 7 deletions

View File

@@ -2,18 +2,21 @@ package collector
import (
"bee/audit/internal/schema"
"context"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
const mellanoxVendorID = 0x15b3
const nicProbeTimeout = 2 * time.Second
var (
mstflintQuery = func(bdf string) (string, error) {
out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
if err != nil {
return "", err
}
@@ -21,7 +24,7 @@ var (
}
ethtoolInfoQuery = func(iface string) (string, error) {
out, err := exec.Command("ethtool", "-i", iface).Output()
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
if err != nil {
return "", err
}
@@ -29,6 +32,14 @@ var (
}
netIfacesByBDF = listNetIfacesByBDF
// readNetCarrierFile returns the whitespace-trimmed contents of
// /sys/class/net/<iface>/carrier. It is a package-level variable so that
// tests can replace it with a stub.
readNetCarrierFile = func(iface string) (string, error) {
	data, err := os.ReadFile(filepath.Join("/sys/class/net", iface, "carrier"))
	if err != nil {
		return "", err
	}
	carrier := strings.TrimSpace(string(data))
	return carrier, nil
}
)
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
}
return ifaces
}
// commandOutputWithTimeout runs name with args and returns its standard
// output, giving up once timeout has elapsed.
//
// When the deadline passes, the process is killed through the context.
// WaitDelay bounds how long Output keeps waiting after that, so a descendant
// that inherited the stdout pipe cannot hold the call open indefinitely.
//
// If the command failed and the deadline has expired, the context's error is
// returned in place of the opaque "signal: killed" exit error, so callers can
// detect a timeout with errors.Is(err, context.DeadlineExceeded). Any output
// captured before the failure is still returned alongside the error.
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
	// Grace period granted after the deadline before I/O pipes are force-closed.
	const pipeGrace = 500 * time.Millisecond

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, name, args...)
	// Without WaitDelay, Output blocks until every holder of the stdout pipe
	// closes it, which can outlive the killed process itself.
	cmd.WaitDelay = pipeGrace

	out, err := cmd.Output()
	if err != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return out, ctxErr
		}
	}
	return out, err
}
// interfaceHasCarrier reports whether the carrier value obtained through
// readNetCarrierFile for iface is "1" once surrounding whitespace is removed.
// A read error is treated as "no carrier".
func interfaceHasCarrier(iface string) bool {
	state, readErr := readNetCarrierFile(iface)
	return readErr == nil && strings.TrimSpace(state) == "1"
}

View File

@@ -12,7 +12,7 @@ import (
var (
ethtoolModuleQuery = func(iface string) (string, error) {
out, err := raidToolQuery("ethtool", "-m", iface)
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
if err != nil {
return "", err
}
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
}
}
if out, err := ethtoolModuleQuery(iface); err == nil {
if injectSFPDOMTelemetry(&devs[i], out) {
enriched++
continue
if interfaceHasCarrier(iface) {
if out, err := ethtoolModuleQuery(iface); err == nil {
if injectSFPDOMTelemetry(&devs[i], out) {
enriched++
continue
}
}
}
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {

View File

@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() {
queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
})
queryPCILSPCIDetail = func(bdf string) (string, error) {
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
return "aa:bb:cc:dd:ee:ff", nil
}
readNetCarrierFile = func(string) (string, error) { return "1", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
}
// TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier verifies that
// enrichPCIeWithNICTelemetry does not invoke ethtoolModuleQuery when the
// carrier hook reports "0", and that the MAC address supplied by
// readNetAddressFile is still recorded on the device.
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
	// Snapshot every hook this test overrides and restore them afterwards.
	savedIfaces, savedMAC := netIfacesByBDF, readNetAddressFile
	savedInfo, savedModule := ethtoolInfoQuery, ethtoolModuleQuery
	savedCarrier := readNetCarrierFile
	t.Cleanup(func() {
		netIfacesByBDF, readNetAddressFile = savedIfaces, savedMAC
		ethtoolInfoQuery, ethtoolModuleQuery = savedInfo, savedModule
		readNetCarrierFile = savedCarrier
	})

	const wantMAC = "aa:bb:cc:dd:ee:ff"

	netIfacesByBDF = func(string) []string {
		return []string{"eth0"}
	}
	readNetAddressFile = func(string) (string, error) {
		return wantMAC, nil
	}
	// Carrier "0": the module (optics) query must be skipped entirely.
	readNetCarrierFile = func(string) (string, error) {
		return "0", nil
	}
	ethtoolInfoQuery = func(string) (string, error) {
		return "", fmt.Errorf("skip firmware")
	}
	ethtoolModuleQuery = func(string) (string, error) {
		t.Fatal("ethtool -m should not be called without carrier")
		return "", nil
	}

	class, bdf := "EthernetController", "0000:18:00.0"
	dev := schema.HardwarePCIeDevice{DeviceClass: &class, BDF: &bdf}

	out := enrichPCIeWithNICTelemetry([]schema.HardwarePCIeDevice{dev})

	got := out[0].MacAddresses
	if len(got) != 1 || got[0] != wantMAC {
		t.Fatalf("mac_addresses=%v", got)
	}
}
func TestDBMValue(t *testing.T) {
tests := []struct {
in string