Compare commits

..

2 Commits
v4.4 ... v4.6

Author SHA1 Message Date
Mikhail Chusavitin
f3c14cd893 Harden NIC probing for empty SFP ports 2026-04-04 15:23:15 +03:00
Mikhail Chusavitin
728270dc8e Unblock bee-web startup and expand support bundle diagnostics 2026-04-04 15:18:43 +03:00
7 changed files with 172 additions and 9 deletions

View File

@@ -754,6 +754,26 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
}
}
for _, want := range []string{
"/system/ip-link.txt",
"/system/ip-link-stats.txt",
"/system/ethtool-info.txt",
"/system/ethtool-link.txt",
"/system/ethtool-module.txt",
"/system/mstflint-query.txt",
} {
var found bool
for _, name := range names {
if contains(name, want) {
found = true
break
}
}
if !found {
t.Fatalf("support bundle missing %s, names=%v", want, names)
}
}
var foundRaw bool
for _, name := range names {
if contains(name, "/export/bee-sat/memory-run/verbose.log") {

View File

@@ -32,6 +32,8 @@ var supportBundleCommands = []struct {
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
{name: "system/mount.txt", cmd: []string{"mount"}},
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
@@ -47,6 +49,83 @@ for d in /sys/bus/pci/devices/*/; do
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
done
done
`}},
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -i "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -m "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
if ! command -v mstflint >/dev/null 2>&1; then
echo "mstflint not found"
exit 0
fi
found=0
for path in /sys/bus/pci/devices/*; do
[ -e "$path/vendor" ] || continue
vendor=$(cat "$path/vendor" 2>/dev/null)
[ "$vendor" = "0x15b3" ] || continue
bdf=$(basename "$path")
found=1
echo "=== $bdf ==="
mstflint -d "$bdf" q 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no Mellanox/NVIDIA networking devices found"
fi
`}},
}

View File

@@ -2,18 +2,21 @@ package collector
import (
"bee/audit/internal/schema"
"context"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
const mellanoxVendorID = 0x15b3
const nicProbeTimeout = 2 * time.Second
var (
mstflintQuery = func(bdf string) (string, error) {
out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
if err != nil {
return "", err
}
@@ -21,7 +24,7 @@ var (
}
ethtoolInfoQuery = func(iface string) (string, error) {
out, err := exec.Command("ethtool", "-i", iface).Output()
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
if err != nil {
return "", err
}
@@ -29,6 +32,14 @@ var (
}
netIfacesByBDF = listNetIfacesByBDF
readNetCarrierFile = func(iface string) (string, error) {
path := filepath.Join("/sys/class/net", iface, "carrier")
raw, err := os.ReadFile(path)
if err != nil {
return "", err
}
return strings.TrimSpace(string(raw)), nil
}
)
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
}
return ifaces
}
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
return exec.CommandContext(ctx, name, args...).Output()
}
func interfaceHasCarrier(iface string) bool {
raw, err := readNetCarrierFile(iface)
if err != nil {
return false
}
return strings.TrimSpace(raw) == "1"
}

View File

@@ -12,7 +12,7 @@ import (
var (
ethtoolModuleQuery = func(iface string) (string, error) {
out, err := raidToolQuery("ethtool", "-m", iface)
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
if err != nil {
return "", err
}
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
}
}
if out, err := ethtoolModuleQuery(iface); err == nil {
if injectSFPDOMTelemetry(&devs[i], out) {
enriched++
continue
if interfaceHasCarrier(iface) {
if out, err := ethtoolModuleQuery(iface); err == nil {
if injectSFPDOMTelemetry(&devs[i], out) {
enriched++
continue
}
}
}
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {

View File

@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() {
queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
})
queryPCILSPCIDetail = func(bdf string) (string, error) {
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
return "aa:bb:cc:dd:ee:ff", nil
}
readNetCarrierFile = func(string) (string, error) { return "1", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
}
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
origIfaces := netIfacesByBDF
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() {
netIfacesByBDF = origIfaces
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
})
netIfacesByBDF = func(string) []string { return []string{"eth0"} }
readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
readNetCarrierFile = func(string) (string, error) { return "0", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) {
t.Fatal("ethtool -m should not be called without carrier")
return "", nil
}
class := "EthernetController"
bdf := "0000:18:00.0"
devs := []schema.HardwarePCIeDevice{{
DeviceClass: &class,
BDF: &bdf,
}}
out := enrichPCIeWithNICTelemetry(devs)
if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
}
}
func TestDBMValue(t *testing.T) {
tests := []struct {
in string

View File

@@ -1,7 +1,6 @@
[Unit]
Description=Bee: hardware audit
After=bee-preflight.service bee-network.service bee-nvidia.service
Before=bee-web.service
[Service]
Type=oneshot

View File

@@ -1,6 +1,5 @@
[Unit]
Description=Bee: hardware audit web viewer
After=bee-audit.service
[Service]
Type=simple