Files
bee/audit/internal/collector/nic_mellanox.go
Michael Chus 05241f2e0e Redesign dashboard: split Runtime Health and Hardware Summary
- Runtime Health now shows only LiveCD system status (services, tools,
  drivers, network, CUDA/ROCm) — hardware component rows removed
- Hardware Summary now shows server components with readable descriptions
  (model, count×size) and component-status.json health badges
- Add Network Adapters row to Hardware Summary
- SFP module static info (vendor, PN, SN, connector, type, wavelength)
  now collected via ethtool -m regardless of carrier state
- PSU statuses from IPMI audit written to component-status.json so PSU
  badge shows actual status after first audit instead of UNKNOWN

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-09 23:41:23 +03:00

182 lines
3.9 KiB
Go

package collector
import (
"bee/audit/internal/schema"
"context"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Constants shared by the Mellanox NIC probing helpers in this file.
const (
	// mellanoxVendorID is the PCI vendor ID that isMellanoxDevice compares
	// against a device's VendorID field.
	mellanoxVendorID = 0x15b3

	// nicProbeTimeout is the per-invocation time limit passed to
	// commandOutputWithTimeout for the mstflint and ethtool probes.
	nicProbeTimeout = 2 * time.Second
)
// Probe hooks. They are held in package-level variables rather than plain
// functions, presumably so tests can replace them with fakes (verify against
// the package's test files).
var (
	// mstflintQuery runs `mstflint -d <bdf> q` under nicProbeTimeout and
	// returns its standard output as a string. On failure it returns an
	// empty string together with the error.
	mstflintQuery = func(bdf string) (string, error) {
		stdout, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
		if err != nil {
			return "", err
		}
		return string(stdout), nil
	}

	// ethtoolInfoQuery runs `ethtool -i <iface>` under nicProbeTimeout and
	// returns its standard output as a string. On failure it returns an
	// empty string together with the error.
	ethtoolInfoQuery = func(iface string) (string, error) {
		stdout, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
		if err != nil {
			return "", err
		}
		return string(stdout), nil
	}

	// netIfacesByBDF lists the network interface names attached to a PCI
	// device; it defaults to the sysfs-backed listNetIfacesByBDF.
	netIfacesByBDF = listNetIfacesByBDF

	// readNetCarrierFile returns the whitespace-trimmed contents of
	// /sys/class/net/<iface>/carrier.
	readNetCarrierFile = func(iface string) (string, error) {
		content, err := os.ReadFile(filepath.Join("/sys/class/net", iface, "carrier"))
		if err != nil {
			return "", err
		}
		return strings.TrimSpace(string(content)), nil
	}
)
// enrichPCIeWithMellanox fills in firmware and serial-number details for the
// Mellanox/NVIDIA Networking entries of devs, modifying the slice elements in
// place and returning the same slice.
//
// For every device accepted by isMellanoxDevice that has a non-empty
// normalized BDF, mstflint is queried for firmware and serial number; when
// mstflint yields no firmware, ethtool is tried as a fallback for firmware
// only. Fields are written only when a non-empty value was obtained. The
// number of devices that received at least one value is logged at Info level.
func enrichPCIeWithMellanox(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
	var count int
	for i := range devs {
		dev := &devs[i]
		if !isMellanoxDevice(*dev) {
			continue
		}

		// Without a PCI address there is nothing to point the tools at.
		if dev.BDF == nil {
			continue
		}
		bdf := normalizePCIeBDF(*dev.BDF)
		if bdf == "" {
			continue
		}

		firmware, serial := queryMellanoxFromMstflint(bdf)
		if firmware == "" {
			firmware = queryFirmwareFromEthtool(bdf)
		}

		haveFirmware := firmware != ""
		haveSerial := serial != ""
		if haveFirmware {
			dev.Firmware = &firmware
		}
		if haveSerial {
			dev.SerialNumber = &serial
		}
		if haveFirmware || haveSerial {
			count++
		}
	}
	slog.Info("mellanox: enriched", "count", count)
	return devs
}
// isMellanoxDevice reports whether dev looks like a Mellanox adapter: either
// its VendorID equals mellanoxVendorID, or its Manufacturer string contains
// "mellanox" or "nvidia networking" (compared case-insensitively).
func isMellanoxDevice(dev schema.HardwarePCIeDevice) bool {
	if id := dev.VendorID; id != nil && *id == mellanoxVendorID {
		return true
	}
	if dev.Manufacturer == nil {
		return false
	}
	maker := strings.ToLower(*dev.Manufacturer)
	return strings.Contains(maker, "mellanox") || strings.Contains(maker, "nvidia networking")
}
// queryMellanoxFromMstflint asks mstflint about the device at bdf and returns
// the firmware version and board serial number parsed from its output.
// If the mstflint query fails, both results are empty strings; the error is
// intentionally not propagated.
func queryMellanoxFromMstflint(bdf string) (firmware, serial string) {
	if report, err := mstflintQuery(bdf); err == nil {
		firmware, serial = parseMstflintQuery(report)
	}
	return firmware, serial
}
// parseMstflintQuery extracts the firmware version and board serial number
// from "key: value" formatted text such as the output of `mstflint q`.
//
// Each line is split at its first colon. Keys are matched case-insensitively
// after trimming surrounding whitespace: "fw version" supplies firmware and
// "board serial number" supplies serial. Lines without a colon and entries
// with an empty value are ignored. When a key appears more than once, the
// last non-empty value wins. Missing fields are returned as empty strings.
func parseMstflintQuery(raw string) (firmware, serial string) {
	for _, entry := range strings.Split(raw, "\n") {
		label, value, found := strings.Cut(entry, ":")
		if !found {
			continue
		}
		value = strings.TrimSpace(value)
		if value == "" {
			continue
		}
		switch strings.ToLower(strings.TrimSpace(label)) {
		case "fw version":
			firmware = value
		case "board serial number":
			serial = value
		}
	}
	return firmware, serial
}
// queryFirmwareFromEthtool returns the firmware version reported by ethtool
// for the first network interface of the PCI device at bdf that yields one.
// Interfaces whose ethtool query fails or reports no firmware version are
// skipped; an empty string is returned when none produces a value.
func queryFirmwareFromEthtool(bdf string) string {
	ifaces := netIfacesByBDF(bdf)
	for idx := range ifaces {
		info, err := ethtoolInfoQuery(ifaces[idx])
		if err != nil {
			continue
		}
		version := parseEthtoolFirmwareInfo(info)
		if version != "" {
			return version
		}
	}
	return ""
}
// parseEthtoolFirmwareInfo returns the value of the first non-empty
// "firmware-version" entry in "key: value" formatted text such as the output
// of `ethtool -i`. Each line is split at its first colon and the key is
// matched case-insensitively after trimming whitespace. An empty string is
// returned when no such entry exists.
func parseEthtoolFirmwareInfo(raw string) string {
	for _, entry := range strings.Split(raw, "\n") {
		label, value, found := strings.Cut(entry, ":")
		if !found {
			continue
		}
		if strings.ToLower(strings.TrimSpace(label)) != "firmware-version" {
			continue
		}
		if version := strings.TrimSpace(value); version != "" {
			return version
		}
	}
	return ""
}
// listNetIfacesByBDF returns the names of the entries found under
// /sys/bus/pci/devices/<bdf>/net, i.e. the network interfaces sysfs associates
// with that PCI device. It returns nil when the directory cannot be read, and
// a non-nil (possibly empty) slice otherwise.
func listNetIfacesByBDF(bdf string) []string {
	netDir := filepath.Join("/sys/bus/pci/devices", bdf, "net")
	dirEntries, err := os.ReadDir(netDir)
	if err != nil {
		return nil
	}

	names := make([]string, 0, len(dirEntries))
	for _, entry := range dirEntries {
		if name := entry.Name(); name != "" {
			names = append(names, name)
		}
	}
	return names
}
// commandOutputWithTimeout runs the named program with args and returns its
// standard output. The command is bound to a context that expires after
// timeout; the context is released when the function returns. The error is
// whatever exec.Cmd.Output reports.
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
	deadlineCtx, release := context.WithTimeout(context.Background(), timeout)
	defer release()

	cmd := exec.CommandContext(deadlineCtx, name, args...)
	return cmd.Output()
}