Fix NIC port count handling and apply pending exporter updates

This commit is contained in:
2026-02-28 18:42:01 +03:00
parent 612058ed16
commit fe5da1dbd7
7 changed files with 362 additions and 4 deletions

View File

@@ -2361,7 +2361,7 @@ func parseNIC(doc map[string]interface{}) models.NetworkAdapter {
location = firstNonEmpty(location, redfishLocationLabel(ctrl["Location"]))
firmware = asString(ctrl["FirmwarePackageVersion"])
if caps, ok := ctrl["ControllerCapabilities"].(map[string]interface{}); ok {
portCount = asInt(caps["NetworkPortCount"])
portCount = sanitizeNetworkPortCount(asInt(caps["NetworkPortCount"]))
}
}
}
@@ -3406,6 +3406,8 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte
base, donor = donor, base
}
out := base
out.PortCount = sanitizeNetworkPortCount(out.PortCount)
donor.PortCount = sanitizeNetworkPortCount(donor.PortCount)
if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" {
out.Slot = donor.Slot
}
@@ -3452,6 +3454,15 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte
return out
}
// maxReasonableNetworkPortCount is the largest port count that
// sanitizeNetworkPortCount lets through unchanged.
const maxReasonableNetworkPortCount = 256

// sanitizeNetworkPortCount returns v when it lies in the inclusive range
// [1, maxReasonableNetworkPortCount] and 0 for every other value, so that
// non-positive or implausibly large counts are reported as "unknown".
func sanitizeNetworkPortCount(v int) int {
	plausible := v >= 1 && v <= maxReasonableNetworkPortCount
	if !plausible {
		return 0
	}
	return v
}
func dedupePCIeDevices(items []models.PCIeDevice) []models.PCIeDevice {
if len(items) <= 1 {
return items

View File

@@ -653,7 +653,7 @@ func (r redfishSnapshotReader) enrichNICsFromNetworkInterfaces(nics *[]models.Ne
macs = append(macs, collectNetworkPortMACs(p)...)
}
(*nics)[idx].MACAddresses = dedupeStrings(macs)
if (*nics)[idx].PortCount == 0 {
if sanitizeNetworkPortCount((*nics)[idx].PortCount) == 0 {
(*nics)[idx].PortCount = len(portDocs)
}
}

View File

@@ -551,6 +551,40 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) {
}
}
// TestParseNIC_PortCountFromControllerCapabilities verifies that parseNIC
// reports the NetworkPortCount found under a controller's
// ControllerCapabilities as the adapter's port count.
func TestParseNIC_PortCountFromControllerCapabilities(t *testing.T) {
	capabilities := map[string]interface{}{"NetworkPortCount": 2}
	controller := map[string]interface{}{"ControllerCapabilities": capabilities}
	doc := map[string]interface{}{
		"Id":          "1",
		"Controllers": []interface{}{controller},
	}

	if got := parseNIC(doc).PortCount; got != 2 {
		t.Fatalf("expected port_count=2, got %d", got)
	}
}
// TestParseNIC_DropsUnrealisticPortCount verifies that parseNIC reports a
// port count of 0 when the document carries an implausibly large
// NetworkPortCount.
func TestParseNIC_DropsUnrealisticPortCount(t *testing.T) {
	capabilities := map[string]interface{}{"NetworkPortCount": 825307750}
	controller := map[string]interface{}{"ControllerCapabilities": capabilities}
	doc := map[string]interface{}{
		"Id":          "1",
		"Controllers": []interface{}{controller},
	}

	if got := parseNIC(doc).PortCount; got != 0 {
		t.Fatalf("expected unrealistic port count to be dropped, got %d", got)
	}
}
func TestParsePCIeDevice_PrefersFunctionClassOverDeviceType(t *testing.T) {
doc := map[string]interface{}{
"Id": "NIC1",

View File

@@ -193,6 +193,7 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice {
appendDevice(models.HardwareDevice{
Kind: models.DeviceKindGPU,
Slot: gpu.Slot,
Location: gpu.Location,
BDF: gpu.BDF,
DeviceClass: "DisplayController",
VendorID: gpu.VendorID,
@@ -206,12 +207,27 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice {
LinkSpeed: gpu.CurrentLinkSpeed,
MaxLinkWidth: gpu.MaxLinkWidth,
MaxLinkSpeed: gpu.MaxLinkSpeed,
TemperatureC: gpu.Temperature,
Status: gpu.Status,
StatusCheckedAt: gpu.StatusCheckedAt,
StatusChangedAt: gpu.StatusChangedAt,
StatusAtCollect: gpu.StatusAtCollect,
StatusHistory: gpu.StatusHistory,
ErrorDescription: gpu.ErrorDescription,
Details: map[string]any{
"uuid": gpu.UUID,
"video_bios": gpu.VideoBIOS,
"irq": gpu.IRQ,
"bus_type": gpu.BusType,
"dma_size": gpu.DMASize,
"dma_mask": gpu.DMAMask,
"device_minor": gpu.DeviceMinor,
"temperature": gpu.Temperature,
"mem_temperature": gpu.MemTemperature,
"power": gpu.Power,
"max_power": gpu.MaxPower,
"clock_speed": gpu.ClockSpeed,
},
})
}
for _, nic := range hw.NetworkAdapters {
@@ -292,8 +308,14 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi
continue
}
if curr.score > prev.score {
curr.item = mergeCanonicalDevice(curr.item, prev.item)
curr.score = canonicalScore(curr.item)
byKey[key] = curr
continue
}
prev.item = mergeCanonicalDevice(prev.item, curr.item)
prev.score = canonicalScore(prev.item)
byKey[key] = prev
}
out := make([]models.HardwareDevice, 0, len(order)+len(noKey))
for _, key := range order {
@@ -306,6 +328,95 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi
return out
}
// mergeCanonicalDevice backfills primary from secondary and returns the
// result.
//
// A field of primary is overwritten only when it is empty (a string that is
// blank after trimming, a zero number, a nil value, a zero time, or an empty
// slice) and the corresponding field of secondary is not. Fields that primary
// already carries are never replaced. Details are combined through
// mergeDetailMaps.
func mergeCanonicalDevice(primary, secondary models.HardwareDevice) models.HardwareDevice {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	// Text fields: take the secondary value when primary is blank.
	textFields := []struct {
		dst *string
		src string
	}{
		{&primary.Kind, secondary.Kind},
		{&primary.Source, secondary.Source},
		{&primary.Slot, secondary.Slot},
		{&primary.Location, secondary.Location},
		{&primary.BDF, secondary.BDF},
		{&primary.DeviceClass, secondary.DeviceClass},
		{&primary.Model, secondary.Model},
		{&primary.PartNumber, secondary.PartNumber},
		{&primary.Manufacturer, secondary.Manufacturer},
		{&primary.SerialNumber, secondary.SerialNumber},
		{&primary.Firmware, secondary.Firmware},
		{&primary.Type, secondary.Type},
		{&primary.Interface, secondary.Interface},
		{&primary.PortType, secondary.PortType},
		{&primary.LinkSpeed, secondary.LinkSpeed},
		{&primary.MaxLinkSpeed, secondary.MaxLinkSpeed},
		{&primary.InputType, secondary.InputType},
		{&primary.Status, secondary.Status},
		{&primary.ErrorDescription, secondary.ErrorDescription},
	}
	for _, f := range textFields {
		if isBlank(*f.dst) && !isBlank(f.src) {
			*f.dst = f.src
		}
	}

	// Integer fields: take the secondary value when primary is zero.
	intFields := []struct {
		dst *int
		src int
	}{
		{&primary.VendorID, secondary.VendorID},
		{&primary.DeviceID, secondary.DeviceID},
		{&primary.SizeMB, secondary.SizeMB},
		{&primary.SizeGB, secondary.SizeGB},
		{&primary.Cores, secondary.Cores},
		{&primary.Threads, secondary.Threads},
		{&primary.FrequencyMHz, secondary.FrequencyMHz},
		{&primary.MaxFreqMHz, secondary.MaxFreqMHz},
		{&primary.PortCount, secondary.PortCount},
		{&primary.LinkWidth, secondary.LinkWidth},
		{&primary.MaxLinkWidth, secondary.MaxLinkWidth},
		{&primary.WattageW, secondary.WattageW},
		{&primary.InputPowerW, secondary.InputPowerW},
		{&primary.OutputPowerW, secondary.OutputPowerW},
		{&primary.TemperatureC, secondary.TemperatureC},
	}
	for _, f := range intFields {
		if *f.dst == 0 && f.src != 0 {
			*f.dst = f.src
		}
	}

	if primary.InputVoltage == 0 && secondary.InputVoltage != 0 {
		primary.InputVoltage = secondary.InputVoltage
	}

	// Nil-able values and collections.
	if secondary.Present != nil && primary.Present == nil {
		primary.Present = secondary.Present
	}
	if len(secondary.MACAddresses) > 0 && len(primary.MACAddresses) == 0 {
		primary.MACAddresses = secondary.MACAddresses
	}

	// Status bookkeeping.
	if !secondary.StatusCheckedAt.IsZero() && primary.StatusCheckedAt.IsZero() {
		primary.StatusCheckedAt = secondary.StatusCheckedAt
	}
	if !secondary.StatusChangedAt.IsZero() && primary.StatusChangedAt.IsZero() {
		primary.StatusChangedAt = secondary.StatusChangedAt
	}
	if secondary.StatusAtCollect != nil && primary.StatusAtCollect == nil {
		primary.StatusAtCollect = secondary.StatusAtCollect
	}
	if len(secondary.StatusHistory) > 0 && len(primary.StatusHistory) == 0 {
		primary.StatusHistory = secondary.StatusHistory
	}

	primary.Details = mergeDetailMaps(primary.Details, secondary.Details)
	return primary
}
// mergeDetailMaps returns the union of primary and secondary. When both maps
// hold the same key, primary's value wins.
//
// Neither argument is modified. Maps are reference values, so writing the
// missing keys into primary in place would also change the Details map of
// whichever device value the caller still holds; a fresh map is built
// instead whenever keys have to be added. When secondary is empty, primary
// itself is returned unchanged (it may be nil).
func mergeDetailMaps(primary, secondary map[string]any) map[string]any {
	if len(secondary) == 0 {
		return primary
	}
	merged := make(map[string]any, len(primary)+len(secondary))
	for k, v := range secondary {
		merged[k] = v
	}
	// Copy primary last so its values override secondary's on key clashes.
	for k, v := range primary {
		merged[k] = v
	}
	return merged
}
func canonicalKey(item models.HardwareDevice) string {
if sn := normalizedSerial(item.SerialNumber); sn != "" {
return "sn:" + strings.ToLower(sn)
@@ -483,6 +594,23 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string)
if model == "" {
model = d.PartNumber
}
temperatureC := d.TemperatureC
if temperatureC == 0 {
temperatureC = firstNonZeroInt(
intFromDetailMap(d.Details, "temperature_c"),
intFromDetailMap(d.Details, "temperature"),
)
}
powerW := firstNonZeroInt(
intFromDetailMap(d.Details, "power_w"),
intFromDetailMap(d.Details, "power"),
)
voltageV := firstNonZeroFloat(
floatFromDetailMap(d.Details, "voltage_v"),
floatFromDetailMap(d.Details, "voltage"),
floatFromDetailMap(d.Details, "input_voltage"),
d.InputVoltage,
)
status := normalizeStatus(d.Status, false)
meta := buildStatusMeta(status, d.StatusCheckedAt, d.StatusChangedAt, d.StatusAtCollect, d.StatusHistory, d.ErrorDescription, collectedAt)
result = append(result, ReanimatorPCIe{
@@ -499,6 +627,9 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string)
MaxLinkSpeed: d.MaxLinkSpeed,
SerialNumber: normalizedSerial(d.SerialNumber),
Firmware: d.Firmware,
TemperatureC: temperatureC,
PowerW: powerW,
VoltageV: voltageV,
Status: status,
StatusCheckedAt: meta.StatusCheckedAt,
StatusChangedAt: meta.StatusChangedAt,
@@ -536,6 +667,7 @@ func convertPSUsFromDevices(devices []models.HardwareDevice, collectedAt string)
InputPowerW: d.InputPowerW,
OutputPowerW: d.OutputPowerW,
InputVoltage: d.InputVoltage,
TemperatureC: d.TemperatureC,
StatusCheckedAt: meta.StatusCheckedAt,
StatusChangedAt: meta.StatusChangedAt,
StatusAtCollect: meta.StatusAtCollection,
@@ -804,6 +936,8 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
MaxLinkSpeed: gpu.MaxLinkSpeed,
SerialNumber: serialNumber,
Firmware: gpu.Firmware,
TemperatureC: gpu.Temperature,
PowerW: gpu.Power,
Status: status,
StatusCheckedAt: meta.StatusCheckedAt,
StatusChangedAt: meta.StatusChangedAt,
@@ -954,6 +1088,7 @@ func convertPowerSupplies(psus []models.PSU, collectedAt string) []ReanimatorPSU
InputPowerW: psu.InputPowerW,
OutputPowerW: psu.OutputPowerW,
InputVoltage: psu.InputVoltage,
TemperatureC: psu.TemperatureC,
StatusCheckedAt: meta.StatusCheckedAt,
StatusChangedAt: meta.StatusChangedAt,
StatusAtCollect: meta.StatusAtCollection,
@@ -1286,13 +1421,73 @@ func intFromDetailMap(details map[string]any, key string) int {
switch n := v.(type) {
case int:
return n
case int64:
return int(n)
case int32:
return int(n)
case float64:
return int(n)
case float32:
return int(n)
case string:
i, err := strconv.Atoi(strings.TrimSpace(n))
if err == nil {
return i
}
return 0
default:
return 0
}
}
// floatFromDetailMap looks up key in details and converts the stored value to
// a float64.
//
// float64, float32, int, int64 and int32 values are converted directly;
// strings are trimmed and parsed with strconv.ParseFloat. A nil map, a
// missing key, an unparsable string or a value of any other type yields 0.
func floatFromDetailMap(details map[string]any, key string) float64 {
	// Indexing a nil map is safe and reports the key as absent.
	raw, present := details[key]
	if !present {
		return 0
	}
	switch val := raw.(type) {
	case float64:
		return val
	case float32:
		return float64(val)
	case int:
		return float64(val)
	case int64:
		return float64(val)
	case int32:
		return float64(val)
	case string:
		if parsed, err := strconv.ParseFloat(strings.TrimSpace(val), 64); err == nil {
			return parsed
		}
	}
	return 0
}
// firstNonZeroInt returns the first argument that is not 0, scanning left to
// right. It returns 0 when every argument is 0 or no arguments are given.
func firstNonZeroInt(values ...int) int {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate != 0 {
			return candidate
		}
	}
	return 0
}
// firstNonZeroFloat returns the first argument that does not compare equal to
// 0, scanning left to right. It returns 0 when every argument is 0 or no
// arguments are given.
func firstNonZeroFloat(values ...float64) float64 {
	for i := 0; i < len(values); i++ {
		if values[i] == 0 {
			continue
		}
		return values[i]
	}
	return 0
}
// inferStorageStatus determines storage device status
func inferStorageStatus(stor models.Storage) string {
if !stor.Present {

View File

@@ -737,4 +737,110 @@ func TestConvertToReanimator_UsesCanonicalDevices(t *testing.T) {
}
}
// TestConvertToReanimator_BindsDeviceVitals feeds ConvertToReanimator one GPU
// whose temperature, power and voltage live in its Details map and one PSU
// with a typed temperature, then checks that those readings appear on the
// exported PCIe device and power supply.
func TestConvertToReanimator_BindsDeviceVitals(t *testing.T) {
	gpu := models.HardwareDevice{
		Kind:         models.DeviceKindGPU,
		Slot:         "#GPU0",
		Model:        "B200 180GB HBM3e",
		SerialNumber: "GPU-001",
		BDF:          "0000:17:00.0",
		Details: map[string]any{
			"temperature": 71,
			"power":       350,
			"voltage":     12.2,
		},
	}
	supply := models.HardwareDevice{
		Kind:         models.DeviceKindPSU,
		Slot:         "PSU0",
		SerialNumber: "PSU-001",
		Present:      boolPtr(true),
		InputPowerW:  1400,
		OutputPowerW: 1300,
		InputVoltage: 229.5,
		TemperatureC: 44,
	}
	input := &models.AnalysisResult{
		Filename: "vitals.json",
		Hardware: &models.HardwareConfig{
			BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
			Devices:   []models.HardwareDevice{gpu, supply},
		},
	}

	out, err := ConvertToReanimator(input)
	if err != nil {
		t.Fatalf("ConvertToReanimator() failed: %v", err)
	}

	// The GPU must surface as the single PCIe entry carrying its vitals.
	if n := len(out.Hardware.PCIeDevices); n != 1 {
		t.Fatalf("expected one pcie device, got %d", n)
	}
	pcie := out.Hardware.PCIeDevices[0]
	if got := pcie.TemperatureC; got != 71 {
		t.Fatalf("expected GPU temperature 71C, got %d", got)
	}
	if got := pcie.PowerW; got != 350 {
		t.Fatalf("expected GPU power 350W, got %d", got)
	}
	if got := pcie.VoltageV; got != 12.2 {
		t.Fatalf("expected device voltage 12.2V, got %.2f", got)
	}

	// The PSU temperature must be carried over as well.
	if n := len(out.Hardware.PowerSupplies); n != 1 {
		t.Fatalf("expected one PSU, got %d", n)
	}
	if got := out.Hardware.PowerSupplies[0].TemperatureC; got != 44 {
		t.Fatalf("expected PSU temperature 44C, got %d", got)
	}
}
// TestConvertToReanimator_PreservesVitalsAcrossCanonicalDedup describes the
// same physical GPU twice, once as a legacy PCIe device and once as a legacy
// GPU with temperature and power readings, and checks that the export holds a
// single PCIe entry that still carries those readings.
func TestConvertToReanimator_PreservesVitalsAcrossCanonicalDedup(t *testing.T) {
	const (
		slot   = "#GPU0"
		bdf    = "0000:17:00.0"
		serial = "GPU-SN-001"
	)
	asPCIe := models.PCIeDevice{
		Slot:         slot,
		BDF:          bdf,
		DeviceClass:  "3D Controller",
		PartNumber:   "Generic Display",
		Manufacturer: "NVIDIA",
		SerialNumber: serial,
	}
	asGPU := models.GPU{
		Slot:         slot,
		BDF:          bdf,
		Model:        "B200 180GB HBM3e",
		Manufacturer: "NVIDIA",
		SerialNumber: serial,
		Temperature:  67,
		Power:        330,
		Status:       "OK",
	}
	input := &models.AnalysisResult{
		Filename: "dedup-vitals.json",
		Hardware: &models.HardwareConfig{
			BoardInfo:   models.BoardInfo{SerialNumber: "BOARD-001"},
			PCIeDevices: []models.PCIeDevice{asPCIe},
			GPUs:        []models.GPU{asGPU},
		},
	}

	out, err := ConvertToReanimator(input)
	if err != nil {
		t.Fatalf("ConvertToReanimator() failed: %v", err)
	}

	if n := len(out.Hardware.PCIeDevices); n != 1 {
		t.Fatalf("expected deduped one pcie entry, got %d", n)
	}
	merged := out.Hardware.PCIeDevices[0]
	if v := merged.TemperatureC; v != 67 {
		t.Fatalf("expected deduped GPU temperature 67C, got %d", v)
	}
	if v := merged.PowerW; v != 330 {
		t.Fatalf("expected deduped GPU power 330W, got %d", v)
	}
}
// boolPtr returns a pointer to a newly allocated bool holding v.
func boolPtr(v bool) *bool {
	p := new(bool)
	*p = v
	return p
}

View File

@@ -118,6 +118,9 @@ type ReanimatorPCIe struct {
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
SerialNumber string `json:"serial_number,omitempty"`
Firmware string `json:"firmware,omitempty"`
TemperatureC int `json:"temperature_c,omitempty"`
PowerW int `json:"power_w,omitempty"`
VoltageV float64 `json:"voltage_v,omitempty"`
Status string `json:"status,omitempty"`
StatusCheckedAt string `json:"status_checked_at,omitempty"`
StatusChangedAt string `json:"status_changed_at,omitempty"`
@@ -141,6 +144,7 @@ type ReanimatorPSU struct {
InputPowerW int `json:"input_power_w,omitempty"`
OutputPowerW int `json:"output_power_w,omitempty"`
InputVoltage float64 `json:"input_voltage,omitempty"`
TemperatureC int `json:"temperature_c,omitempty"`
StatusCheckedAt string `json:"status_checked_at,omitempty"`
StatusChangedAt string `json:"status_changed_at,omitempty"`
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`