Fix NIC port count handling and apply pending exporter updates
This commit is contained in:
@@ -2361,7 +2361,7 @@ func parseNIC(doc map[string]interface{}) models.NetworkAdapter {
|
||||
location = firstNonEmpty(location, redfishLocationLabel(ctrl["Location"]))
|
||||
firmware = asString(ctrl["FirmwarePackageVersion"])
|
||||
if caps, ok := ctrl["ControllerCapabilities"].(map[string]interface{}); ok {
|
||||
portCount = asInt(caps["NetworkPortCount"])
|
||||
portCount = sanitizeNetworkPortCount(asInt(caps["NetworkPortCount"]))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3406,6 +3406,8 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte
|
||||
base, donor = donor, base
|
||||
}
|
||||
out := base
|
||||
out.PortCount = sanitizeNetworkPortCount(out.PortCount)
|
||||
donor.PortCount = sanitizeNetworkPortCount(donor.PortCount)
|
||||
if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" {
|
||||
out.Slot = donor.Slot
|
||||
}
|
||||
@@ -3452,6 +3454,15 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte
|
||||
return out
|
||||
}
|
||||
|
||||
// maxReasonableNetworkPortCount is the largest port count that
// sanitizeNetworkPortCount accepts for a single network adapter.
const maxReasonableNetworkPortCount = 256

// sanitizeNetworkPortCount returns v unchanged when it lies in the range
// 1..maxReasonableNetworkPortCount, and 0 for every other value (zero,
// negative, or implausibly large).
func sanitizeNetworkPortCount(v int) int {
	if 1 <= v && v <= maxReasonableNetworkPortCount {
		return v
	}
	return 0
}
|
||||
|
||||
func dedupePCIeDevices(items []models.PCIeDevice) []models.PCIeDevice {
|
||||
if len(items) <= 1 {
|
||||
return items
|
||||
|
||||
@@ -653,7 +653,7 @@ func (r redfishSnapshotReader) enrichNICsFromNetworkInterfaces(nics *[]models.Ne
|
||||
macs = append(macs, collectNetworkPortMACs(p)...)
|
||||
}
|
||||
(*nics)[idx].MACAddresses = dedupeStrings(macs)
|
||||
if (*nics)[idx].PortCount == 0 {
|
||||
if sanitizeNetworkPortCount((*nics)[idx].PortCount) == 0 {
|
||||
(*nics)[idx].PortCount = len(portDocs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -551,6 +551,40 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNIC_PortCountFromControllerCapabilities(t *testing.T) {
|
||||
nic := parseNIC(map[string]interface{}{
|
||||
"Id": "1",
|
||||
"Controllers": []interface{}{
|
||||
map[string]interface{}{
|
||||
"ControllerCapabilities": map[string]interface{}{
|
||||
"NetworkPortCount": 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if nic.PortCount != 2 {
|
||||
t.Fatalf("expected port_count=2, got %d", nic.PortCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNIC_DropsUnrealisticPortCount(t *testing.T) {
|
||||
nic := parseNIC(map[string]interface{}{
|
||||
"Id": "1",
|
||||
"Controllers": []interface{}{
|
||||
map[string]interface{}{
|
||||
"ControllerCapabilities": map[string]interface{}{
|
||||
"NetworkPortCount": 825307750,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if nic.PortCount != 0 {
|
||||
t.Fatalf("expected unrealistic port count to be dropped, got %d", nic.PortCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePCIeDevice_PrefersFunctionClassOverDeviceType(t *testing.T) {
|
||||
doc := map[string]interface{}{
|
||||
"Id": "NIC1",
|
||||
|
||||
@@ -193,6 +193,7 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice {
|
||||
appendDevice(models.HardwareDevice{
|
||||
Kind: models.DeviceKindGPU,
|
||||
Slot: gpu.Slot,
|
||||
Location: gpu.Location,
|
||||
BDF: gpu.BDF,
|
||||
DeviceClass: "DisplayController",
|
||||
VendorID: gpu.VendorID,
|
||||
@@ -206,12 +207,27 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice {
|
||||
LinkSpeed: gpu.CurrentLinkSpeed,
|
||||
MaxLinkWidth: gpu.MaxLinkWidth,
|
||||
MaxLinkSpeed: gpu.MaxLinkSpeed,
|
||||
TemperatureC: gpu.Temperature,
|
||||
Status: gpu.Status,
|
||||
StatusCheckedAt: gpu.StatusCheckedAt,
|
||||
StatusChangedAt: gpu.StatusChangedAt,
|
||||
StatusAtCollect: gpu.StatusAtCollect,
|
||||
StatusHistory: gpu.StatusHistory,
|
||||
ErrorDescription: gpu.ErrorDescription,
|
||||
Details: map[string]any{
|
||||
"uuid": gpu.UUID,
|
||||
"video_bios": gpu.VideoBIOS,
|
||||
"irq": gpu.IRQ,
|
||||
"bus_type": gpu.BusType,
|
||||
"dma_size": gpu.DMASize,
|
||||
"dma_mask": gpu.DMAMask,
|
||||
"device_minor": gpu.DeviceMinor,
|
||||
"temperature": gpu.Temperature,
|
||||
"mem_temperature": gpu.MemTemperature,
|
||||
"power": gpu.Power,
|
||||
"max_power": gpu.MaxPower,
|
||||
"clock_speed": gpu.ClockSpeed,
|
||||
},
|
||||
})
|
||||
}
|
||||
for _, nic := range hw.NetworkAdapters {
|
||||
@@ -292,8 +308,14 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi
|
||||
continue
|
||||
}
|
||||
if curr.score > prev.score {
|
||||
curr.item = mergeCanonicalDevice(curr.item, prev.item)
|
||||
curr.score = canonicalScore(curr.item)
|
||||
byKey[key] = curr
|
||||
continue
|
||||
}
|
||||
prev.item = mergeCanonicalDevice(prev.item, curr.item)
|
||||
prev.score = canonicalScore(prev.item)
|
||||
byKey[key] = prev
|
||||
}
|
||||
out := make([]models.HardwareDevice, 0, len(order)+len(noKey))
|
||||
for _, key := range order {
|
||||
@@ -306,6 +328,95 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi
|
||||
return out
|
||||
}
|
||||
|
||||
func mergeCanonicalDevice(primary, secondary models.HardwareDevice) models.HardwareDevice {
|
||||
fillString := func(dst *string, src string) {
|
||||
if strings.TrimSpace(*dst) == "" && strings.TrimSpace(src) != "" {
|
||||
*dst = src
|
||||
}
|
||||
}
|
||||
fillInt := func(dst *int, src int) {
|
||||
if *dst == 0 && src != 0 {
|
||||
*dst = src
|
||||
}
|
||||
}
|
||||
fillFloat := func(dst *float64, src float64) {
|
||||
if *dst == 0 && src != 0 {
|
||||
*dst = src
|
||||
}
|
||||
}
|
||||
|
||||
fillString(&primary.Kind, secondary.Kind)
|
||||
fillString(&primary.Source, secondary.Source)
|
||||
fillString(&primary.Slot, secondary.Slot)
|
||||
fillString(&primary.Location, secondary.Location)
|
||||
fillString(&primary.BDF, secondary.BDF)
|
||||
fillString(&primary.DeviceClass, secondary.DeviceClass)
|
||||
fillInt(&primary.VendorID, secondary.VendorID)
|
||||
fillInt(&primary.DeviceID, secondary.DeviceID)
|
||||
fillString(&primary.Model, secondary.Model)
|
||||
fillString(&primary.PartNumber, secondary.PartNumber)
|
||||
fillString(&primary.Manufacturer, secondary.Manufacturer)
|
||||
fillString(&primary.SerialNumber, secondary.SerialNumber)
|
||||
fillString(&primary.Firmware, secondary.Firmware)
|
||||
fillString(&primary.Type, secondary.Type)
|
||||
fillString(&primary.Interface, secondary.Interface)
|
||||
if primary.Present == nil && secondary.Present != nil {
|
||||
primary.Present = secondary.Present
|
||||
}
|
||||
fillInt(&primary.SizeMB, secondary.SizeMB)
|
||||
fillInt(&primary.SizeGB, secondary.SizeGB)
|
||||
fillInt(&primary.Cores, secondary.Cores)
|
||||
fillInt(&primary.Threads, secondary.Threads)
|
||||
fillInt(&primary.FrequencyMHz, secondary.FrequencyMHz)
|
||||
fillInt(&primary.MaxFreqMHz, secondary.MaxFreqMHz)
|
||||
fillInt(&primary.PortCount, secondary.PortCount)
|
||||
fillString(&primary.PortType, secondary.PortType)
|
||||
if len(primary.MACAddresses) == 0 && len(secondary.MACAddresses) > 0 {
|
||||
primary.MACAddresses = secondary.MACAddresses
|
||||
}
|
||||
fillInt(&primary.LinkWidth, secondary.LinkWidth)
|
||||
fillString(&primary.LinkSpeed, secondary.LinkSpeed)
|
||||
fillInt(&primary.MaxLinkWidth, secondary.MaxLinkWidth)
|
||||
fillString(&primary.MaxLinkSpeed, secondary.MaxLinkSpeed)
|
||||
fillInt(&primary.WattageW, secondary.WattageW)
|
||||
fillString(&primary.InputType, secondary.InputType)
|
||||
fillInt(&primary.InputPowerW, secondary.InputPowerW)
|
||||
fillInt(&primary.OutputPowerW, secondary.OutputPowerW)
|
||||
fillFloat(&primary.InputVoltage, secondary.InputVoltage)
|
||||
fillInt(&primary.TemperatureC, secondary.TemperatureC)
|
||||
fillString(&primary.Status, secondary.Status)
|
||||
if primary.StatusCheckedAt.IsZero() && !secondary.StatusCheckedAt.IsZero() {
|
||||
primary.StatusCheckedAt = secondary.StatusCheckedAt
|
||||
}
|
||||
if primary.StatusChangedAt.IsZero() && !secondary.StatusChangedAt.IsZero() {
|
||||
primary.StatusChangedAt = secondary.StatusChangedAt
|
||||
}
|
||||
if primary.StatusAtCollect == nil && secondary.StatusAtCollect != nil {
|
||||
primary.StatusAtCollect = secondary.StatusAtCollect
|
||||
}
|
||||
if len(primary.StatusHistory) == 0 && len(secondary.StatusHistory) > 0 {
|
||||
primary.StatusHistory = secondary.StatusHistory
|
||||
}
|
||||
fillString(&primary.ErrorDescription, secondary.ErrorDescription)
|
||||
primary.Details = mergeDetailMaps(primary.Details, secondary.Details)
|
||||
return primary
|
||||
}
|
||||
|
||||
// mergeDetailMaps returns the union of primary and secondary. When a key is
// present in both maps, primary's value wins.
//
// Neither argument is modified. When secondary is empty, primary itself is
// returned (possibly nil); otherwise the result is a freshly allocated map.
// Building a new map matters because device structs are copied by value while
// their Details maps are shared, so writing into primary in place would also
// change the Details of the device it originally came from.
func mergeDetailMaps(primary, secondary map[string]any) map[string]any {
	if len(secondary) == 0 {
		return primary
	}

	merged := make(map[string]any, len(primary)+len(secondary))
	for k, v := range secondary {
		merged[k] = v
	}
	// Written last so that primary's entries take precedence on conflicts.
	for k, v := range primary {
		merged[k] = v
	}
	return merged
}
|
||||
|
||||
func canonicalKey(item models.HardwareDevice) string {
|
||||
if sn := normalizedSerial(item.SerialNumber); sn != "" {
|
||||
return "sn:" + strings.ToLower(sn)
|
||||
@@ -483,6 +594,23 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string)
|
||||
if model == "" {
|
||||
model = d.PartNumber
|
||||
}
|
||||
temperatureC := d.TemperatureC
|
||||
if temperatureC == 0 {
|
||||
temperatureC = firstNonZeroInt(
|
||||
intFromDetailMap(d.Details, "temperature_c"),
|
||||
intFromDetailMap(d.Details, "temperature"),
|
||||
)
|
||||
}
|
||||
powerW := firstNonZeroInt(
|
||||
intFromDetailMap(d.Details, "power_w"),
|
||||
intFromDetailMap(d.Details, "power"),
|
||||
)
|
||||
voltageV := firstNonZeroFloat(
|
||||
floatFromDetailMap(d.Details, "voltage_v"),
|
||||
floatFromDetailMap(d.Details, "voltage"),
|
||||
floatFromDetailMap(d.Details, "input_voltage"),
|
||||
d.InputVoltage,
|
||||
)
|
||||
status := normalizeStatus(d.Status, false)
|
||||
meta := buildStatusMeta(status, d.StatusCheckedAt, d.StatusChangedAt, d.StatusAtCollect, d.StatusHistory, d.ErrorDescription, collectedAt)
|
||||
result = append(result, ReanimatorPCIe{
|
||||
@@ -499,6 +627,9 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string)
|
||||
MaxLinkSpeed: d.MaxLinkSpeed,
|
||||
SerialNumber: normalizedSerial(d.SerialNumber),
|
||||
Firmware: d.Firmware,
|
||||
TemperatureC: temperatureC,
|
||||
PowerW: powerW,
|
||||
VoltageV: voltageV,
|
||||
Status: status,
|
||||
StatusCheckedAt: meta.StatusCheckedAt,
|
||||
StatusChangedAt: meta.StatusChangedAt,
|
||||
@@ -536,6 +667,7 @@ func convertPSUsFromDevices(devices []models.HardwareDevice, collectedAt string)
|
||||
InputPowerW: d.InputPowerW,
|
||||
OutputPowerW: d.OutputPowerW,
|
||||
InputVoltage: d.InputVoltage,
|
||||
TemperatureC: d.TemperatureC,
|
||||
StatusCheckedAt: meta.StatusCheckedAt,
|
||||
StatusChangedAt: meta.StatusChangedAt,
|
||||
StatusAtCollect: meta.StatusAtCollection,
|
||||
@@ -804,6 +936,8 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
||||
MaxLinkSpeed: gpu.MaxLinkSpeed,
|
||||
SerialNumber: serialNumber,
|
||||
Firmware: gpu.Firmware,
|
||||
TemperatureC: gpu.Temperature,
|
||||
PowerW: gpu.Power,
|
||||
Status: status,
|
||||
StatusCheckedAt: meta.StatusCheckedAt,
|
||||
StatusChangedAt: meta.StatusChangedAt,
|
||||
@@ -954,6 +1088,7 @@ func convertPowerSupplies(psus []models.PSU, collectedAt string) []ReanimatorPSU
|
||||
InputPowerW: psu.InputPowerW,
|
||||
OutputPowerW: psu.OutputPowerW,
|
||||
InputVoltage: psu.InputVoltage,
|
||||
TemperatureC: psu.TemperatureC,
|
||||
StatusCheckedAt: meta.StatusCheckedAt,
|
||||
StatusChangedAt: meta.StatusChangedAt,
|
||||
StatusAtCollect: meta.StatusAtCollection,
|
||||
@@ -1286,13 +1421,73 @@ func intFromDetailMap(details map[string]any, key string) int {
|
||||
switch n := v.(type) {
|
||||
case int:
|
||||
return n
|
||||
case int64:
|
||||
return int(n)
|
||||
case int32:
|
||||
return int(n)
|
||||
case float64:
|
||||
return int(n)
|
||||
case float32:
|
||||
return int(n)
|
||||
case string:
|
||||
i, err := strconv.Atoi(strings.TrimSpace(n))
|
||||
if err == nil {
|
||||
return i
|
||||
}
|
||||
return 0
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// floatFromDetailMap reads details[key] and converts it to a float64.
//
// Supported value types are float64, float32, int, int64, int32 and strings
// that strconv.ParseFloat can parse after trimming surrounding whitespace.
// The function returns 0 when the key is absent (including a nil map), the
// value has any other type, the string does not parse, or the resulting
// number is not finite.
//
// Non-finite values are rejected because strconv.ParseFloat accepts spellings
// such as "NaN" and "Inf"; a NaN or infinity compares unequal to zero, so it
// would be treated as a real reading by callers and cannot be encoded by
// encoding/json.
func floatFromDetailMap(details map[string]any, key string) float64 {
	// Indexing a nil map is safe and simply reports the key as missing.
	raw, ok := details[key]
	if !ok {
		return 0
	}

	var f float64
	switch n := raw.(type) {
	case float64:
		f = n
	case float32:
		f = float64(n)
	case int:
		f = float64(n)
	case int64:
		f = float64(n)
	case int32:
		f = float64(n)
	case string:
		parsed, err := strconv.ParseFloat(strings.TrimSpace(n), 64)
		if err != nil {
			return 0
		}
		f = parsed
	default:
		return 0
	}

	// Multiplying by zero yields NaN for NaN and for either infinity, and
	// (signed) zero for every finite value, so this detects non-finite numbers
	// without needing an extra import.
	if f*0 != 0 {
		return 0
	}
	return f
}
|
||||
|
||||
// firstNonZeroInt returns the earliest argument that is not zero. It returns
// 0 when called without arguments or when every argument is zero.
func firstNonZeroInt(values ...int) int {
	for i := range values {
		if candidate := values[i]; candidate != 0 {
			return candidate
		}
	}
	return 0
}
|
||||
|
||||
// firstNonZeroFloat returns the earliest argument that compares unequal to
// zero. It returns 0 when called without arguments or when every argument is
// zero.
func firstNonZeroFloat(values ...float64) float64 {
	for i := range values {
		if candidate := values[i]; candidate != 0 {
			return candidate
		}
	}
	return 0
}
|
||||
|
||||
// inferStorageStatus determines storage device status
|
||||
func inferStorageStatus(stor models.Storage) string {
|
||||
if !stor.Present {
|
||||
|
||||
@@ -737,4 +737,110 @@ func TestConvertToReanimator_UsesCanonicalDevices(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertToReanimator_BindsDeviceVitals(t *testing.T) {
|
||||
input := &models.AnalysisResult{
|
||||
Filename: "vitals.json",
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
|
||||
Devices: []models.HardwareDevice{
|
||||
{
|
||||
Kind: models.DeviceKindGPU,
|
||||
Slot: "#GPU0",
|
||||
Model: "B200 180GB HBM3e",
|
||||
SerialNumber: "GPU-001",
|
||||
BDF: "0000:17:00.0",
|
||||
Details: map[string]any{
|
||||
"temperature": 71,
|
||||
"power": 350,
|
||||
"voltage": 12.2,
|
||||
},
|
||||
},
|
||||
{
|
||||
Kind: models.DeviceKindPSU,
|
||||
Slot: "PSU0",
|
||||
SerialNumber: "PSU-001",
|
||||
Present: boolPtr(true),
|
||||
InputPowerW: 1400,
|
||||
OutputPowerW: 1300,
|
||||
InputVoltage: 229.5,
|
||||
TemperatureC: 44,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
out, err := ConvertToReanimator(input)
|
||||
if err != nil {
|
||||
t.Fatalf("ConvertToReanimator() failed: %v", err)
|
||||
}
|
||||
|
||||
if len(out.Hardware.PCIeDevices) != 1 {
|
||||
t.Fatalf("expected one pcie device, got %d", len(out.Hardware.PCIeDevices))
|
||||
}
|
||||
pcie := out.Hardware.PCIeDevices[0]
|
||||
if pcie.TemperatureC != 71 {
|
||||
t.Fatalf("expected GPU temperature 71C, got %d", pcie.TemperatureC)
|
||||
}
|
||||
if pcie.PowerW != 350 {
|
||||
t.Fatalf("expected GPU power 350W, got %d", pcie.PowerW)
|
||||
}
|
||||
if pcie.VoltageV != 12.2 {
|
||||
t.Fatalf("expected device voltage 12.2V, got %.2f", pcie.VoltageV)
|
||||
}
|
||||
|
||||
if len(out.Hardware.PowerSupplies) != 1 {
|
||||
t.Fatalf("expected one PSU, got %d", len(out.Hardware.PowerSupplies))
|
||||
}
|
||||
psu := out.Hardware.PowerSupplies[0]
|
||||
if psu.TemperatureC != 44 {
|
||||
t.Fatalf("expected PSU temperature 44C, got %d", psu.TemperatureC)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertToReanimator_PreservesVitalsAcrossCanonicalDedup(t *testing.T) {
|
||||
input := &models.AnalysisResult{
|
||||
Filename: "dedup-vitals.json",
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
|
||||
PCIeDevices: []models.PCIeDevice{
|
||||
{
|
||||
Slot: "#GPU0",
|
||||
BDF: "0000:17:00.0",
|
||||
DeviceClass: "3D Controller",
|
||||
PartNumber: "Generic Display",
|
||||
Manufacturer: "NVIDIA",
|
||||
SerialNumber: "GPU-SN-001",
|
||||
},
|
||||
},
|
||||
GPUs: []models.GPU{
|
||||
{
|
||||
Slot: "#GPU0",
|
||||
BDF: "0000:17:00.0",
|
||||
Model: "B200 180GB HBM3e",
|
||||
Manufacturer: "NVIDIA",
|
||||
SerialNumber: "GPU-SN-001",
|
||||
Temperature: 67,
|
||||
Power: 330,
|
||||
Status: "OK",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
out, err := ConvertToReanimator(input)
|
||||
if err != nil {
|
||||
t.Fatalf("ConvertToReanimator() failed: %v", err)
|
||||
}
|
||||
if len(out.Hardware.PCIeDevices) != 1 {
|
||||
t.Fatalf("expected deduped one pcie entry, got %d", len(out.Hardware.PCIeDevices))
|
||||
}
|
||||
got := out.Hardware.PCIeDevices[0]
|
||||
if got.TemperatureC != 67 {
|
||||
t.Fatalf("expected deduped GPU temperature 67C, got %d", got.TemperatureC)
|
||||
}
|
||||
if got.PowerW != 330 {
|
||||
t.Fatalf("expected deduped GPU power 330W, got %d", got.PowerW)
|
||||
}
|
||||
}
|
||||
|
||||
// boolPtr returns a pointer to a newly allocated bool holding v.
func boolPtr(v bool) *bool {
	p := new(bool)
	*p = v
	return p
}
|
||||
|
||||
@@ -118,6 +118,9 @@ type ReanimatorPCIe struct {
|
||||
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
TemperatureC int `json:"temperature_c,omitempty"`
|
||||
PowerW int `json:"power_w,omitempty"`
|
||||
VoltageV float64 `json:"voltage_v,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||
@@ -141,6 +144,7 @@ type ReanimatorPSU struct {
|
||||
InputPowerW int `json:"input_power_w,omitempty"`
|
||||
OutputPowerW int `json:"output_power_w,omitempty"`
|
||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||
TemperatureC int `json:"temperature_c,omitempty"`
|
||||
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
|
||||
Reference in New Issue
Block a user