From fe5da1dbd79a3c0486a8994094343979861a9614 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Sat, 28 Feb 2026 18:42:01 +0300 Subject: [PATCH] Fix NIC port count handling and apply pending exporter updates --- internal/collector/redfish.go | 13 +- internal/collector/redfish_replay.go | 2 +- internal/collector/redfish_test.go | 34 +++ internal/exporter/reanimator_converter.go | 195 ++++++++++++++++++ .../exporter/reanimator_converter_test.go | 106 ++++++++++ internal/exporter/reanimator_models.go | 4 + web/static/js/app.js | 12 +- 7 files changed, 362 insertions(+), 4 deletions(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index 207a53a..7807e18 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -2361,7 +2361,7 @@ func parseNIC(doc map[string]interface{}) models.NetworkAdapter { location = firstNonEmpty(location, redfishLocationLabel(ctrl["Location"])) firmware = asString(ctrl["FirmwarePackageVersion"]) if caps, ok := ctrl["ControllerCapabilities"].(map[string]interface{}); ok { - portCount = asInt(caps["NetworkPortCount"]) + portCount = sanitizeNetworkPortCount(asInt(caps["NetworkPortCount"])) } } } @@ -3406,6 +3406,8 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte base, donor = donor, base } out := base + out.PortCount = sanitizeNetworkPortCount(out.PortCount) + donor.PortCount = sanitizeNetworkPortCount(donor.PortCount) if strings.TrimSpace(out.Slot) == "" && strings.TrimSpace(donor.Slot) != "" { out.Slot = donor.Slot } @@ -3452,6 +3454,15 @@ func mergeNetworkAdapterEntries(a, b models.NetworkAdapter) models.NetworkAdapte return out } +const maxReasonableNetworkPortCount = 256 + +func sanitizeNetworkPortCount(v int) int { + if v <= 0 || v > maxReasonableNetworkPortCount { + return 0 + } + return v +} + func dedupePCIeDevices(items []models.PCIeDevice) []models.PCIeDevice { if len(items) <= 1 { return items diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index d6df873..072230b 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -653,7 +653,7 @@ func (r redfishSnapshotReader) enrichNICsFromNetworkInterfaces(nics *[]models.Ne macs = append(macs, collectNetworkPortMACs(p)...) } (*nics)[idx].MACAddresses = dedupeStrings(macs) - if (*nics)[idx].PortCount == 0 { + if sanitizeNetworkPortCount((*nics)[idx].PortCount) == 0 { (*nics)[idx].PortCount = len(portDocs) } } diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index e297ed0..c622ff1 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -551,6 +551,40 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) { } } +func TestParseNIC_PortCountFromControllerCapabilities(t *testing.T) { + nic := parseNIC(map[string]interface{}{ + "Id": "1", + "Controllers": []interface{}{ + map[string]interface{}{ + "ControllerCapabilities": map[string]interface{}{ + "NetworkPortCount": 2, + }, + }, + }, + }) + + if nic.PortCount != 2 { + t.Fatalf("expected port_count=2, got %d", nic.PortCount) + } +} + +func TestParseNIC_DropsUnrealisticPortCount(t *testing.T) { + nic := parseNIC(map[string]interface{}{ + "Id": "1", + "Controllers": []interface{}{ + map[string]interface{}{ + "ControllerCapabilities": map[string]interface{}{ + "NetworkPortCount": 825307750, + }, + }, + }, + }) + + if nic.PortCount != 0 { + t.Fatalf("expected unrealistic port count to be dropped, got %d", nic.PortCount) + } +} + func TestParsePCIeDevice_PrefersFunctionClassOverDeviceType(t *testing.T) { doc := map[string]interface{}{ "Id": "NIC1", diff --git a/internal/exporter/reanimator_converter.go b/internal/exporter/reanimator_converter.go index ea5d6c4..cfbefd0 100644 --- a/internal/exporter/reanimator_converter.go +++ b/internal/exporter/reanimator_converter.go @@ -193,6 +193,7 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice { appendDevice(models.HardwareDevice{ Kind: models.DeviceKindGPU, Slot: gpu.Slot, + Location: gpu.Location, BDF: gpu.BDF, DeviceClass: "DisplayController", VendorID: gpu.VendorID, @@ -206,12 +207,27 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice { LinkSpeed: gpu.CurrentLinkSpeed, MaxLinkWidth: gpu.MaxLinkWidth, MaxLinkSpeed: gpu.MaxLinkSpeed, + TemperatureC: gpu.Temperature, Status: gpu.Status, StatusCheckedAt: gpu.StatusCheckedAt, StatusChangedAt: gpu.StatusChangedAt, StatusAtCollect: gpu.StatusAtCollect, StatusHistory: gpu.StatusHistory, ErrorDescription: gpu.ErrorDescription, + Details: map[string]any{ + "uuid": gpu.UUID, + "video_bios": gpu.VideoBIOS, + "irq": gpu.IRQ, + "bus_type": gpu.BusType, + "dma_size": gpu.DMASize, + "dma_mask": gpu.DMAMask, + "device_minor": gpu.DeviceMinor, + "temperature": gpu.Temperature, + "mem_temperature": gpu.MemTemperature, + "power": gpu.Power, + "max_power": gpu.MaxPower, + "clock_speed": gpu.ClockSpeed, + }, }) } for _, nic := range hw.NetworkAdapters { @@ -292,8 +308,14 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi continue } if curr.score > prev.score { + curr.item = mergeCanonicalDevice(curr.item, prev.item) + curr.score = canonicalScore(curr.item) byKey[key] = curr + continue } + prev.item = mergeCanonicalDevice(prev.item, curr.item) + prev.score = canonicalScore(prev.item) + byKey[key] = prev } out := make([]models.HardwareDevice, 0, len(order)+len(noKey)) for _, key := range order { @@ -306,6 +328,95 @@ func dedupeCanonicalDevices(items []models.HardwareDevice) []models.HardwareDevi return out } +func mergeCanonicalDevice(primary, secondary models.HardwareDevice) models.HardwareDevice { + fillString := func(dst *string, src string) { + if strings.TrimSpace(*dst) == "" && strings.TrimSpace(src) != "" { + *dst = src + } + } + fillInt := func(dst *int, src int) { + if *dst == 0 && src != 0 { + *dst = src + } + } + fillFloat := func(dst *float64, src float64) { + if *dst == 0 && src != 0 { + *dst = src + } + } + + fillString(&primary.Kind, secondary.Kind) + fillString(&primary.Source, secondary.Source) + fillString(&primary.Slot, secondary.Slot) + fillString(&primary.Location, secondary.Location) + fillString(&primary.BDF, secondary.BDF) + fillString(&primary.DeviceClass, secondary.DeviceClass) + fillInt(&primary.VendorID, secondary.VendorID) + fillInt(&primary.DeviceID, secondary.DeviceID) + fillString(&primary.Model, secondary.Model) + fillString(&primary.PartNumber, secondary.PartNumber) + fillString(&primary.Manufacturer, secondary.Manufacturer) + fillString(&primary.SerialNumber, secondary.SerialNumber) + fillString(&primary.Firmware, secondary.Firmware) + fillString(&primary.Type, secondary.Type) + fillString(&primary.Interface, secondary.Interface) + if primary.Present == nil && secondary.Present != nil { + primary.Present = secondary.Present + } + fillInt(&primary.SizeMB, secondary.SizeMB) + fillInt(&primary.SizeGB, secondary.SizeGB) + fillInt(&primary.Cores, secondary.Cores) + fillInt(&primary.Threads, secondary.Threads) + fillInt(&primary.FrequencyMHz, secondary.FrequencyMHz) + fillInt(&primary.MaxFreqMHz, secondary.MaxFreqMHz) + fillInt(&primary.PortCount, secondary.PortCount) + fillString(&primary.PortType, secondary.PortType) + if len(primary.MACAddresses) == 0 && len(secondary.MACAddresses) > 0 { + primary.MACAddresses = secondary.MACAddresses + } + fillInt(&primary.LinkWidth, secondary.LinkWidth) + fillString(&primary.LinkSpeed, secondary.LinkSpeed) + fillInt(&primary.MaxLinkWidth, secondary.MaxLinkWidth) + fillString(&primary.MaxLinkSpeed, secondary.MaxLinkSpeed) + fillInt(&primary.WattageW, secondary.WattageW) + fillString(&primary.InputType, secondary.InputType) + fillInt(&primary.InputPowerW, secondary.InputPowerW) + fillInt(&primary.OutputPowerW, secondary.OutputPowerW) + fillFloat(&primary.InputVoltage, secondary.InputVoltage) + fillInt(&primary.TemperatureC, secondary.TemperatureC) + fillString(&primary.Status, secondary.Status) + if primary.StatusCheckedAt.IsZero() && !secondary.StatusCheckedAt.IsZero() { + primary.StatusCheckedAt = secondary.StatusCheckedAt + } + if primary.StatusChangedAt.IsZero() && !secondary.StatusChangedAt.IsZero() { + primary.StatusChangedAt = secondary.StatusChangedAt + } + if primary.StatusAtCollect == nil && secondary.StatusAtCollect != nil { + primary.StatusAtCollect = secondary.StatusAtCollect + } + if len(primary.StatusHistory) == 0 && len(secondary.StatusHistory) > 0 { + primary.StatusHistory = secondary.StatusHistory + } + fillString(&primary.ErrorDescription, secondary.ErrorDescription) + primary.Details = mergeDetailMaps(primary.Details, secondary.Details) + return primary +} + +func mergeDetailMaps(primary, secondary map[string]any) map[string]any { + if len(secondary) == 0 { + return primary + } + if primary == nil { + primary = make(map[string]any, len(secondary)) + } + for k, v := range secondary { + if _, exists := primary[k]; !exists { + primary[k] = v + } + } + return primary +} + func canonicalKey(item models.HardwareDevice) string { if sn := normalizedSerial(item.SerialNumber); sn != "" { return "sn:" + strings.ToLower(sn) @@ -483,6 +594,23 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string) if model == "" { model = d.PartNumber } + temperatureC := d.TemperatureC + if temperatureC == 0 { + temperatureC = firstNonZeroInt( + intFromDetailMap(d.Details, "temperature_c"), + intFromDetailMap(d.Details, "temperature"), + ) + } + powerW := firstNonZeroInt( + intFromDetailMap(d.Details, "power_w"), + intFromDetailMap(d.Details, "power"), + ) + voltageV := firstNonZeroFloat( + floatFromDetailMap(d.Details, "voltage_v"), + floatFromDetailMap(d.Details, "voltage"), + floatFromDetailMap(d.Details, "input_voltage"), + d.InputVoltage, + ) status := normalizeStatus(d.Status, false) meta := buildStatusMeta(status, d.StatusCheckedAt, d.StatusChangedAt, d.StatusAtCollect, d.StatusHistory, d.ErrorDescription, collectedAt) result = append(result, ReanimatorPCIe{ @@ -499,6 +627,9 @@ func convertPCIeFromDevices(devices []models.HardwareDevice, collectedAt string) MaxLinkSpeed: d.MaxLinkSpeed, SerialNumber: normalizedSerial(d.SerialNumber), Firmware: d.Firmware, + TemperatureC: temperatureC, + PowerW: powerW, + VoltageV: voltageV, Status: status, StatusCheckedAt: meta.StatusCheckedAt, StatusChangedAt: meta.StatusChangedAt, @@ -536,6 +667,7 @@ func convertPSUsFromDevices(devices []models.HardwareDevice, collectedAt string) InputPowerW: d.InputPowerW, OutputPowerW: d.OutputPowerW, InputVoltage: d.InputVoltage, + TemperatureC: d.TemperatureC, StatusCheckedAt: meta.StatusCheckedAt, StatusChangedAt: meta.StatusChangedAt, StatusAtCollect: meta.StatusAtCollection, @@ -804,6 +936,8 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima MaxLinkSpeed: gpu.MaxLinkSpeed, SerialNumber: serialNumber, Firmware: gpu.Firmware, + TemperatureC: gpu.Temperature, + PowerW: gpu.Power, Status: status, StatusCheckedAt: meta.StatusCheckedAt, StatusChangedAt: meta.StatusChangedAt, @@ -954,6 +1088,7 @@ func convertPowerSupplies(psus []models.PSU, collectedAt string) []ReanimatorPSU InputPowerW: psu.InputPowerW, OutputPowerW: psu.OutputPowerW, InputVoltage: psu.InputVoltage, + TemperatureC: psu.TemperatureC, StatusCheckedAt: meta.StatusCheckedAt, StatusChangedAt: meta.StatusChangedAt, StatusAtCollect: meta.StatusAtCollection, @@ -1286,13 +1421,73 @@ func intFromDetailMap(details map[string]any, key string) int { switch n := v.(type) { case int: return n + case int64: + return int(n) + case int32: + return int(n) case float64: return int(n) + case float32: + return int(n) + case string: + i, err := strconv.Atoi(strings.TrimSpace(n)) + if err == nil { + return i + } + return 0 default: return 0 } } +func floatFromDetailMap(details map[string]any, key string) float64 { + if details == nil { + return 0 + } + v, ok := details[key] + if !ok { + return 0 + } + switch n := v.(type) { + case float64: + return n + case float32: + return float64(n) + case int: + return float64(n) + case int64: + return float64(n) + case int32: + return float64(n) + case string: + f, err := strconv.ParseFloat(strings.TrimSpace(n), 64) + if err == nil { + return f + } + return 0 + default: + return 0 + } +} + +func firstNonZeroInt(values ...int) int { + for _, v := range values { + if v != 0 { + return v + } + } + return 0 +} + +func firstNonZeroFloat(values ...float64) float64 { + for _, v := range values { + if v != 0 { + return v + } + } + return 0 +} + // inferStorageStatus determines storage device status func inferStorageStatus(stor models.Storage) string { if !stor.Present { diff --git a/internal/exporter/reanimator_converter_test.go b/internal/exporter/reanimator_converter_test.go index ebf19e2..eabf9bd 100644 --- a/internal/exporter/reanimator_converter_test.go +++ b/internal/exporter/reanimator_converter_test.go @@ -737,4 +737,110 @@ func TestConvertToReanimator_UsesCanonicalDevices(t *testing.T) { } } +func TestConvertToReanimator_BindsDeviceVitals(t *testing.T) { + input := &models.AnalysisResult{ + Filename: "vitals.json", + Hardware: &models.HardwareConfig{ + BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"}, + Devices: []models.HardwareDevice{ + { + Kind: models.DeviceKindGPU, + Slot: "#GPU0", + Model: "B200 180GB HBM3e", + SerialNumber: "GPU-001", + BDF: "0000:17:00.0", + Details: map[string]any{ + "temperature": 71, + "power": 350, + "voltage": 12.2, + }, + }, + { + Kind: models.DeviceKindPSU, + Slot: "PSU0", + SerialNumber: "PSU-001", + Present: boolPtr(true), + InputPowerW: 1400, + OutputPowerW: 1300, + InputVoltage: 229.5, + TemperatureC: 44, + }, + }, + }, + } + + out, err := ConvertToReanimator(input) + if err != nil { + t.Fatalf("ConvertToReanimator() failed: %v", err) + } + + if len(out.Hardware.PCIeDevices) != 1 { + t.Fatalf("expected one pcie device, got %d", len(out.Hardware.PCIeDevices)) + } + pcie := out.Hardware.PCIeDevices[0] + if pcie.TemperatureC != 71 { + t.Fatalf("expected GPU temperature 71C, got %d", pcie.TemperatureC) + } + if pcie.PowerW != 350 { + t.Fatalf("expected GPU power 350W, got %d", pcie.PowerW) + } + if pcie.VoltageV != 12.2 { + t.Fatalf("expected device voltage 12.2V, got %.2f", pcie.VoltageV) + } + + if len(out.Hardware.PowerSupplies) != 1 { + t.Fatalf("expected one PSU, got %d", len(out.Hardware.PowerSupplies)) + } + psu := out.Hardware.PowerSupplies[0] + if psu.TemperatureC != 44 { + t.Fatalf("expected PSU temperature 44C, got %d", psu.TemperatureC) + } +} + +func TestConvertToReanimator_PreservesVitalsAcrossCanonicalDedup(t *testing.T) { + input := &models.AnalysisResult{ + Filename: "dedup-vitals.json", + Hardware: &models.HardwareConfig{ + BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"}, + PCIeDevices: []models.PCIeDevice{ + { + Slot: "#GPU0", + BDF: "0000:17:00.0", + DeviceClass: "3D Controller", + PartNumber: "Generic Display", + Manufacturer: "NVIDIA", + SerialNumber: "GPU-SN-001", + }, + }, + GPUs: []models.GPU{ + { + Slot: "#GPU0", + BDF: "0000:17:00.0", + Model: "B200 180GB HBM3e", + Manufacturer: "NVIDIA", + SerialNumber: "GPU-SN-001", + Temperature: 67, + Power: 330, + Status: "OK", + }, + }, + }, + } + + out, err := ConvertToReanimator(input) + if err != nil { + t.Fatalf("ConvertToReanimator() failed: %v", err) + } + if len(out.Hardware.PCIeDevices) != 1 { + t.Fatalf("expected deduped one pcie entry, got %d", len(out.Hardware.PCIeDevices)) + } + got := out.Hardware.PCIeDevices[0] + if got.TemperatureC != 67 { + t.Fatalf("expected deduped GPU temperature 67C, got %d", got.TemperatureC) + } + if got.PowerW != 330 { + t.Fatalf("expected deduped GPU power 330W, got %d", got.PowerW) + } +} + func boolPtr(v bool) *bool { return &v } diff --git a/internal/exporter/reanimator_models.go b/internal/exporter/reanimator_models.go index e7b298b..c2328cd 100644 --- a/internal/exporter/reanimator_models.go +++ b/internal/exporter/reanimator_models.go @@ -118,6 +118,9 @@ type ReanimatorPCIe struct { MaxLinkSpeed string `json:"max_link_speed,omitempty"` SerialNumber string `json:"serial_number,omitempty"` Firmware string `json:"firmware,omitempty"` + TemperatureC int `json:"temperature_c,omitempty"` + PowerW int `json:"power_w,omitempty"` + VoltageV float64 `json:"voltage_v,omitempty"` Status string `json:"status,omitempty"` StatusCheckedAt string `json:"status_checked_at,omitempty"` StatusChangedAt string `json:"status_changed_at,omitempty"` @@ -141,6 +144,7 @@ type ReanimatorPSU struct { InputPowerW int `json:"input_power_w,omitempty"` OutputPowerW int `json:"output_power_w,omitempty"` InputVoltage float64 `json:"input_voltage,omitempty"` + TemperatureC int `json:"temperature_c,omitempty"` StatusCheckedAt string `json:"status_checked_at,omitempty"` StatusChangedAt string `json:"status_changed_at,omitempty"` StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"` diff --git a/web/static/js/app.js b/web/static/js/app.js index 2640700..9e7015d 100644 --- a/web/static/js/app.js +++ b/web/static/js/app.js @@ -994,9 +994,16 @@ function renderConfig(data) { // Network tab html += '
'; const networkRows = networkAdapters; + const normalizeNetworkPortCount = (value) => { + const num = Number(value); + if (!Number.isFinite(num) || num <= 0 || num > 256) { + return null; + } + return Math.trunc(num); + }; if (networkRows.length > 0) { const nicCount = networkRows.length; - const totalPorts = networkRows.reduce((sum, n) => sum + (n.port_count || 0), 0); + const totalPorts = networkRows.reduce((sum, n) => sum + (normalizeNetworkPortCount(n.port_count) || 0), 0); const nicTypes = [...new Set(networkRows.map(n => n.port_type).filter(t => t))]; const nicModels = [...new Set(networkRows.map(n => n.model).filter(m => m))]; html += `

Сетевые адаптеры

@@ -1010,11 +1017,12 @@ function renderConfig(data) { networkRows.forEach(nic => { const macs = nic.mac_addresses ? nic.mac_addresses.join(', ') : '-'; const statusClass = nic.status === 'OK' ? '' : 'status-warning'; + const displayPortCount = normalizeNetworkPortCount(nic.port_count); html += ` ${escapeHtml(nic.location || nic.slot || '-')} ${escapeHtml(nic.model || '-')} ${escapeHtml(nic.manufacturer || nic.vendor || '-')} - ${nic.port_count || '-'} + ${displayPortCount ?? '-'} ${escapeHtml(nic.port_type || '-')} ${escapeHtml(macs)} ${escapeHtml(nic.status || '-')}