diff --git a/bible b/bible index 5244435..d2600f1 160000 --- a/bible +++ b/bible @@ -1 +1 @@ -Subproject commit 52444350c1c5d580b5849ad6d7c449c95a5c2261 +Subproject commit d2600f12799451cd5233a8d2c0e3235d1e7d25ab diff --git a/internal/chart b/internal/chart index f651798..2a15bc8 160000 --- a/internal/chart +++ b/internal/chart @@ -1 +1 @@ -Subproject commit f6517987b3438ae46a5f45e7dda894d2a76a8e92 +Subproject commit 2a15bc87f138451f27218d0a02d6a36a3e4afdf4 diff --git a/internal/exporter/reanimator_converter.go b/internal/exporter/reanimator_converter.go index 60da7a4..fbb0210 100644 --- a/internal/exporter/reanimator_converter.go +++ b/internal/exporter/reanimator_converter.go @@ -159,6 +159,16 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice { } for _, stor := range hw.Storage { present := stor.Present + storDetails := mergeDetailMaps(nil, stor.Details) + if stor.LogicalBlockSizeBytes != 0 { + storDetails = mergeDetailMaps(storDetails, map[string]any{"logical_block_size_bytes": stor.LogicalBlockSizeBytes}) + } + if stor.PhysicalBlockSizeBytes != 0 { + storDetails = mergeDetailMaps(storDetails, map[string]any{"physical_block_size_bytes": stor.PhysicalBlockSizeBytes}) + } + if stor.MetadataBytesPerBlock != 0 { + storDetails = mergeDetailMaps(storDetails, map[string]any{"metadata_bytes_per_block": stor.MetadataBytesPerBlock}) + } appendDevice(models.HardwareDevice{ Kind: models.DeviceKindStorage, Slot: stor.Slot, @@ -177,27 +187,38 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice { StatusAtCollect: stor.StatusAtCollect, StatusHistory: stor.StatusHistory, ErrorDescription: stor.ErrorDescription, - Details: mergeDetailMaps(nil, stor.Details), + Details: storDetails, }) } for _, pcie := range hw.PCIeDevices { - // Use PartNumber as model when available; fall back to chip description. - // Description contains the chip/product name (e.g. "BCM57414 NetXtreme-E …") - // while PartNumber is a part/product code. Prefer PartNumber when set. - pcieModel := pcie.PartNumber - if pcieModel == "" { - pcieModel = pcie.Description - } + // Priority: PartNumber (vendor P/N) > Model (product name) > Description (chip label). + pcieModel := firstNonEmptyString(pcie.PartNumber, pcie.Model, pcie.Description) details := mergeDetailMaps(nil, pcie.Details) - pcieFirmware := stringFromDetailMap(details, "firmware") + // Firmware: prefer direct field, fall back to details, then NVSwitch lookup. + pcieFirmware := firstNonEmptyString(pcie.Firmware, stringFromDetailMap(details, "firmware")) if pcieFirmware == "" && isNVSwitchPCIeDevice(pcie) { pcieFirmware = nvswitchFirmwareBySlot[normalizeNVSwitchSlotForLookup(pcie.Slot)] - if pcieFirmware != "" { - details = mergeDetailMaps(details, map[string]any{ - "firmware": pcieFirmware, - }) - } } + if pcieFirmware != "" { + details = mergeDetailMaps(details, map[string]any{"firmware": pcieFirmware}) + } + // Telemetry fields: put into details so convertPCIeFromDevices can pick them up. + if pcie.TemperatureC != nil { + details = mergeDetailMaps(details, map[string]any{"temperature_c": *pcie.TemperatureC}) + } + if pcie.PowerW != nil { + details = mergeDetailMaps(details, map[string]any{"power_w": *pcie.PowerW}) + } + if pcie.ECCCorrectedTotal != nil { + details = mergeDetailMaps(details, map[string]any{"ecc_corrected_total": *pcie.ECCCorrectedTotal}) + } + if pcie.ECCUncorrectedTotal != nil { + details = mergeDetailMaps(details, map[string]any{"ecc_uncorrected_total": *pcie.ECCUncorrectedTotal}) + } + if pcie.HWSlowdown != nil { + details = mergeDetailMaps(details, map[string]any{"hw_slowdown": *pcie.HWSlowdown}) + } + present := pcie.Present appendDevice(models.HardwareDevice{ Kind: models.DeviceKindPCIe, Slot: pcie.Slot, @@ -209,11 +230,13 @@ func buildDevicesFromLegacy(hw *models.HardwareConfig) []models.HardwareDevice { PartNumber: pcie.PartNumber, Manufacturer: pcie.Manufacturer, SerialNumber: pcie.SerialNumber, + MACAddresses: append([]string(nil), pcie.MACAddresses...), LinkWidth: pcie.LinkWidth, LinkSpeed: pcie.LinkSpeed, MaxLinkWidth: pcie.MaxLinkWidth, MaxLinkSpeed: pcie.MaxLinkSpeed, NUMANode: pcie.NUMANode, + Present: present, Status: pcie.Status, StatusCheckedAt: pcie.StatusCheckedAt, StatusChangedAt: pcie.StatusChangedAt, @@ -738,36 +761,39 @@ func convertStorageFromDevices(devices []models.HardwareDevice, collectedAt stri meta := buildStatusMeta(status, d.StatusCheckedAt, d.StatusChangedAt, d.StatusHistory, d.ErrorDescription, collectedAt) presentValue := present result = append(result, ReanimatorStorage{ - Slot: d.Slot, - Type: d.Type, - Model: d.Model, - SizeGB: d.SizeGB, - SerialNumber: d.SerialNumber, - Manufacturer: d.Manufacturer, - Firmware: d.Firmware, - Interface: d.Interface, - Present: &presentValue, - TemperatureC: floatFromDetailMap(d.Details, "temperature_c"), - PowerOnHours: int64FromDetailMap(d.Details, "power_on_hours"), - PowerCycles: int64FromDetailMap(d.Details, "power_cycles"), - UnsafeShutdowns: int64FromDetailMap(d.Details, "unsafe_shutdowns"), - MediaErrors: int64FromDetailMap(d.Details, "media_errors"), - ErrorLogEntries: int64FromDetailMap(d.Details, "error_log_entries"), - WrittenBytes: int64FromDetailMap(d.Details, "written_bytes"), - ReadBytes: int64FromDetailMap(d.Details, "read_bytes"), - LifeUsedPct: floatFromDetailMap(d.Details, "life_used_pct"), - RemainingEndurancePct: d.RemainingEndurancePct, - LifeRemainingPct: floatFromDetailMap(d.Details, "life_remaining_pct"), - AvailableSparePct: floatFromDetailMap(d.Details, "available_spare_pct"), - ReallocatedSectors: int64FromDetailMap(d.Details, "reallocated_sectors"), - CurrentPendingSectors: int64FromDetailMap(d.Details, "current_pending_sectors"), - OfflineUncorrectable: int64FromDetailMap(d.Details, "offline_uncorrectable"), - Status: status, - StatusCheckedAt: meta.StatusCheckedAt, - StatusChangedAt: meta.StatusChangedAt, - ManufacturedYearWeek: manufacturedYearWeekFromDetails(d.Details), - StatusHistory: meta.StatusHistory, - ErrorDescription: meta.ErrorDescription, + Slot: d.Slot, + Type: d.Type, + Model: d.Model, + SizeGB: d.SizeGB, + SerialNumber: d.SerialNumber, + Manufacturer: d.Manufacturer, + Firmware: d.Firmware, + Interface: d.Interface, + Present: &presentValue, + LogicalBlockSizeBytes: int64FromDetailMap(d.Details, "logical_block_size_bytes"), + PhysicalBlockSizeBytes: int64FromDetailMap(d.Details, "physical_block_size_bytes"), + MetadataBytesPerBlock: int64FromDetailMap(d.Details, "metadata_bytes_per_block"), + TemperatureC: floatFromDetailMap(d.Details, "temperature_c"), + PowerOnHours: int64FromDetailMap(d.Details, "power_on_hours"), + PowerCycles: int64FromDetailMap(d.Details, "power_cycles"), + UnsafeShutdowns: int64FromDetailMap(d.Details, "unsafe_shutdowns"), + MediaErrors: int64FromDetailMap(d.Details, "media_errors"), + ErrorLogEntries: int64FromDetailMap(d.Details, "error_log_entries"), + WrittenBytes: int64FromDetailMap(d.Details, "written_bytes"), + ReadBytes: int64FromDetailMap(d.Details, "read_bytes"), + LifeUsedPct: floatFromDetailMap(d.Details, "life_used_pct"), + RemainingEndurancePct: d.RemainingEndurancePct, + LifeRemainingPct: floatFromDetailMap(d.Details, "life_remaining_pct"), + AvailableSparePct: floatFromDetailMap(d.Details, "available_spare_pct"), + ReallocatedSectors: int64FromDetailMap(d.Details, "reallocated_sectors"), + CurrentPendingSectors: int64FromDetailMap(d.Details, "current_pending_sectors"), + OfflineUncorrectable: int64FromDetailMap(d.Details, "offline_uncorrectable"), + Status: status, + StatusCheckedAt: meta.StatusCheckedAt, + StatusChangedAt: meta.StatusChangedAt, + ManufacturedYearWeek: manufacturedYearWeekFromDetails(d.Details), + StatusHistory: meta.StatusHistory, + ErrorDescription: meta.ErrorDescription, }) } return result diff --git a/internal/exporter/reanimator_models.go b/internal/exporter/reanimator_models.go index becc123..3c9a1c8 100644 --- a/internal/exporter/reanimator_models.go +++ b/internal/exporter/reanimator_models.go @@ -12,15 +12,16 @@ type ReanimatorExport struct { // ReanimatorHardware contains all hardware components type ReanimatorHardware struct { - Board ReanimatorBoard `json:"board"` - Firmware []ReanimatorFirmware `json:"firmware,omitempty"` - CPUs []ReanimatorCPU `json:"cpus,omitempty"` - Memory []ReanimatorMemory `json:"memory,omitempty"` - Storage []ReanimatorStorage `json:"storage,omitempty"` - PCIeDevices []ReanimatorPCIe `json:"pcie_devices,omitempty"` - PowerSupplies []ReanimatorPSU `json:"power_supplies,omitempty"` - Sensors *ReanimatorSensors `json:"sensors,omitempty"` - EventLogs []ReanimatorEventLog `json:"event_logs,omitempty"` + Board ReanimatorBoard `json:"board"` + Firmware []ReanimatorFirmware `json:"firmware,omitempty"` + CPUs []ReanimatorCPU `json:"cpus,omitempty"` + Memory []ReanimatorMemory `json:"memory,omitempty"` + Storage []ReanimatorStorage `json:"storage,omitempty"` + PCIeDevices []ReanimatorPCIe `json:"pcie_devices,omitempty"` + PowerSupplies []ReanimatorPSU `json:"power_supplies,omitempty"` + Sensors *ReanimatorSensors `json:"sensors,omitempty"` + EventLogs []ReanimatorEventLog `json:"event_logs,omitempty"` + PlatformConfig map[string]any `json:"platform_config,omitempty"` } // ReanimatorBoard represents motherboard/server information @@ -101,17 +102,20 @@ type ReanimatorMemory struct { // ReanimatorStorage represents a storage device type ReanimatorStorage struct { - Slot string `json:"slot"` - Type string `json:"type,omitempty"` - Model string `json:"model"` - SizeGB int `json:"size_gb,omitempty"` - SerialNumber string `json:"serial_number"` - Manufacturer string `json:"manufacturer,omitempty"` - Firmware string `json:"firmware,omitempty"` - Interface string `json:"interface,omitempty"` - Present *bool `json:"present,omitempty"` - TemperatureC float64 `json:"temperature_c,omitempty"` - PowerOnHours int64 `json:"power_on_hours,omitempty"` + Slot string `json:"slot"` + Type string `json:"type,omitempty"` + Model string `json:"model"` + SizeGB int `json:"size_gb,omitempty"` + SerialNumber string `json:"serial_number"` + Manufacturer string `json:"manufacturer,omitempty"` + Firmware string `json:"firmware,omitempty"` + Interface string `json:"interface,omitempty"` + Present *bool `json:"present,omitempty"` + LogicalBlockSizeBytes int64 `json:"logical_block_size_bytes,omitempty"` + PhysicalBlockSizeBytes int64 `json:"physical_block_size_bytes,omitempty"` + MetadataBytesPerBlock int64 `json:"metadata_bytes_per_block,omitempty"` + TemperatureC float64 `json:"temperature_c,omitempty"` + PowerOnHours int64 `json:"power_on_hours,omitempty"` PowerCycles int64 `json:"power_cycles,omitempty"` UnsafeShutdowns int64 `json:"unsafe_shutdowns,omitempty"` MediaErrors int64 `json:"media_errors,omitempty"` diff --git a/internal/models/models.go b/internal/models/models.go index 28549ae..4341b9b 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -245,6 +245,9 @@ type Storage struct { Location string `json:"location,omitempty"` // Front/Rear BackplaneID int `json:"backplane_id,omitempty"` RemainingEndurancePct *int `json:"remaining_endurance_pct,omitempty"` // 0-100 %; nil = not reported + LogicalBlockSizeBytes int64 `json:"logical_block_size_bytes,omitempty"` + PhysicalBlockSizeBytes int64 `json:"physical_block_size_bytes,omitempty"` + MetadataBytesPerBlock int64 `json:"metadata_bytes_per_block,omitempty"` Status string `json:"status,omitempty"` Details map[string]any `json:"details,omitempty"` @@ -278,6 +281,8 @@ type PCIeDevice struct { BDF string `json:"bdf"` DeviceClass string `json:"device_class"` Manufacturer string `json:"manufacturer,omitempty"` + Model string `json:"model,omitempty"` + Firmware string `json:"firmware,omitempty"` LinkWidth int `json:"link_width"` LinkSpeed string `json:"link_speed"` MaxLinkWidth int `json:"max_link_width"` @@ -286,8 +291,17 @@ type PCIeDevice struct { SerialNumber string `json:"serial_number,omitempty"` MACAddresses []string `json:"mac_addresses,omitempty"` NUMANode int `json:"numa_node,omitempty"` // 0 = not reported/N/A + Present *bool `json:"present,omitempty"` + IOMMUGroup *int `json:"iommu_group,omitempty"` Status string `json:"status,omitempty"` + // GPU telemetry fields (populated by bee audit for GPU devices) + TemperatureC *float64 `json:"temperature_c,omitempty"` + PowerW *float64 `json:"power_w,omitempty"` + ECCCorrectedTotal *int64 `json:"ecc_corrected_total,omitempty"` + ECCUncorrectedTotal *int64 `json:"ecc_uncorrected_total,omitempty"` + HWSlowdown *bool `json:"hw_slowdown,omitempty"` + StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"` StatusChangedAt *time.Time `json:"status_changed_at,omitempty"` StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`