Update Inspur parsing and align release docs
This commit is contained in:
@@ -8,9 +8,9 @@
|
|||||||
|
|
||||||
## Принципы импорта
|
## Принципы импорта
|
||||||
|
|
||||||
1. **Snapshot данных** - JSON содержит состояние сервера на момент сбора, без исторической информации
|
1. **Snapshot данных** - JSON содержит состояние сервера на момент сбора и может включать историю изменений статуса компонентов
|
||||||
2. **Автоматическое определение LOT** - классификация компонентов определяется приложением на основе vendor/model/type
|
2. **Автоматическое определение LOT** - классификация компонентов определяется приложением на основе vendor/model/type
|
||||||
3. **Статус компонентов** - каждый компонент имеет статус работоспособности (OK, Warning, Critical, Unknown)
|
3. **Статус компонентов** - каждый компонент имеет статус работоспособности (OK, Warning, Critical, Unknown) и может передавать время проверки статуса
|
||||||
4. **Идемпотентность** - повторный импорт с тем же snapshot не создает дубликаты
|
4. **Идемпотентность** - повторный импорт с тем же snapshot не создает дубликаты
|
||||||
5. **Event-driven обновления** - импорт создает события в timeline (LOG_COLLECTED, INSTALLED, REMOVED, FIRMWARE_CHANGED)
|
5. **Event-driven обновления** - импорт создает события в timeline (LOG_COLLECTED, INSTALLED, REMOVED, FIRMWARE_CHANGED)
|
||||||
|
|
||||||
@@ -53,6 +53,33 @@
|
|||||||
- `filename` (string, опционально) - идентификатор источника данных
|
- `filename` (string, опционально) - идентификатор источника данных
|
||||||
- `hardware` (object, обязательно) - структура с аппаратными компонентами
|
- `hardware` (object, обязательно) - структура с аппаратными компонентами
|
||||||
|
|
||||||
|
### Общее поле статуса для компонентных секций
|
||||||
|
|
||||||
|
Для секций `cpus`, `memory`, `storage`, `pcie_devices`, `power_supplies` поддерживаются дополнительные поля:
|
||||||
|
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время, когда был проверен статус работоспособности компонента
|
||||||
|
- `status_changed_at` (string RFC3339, опционально) - дата/время последнего изменения статуса компонента
|
||||||
|
- `status_at_collection` (object, опционально) - зафиксированный статус на момент сбора логов:
|
||||||
|
- `status` (string) - статус в момент сбора (`OK`, `Warning`, `Critical`, `Unknown`, `Empty`)
|
||||||
|
- `at` (string RFC3339) - дата/время, к которому относится этот статус
|
||||||
|
- `status_history` (array, опционально) - история статусов компонента:
|
||||||
|
- `status` (string) - статус (`OK`, `Warning`, `Critical`, `Unknown`, `Empty`)
|
||||||
|
- `changed_at` (string RFC3339) - дата/время смены статуса
|
||||||
|
- `details` (string, опционально) - пояснение к переходу статуса
|
||||||
|
- `error_description` (string, опционально) - текст ошибки/диагностики для статуса компонента (например при `Warning`/`Critical`)
|
||||||
|
|
||||||
|
### Правила экспорта JSON для внешнего проекта
|
||||||
|
|
||||||
|
Используйте эти правила, если JSON формируется внешним сервисом/экспортером:
|
||||||
|
|
||||||
|
1. Всегда передавайте `status` как текущее состояние компонента в snapshot.
|
||||||
|
2. Если есть точное время последней смены, передавайте `status_changed_at` (RFC3339, UTC).
|
||||||
|
3. Если источник умеет фиксировать состояние именно на момент сбора, передавайте `status_at_collection` с полями `status` и `at`.
|
||||||
|
4. Если источник хранит историю (например Windows Event Log), передавайте `status_history` отсортированным по `changed_at` по возрастанию.
|
||||||
|
5. В `status_history` не отправляйте записи без `changed_at`; такие записи игнорируются.
|
||||||
|
6. Для совместимости допускается передавать только старые поля (`status`, `status_checked_at`) без истории.
|
||||||
|
7. Все даты/время в исторических полях должны быть RFC3339; рекомендуется использовать UTC (`Z`).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Секция hardware
|
## Секция hardware
|
||||||
@@ -99,7 +126,8 @@
|
|||||||
"frequency_mhz": 2100,
|
"frequency_mhz": 2100,
|
||||||
"max_frequency_mhz": 4000,
|
"max_frequency_mhz": 4000,
|
||||||
"manufacturer": "Intel",
|
"manufacturer": "Intel",
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"status_checked_at": "2026-02-10T15:28:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"socket": 1,
|
"socket": 1,
|
||||||
@@ -109,7 +137,8 @@
|
|||||||
"frequency_mhz": 2100,
|
"frequency_mhz": 2100,
|
||||||
"max_frequency_mhz": 4000,
|
"max_frequency_mhz": 4000,
|
||||||
"manufacturer": "Intel",
|
"manufacturer": "Intel",
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"status_checked_at": "2026-02-10T15:28:00Z"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -124,6 +153,7 @@
|
|||||||
- `max_frequency_mhz` (int, опционально) - максимальная частота в МГц
|
- `max_frequency_mhz` (int, опционально) - максимальная частота в МГц
|
||||||
- `manufacturer` (string, опционально) - производитель (Intel, AMD, etc.)
|
- `manufacturer` (string, опционально) - производитель (Intel, AMD, etc.)
|
||||||
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время проверки статуса
|
||||||
|
|
||||||
**Генерация serial_number:**
|
**Генерация serial_number:**
|
||||||
- Формат: `{board_serial}-CPU-{socket}`
|
- Формат: `{board_serial}-CPU-{socket}`
|
||||||
@@ -153,7 +183,8 @@
|
|||||||
"manufacturer": "Hynix",
|
"manufacturer": "Hynix",
|
||||||
"serial_number": "80AD032419E17CEEC1",
|
"serial_number": "80AD032419E17CEEC1",
|
||||||
"part_number": "HMCG88AGBRA191N",
|
"part_number": "HMCG88AGBRA191N",
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"status_checked_at": "2026-02-10T15:28:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"slot": "CPU0_C1D0",
|
"slot": "CPU0_C1D0",
|
||||||
@@ -182,6 +213,7 @@
|
|||||||
- `serial_number` (string, условно обязательно если present=true) - серийный номер
|
- `serial_number` (string, условно обязательно если present=true) - серийный номер
|
||||||
- `part_number` (string, опционально) - партномер
|
- `part_number` (string, опционально) - партномер
|
||||||
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`, `Empty`
|
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`, `Empty`
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время проверки статуса
|
||||||
|
|
||||||
**Обработка:**
|
**Обработка:**
|
||||||
- Если `present = false` или `status = "Empty"`, компонент не создается/не обновляется
|
- Если `present = false` или `status = "Empty"`, компонент не создается/не обновляется
|
||||||
@@ -239,6 +271,7 @@
|
|||||||
- `interface` (string, опционально) - интерфейс: `NVMe`, `SATA`, `SAS`
|
- `interface` (string, опционально) - интерфейс: `NVMe`, `SATA`, `SAS`
|
||||||
- `present` (bool, обязательно) - наличие диска в слоте
|
- `present` (bool, обязательно) - наличие диска в слоте
|
||||||
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время проверки статуса
|
||||||
|
|
||||||
**Обработка firmware:**
|
**Обработка firmware:**
|
||||||
- Если версия firmware изменилась относительно предыдущего observation - создается событие FIRMWARE_CHANGED
|
- Если версия firmware изменилась относительно предыдущего observation - создается событие FIRMWARE_CHANGED
|
||||||
@@ -266,6 +299,7 @@
|
|||||||
"part_number": "V0310C9000000000",
|
"part_number": "V0310C9000000000",
|
||||||
"firmware": "00.03.05",
|
"firmware": "00.03.05",
|
||||||
"status": "OK",
|
"status": "OK",
|
||||||
|
"status_checked_at": "2026-02-10T15:28:00Z",
|
||||||
"input_type": "ACWideRange",
|
"input_type": "ACWideRange",
|
||||||
"input_power_w": 137,
|
"input_power_w": 137,
|
||||||
"output_power_w": 104,
|
"output_power_w": 104,
|
||||||
@@ -285,6 +319,7 @@
|
|||||||
- `part_number` (string, опционально) - партномер
|
- `part_number` (string, опционально) - партномер
|
||||||
- `firmware` (string, опционально) - версия прошивки
|
- `firmware` (string, опционально) - версия прошивки
|
||||||
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время проверки статуса
|
||||||
- `input_type` (string, опционально) - тип входа
|
- `input_type` (string, опционально) - тип входа
|
||||||
- `input_power_w` (int, опционально) - входная мощность (telemetry)
|
- `input_power_w` (int, опционально) - входная мощность (telemetry)
|
||||||
- `output_power_w` (int, опционально) - выходная мощность (telemetry)
|
- `output_power_w` (int, опционально) - выходная мощность (telemetry)
|
||||||
@@ -319,7 +354,8 @@
|
|||||||
"max_link_speed": "Gen3",
|
"max_link_speed": "Gen3",
|
||||||
"serial_number": "RAID-001-12345",
|
"serial_number": "RAID-001-12345",
|
||||||
"firmware": "50.9.1-4296",
|
"firmware": "50.9.1-4296",
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"status_checked_at": "2026-02-10T15:28:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"slot": "PCIeCard2",
|
"slot": "PCIeCard2",
|
||||||
@@ -355,6 +391,7 @@
|
|||||||
- `serial_number` (string, опционально) - серийный номер (если доступен, иначе генерируется)
|
- `serial_number` (string, опционально) - серийный номер (если доступен, иначе генерируется)
|
||||||
- `firmware` (string, опционально) - версия прошивки
|
- `firmware` (string, опционально) - версия прошивки
|
||||||
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
- `status` (string, опционально) - статус: `OK`, `Warning`, `Critical`, `Unknown`
|
||||||
|
- `status_checked_at` (string RFC3339, опционально) - дата/время проверки статуса
|
||||||
|
|
||||||
**Генерация serial_number (если отсутствует):**
|
**Генерация serial_number (если отсутствует):**
|
||||||
- Формат: `{board_serial}-PCIE-{slot}`
|
- Формат: `{board_serial}-PCIE-{slot}`
|
||||||
@@ -872,7 +909,7 @@ Content-Type: application/json
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Пример 2: Server с отказавшим диском
|
### Пример 2: Server с историей "сломан -> починен"
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -893,7 +930,24 @@ Content-Type: application/json
|
|||||||
"firmware": "9CV10510",
|
"firmware": "9CV10510",
|
||||||
"interface": "NVMe",
|
"interface": "NVMe",
|
||||||
"present": true,
|
"present": true,
|
||||||
"status": "Critical"
|
"status": "OK",
|
||||||
|
"status_changed_at": "2026-02-10T15:22:00Z",
|
||||||
|
"status_at_collection": {
|
||||||
|
"status": "OK",
|
||||||
|
"at": "2026-02-10T15:30:00Z"
|
||||||
|
},
|
||||||
|
"status_history": [
|
||||||
|
{
|
||||||
|
"status": "Critical",
|
||||||
|
"changed_at": "2026-02-10T15:10:00Z",
|
||||||
|
"details": "I/O timeout on NVMe queue 3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"status": "OK",
|
||||||
|
"changed_at": "2026-02-10T15:22:00Z",
|
||||||
|
"details": "Recovered after controller reset"
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"slot": "Disk.Bay.1",
|
"slot": "Disk.Bay.1",
|
||||||
@@ -911,9 +965,9 @@ Content-Type: application/json
|
|||||||
```
|
```
|
||||||
|
|
||||||
**Обработка:**
|
**Обработка:**
|
||||||
- Disk.Bay.0 получит статус Critical
|
- Disk.Bay.0 получит текущий статус `OK`
|
||||||
- Автоматически создастся failure_event для компонента S5GUNG0N123456
|
- История статусов сохранится в `observations.details.status_history`
|
||||||
- Timeline event COMPONENT_FAILED
|
- Автоматический `failure_event` не создается, так как текущий статус snapshot не `Critical`
|
||||||
|
|
||||||
### Пример 3: Замена памяти
|
### Пример 3: Замена памяти
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -27,20 +28,22 @@ func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, erro
|
|||||||
// Determine target host (optional field)
|
// Determine target host (optional field)
|
||||||
targetHost := inferTargetHost(result.TargetHost, result.Filename)
|
targetHost := inferTargetHost(result.TargetHost, result.Filename)
|
||||||
|
|
||||||
|
collectedAt := formatRFC3339(result.CollectedAt)
|
||||||
|
|
||||||
export := &ReanimatorExport{
|
export := &ReanimatorExport{
|
||||||
Filename: result.Filename,
|
Filename: result.Filename,
|
||||||
SourceType: normalizeSourceType(result.SourceType),
|
SourceType: normalizeSourceType(result.SourceType),
|
||||||
Protocol: normalizeProtocol(result.Protocol),
|
Protocol: normalizeProtocol(result.Protocol),
|
||||||
TargetHost: targetHost,
|
TargetHost: targetHost,
|
||||||
CollectedAt: formatRFC3339(result.CollectedAt),
|
CollectedAt: collectedAt,
|
||||||
Hardware: ReanimatorHardware{
|
Hardware: ReanimatorHardware{
|
||||||
Board: convertBoard(result.Hardware.BoardInfo),
|
Board: convertBoard(result.Hardware.BoardInfo),
|
||||||
Firmware: convertFirmware(result.Hardware.Firmware),
|
Firmware: dedupeFirmware(convertFirmware(result.Hardware.Firmware)),
|
||||||
CPUs: convertCPUs(result.Hardware.CPUs),
|
CPUs: dedupeCPUs(convertCPUs(result.Hardware.CPUs, collectedAt)),
|
||||||
Memory: convertMemory(result.Hardware.Memory),
|
Memory: dedupeMemory(convertMemory(result.Hardware.Memory, collectedAt)),
|
||||||
Storage: convertStorage(result.Hardware.Storage),
|
Storage: dedupeStorage(convertStorage(result.Hardware.Storage, collectedAt)),
|
||||||
PCIeDevices: convertPCIeDevices(result.Hardware),
|
PCIeDevices: dedupePCIe(convertPCIeDevices(result.Hardware, collectedAt)),
|
||||||
PowerSupplies: convertPowerSupplies(result.Hardware.PowerSupply),
|
PowerSupplies: dedupePSUs(convertPowerSupplies(result.Hardware.PowerSupply, collectedAt)),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -83,7 +86,7 @@ func convertFirmware(firmware []models.FirmwareInfo) []ReanimatorFirmware {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// convertCPUs converts CPU information to Reanimator format
|
// convertCPUs converts CPU information to Reanimator format
|
||||||
func convertCPUs(cpus []models.CPU) []ReanimatorCPU {
|
func convertCPUs(cpus []models.CPU, collectedAt string) []ReanimatorCPU {
|
||||||
if len(cpus) == 0 {
|
if len(cpus) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -92,22 +95,41 @@ func convertCPUs(cpus []models.CPU) []ReanimatorCPU {
|
|||||||
for _, cpu := range cpus {
|
for _, cpu := range cpus {
|
||||||
manufacturer := inferCPUManufacturer(cpu.Model)
|
manufacturer := inferCPUManufacturer(cpu.Model)
|
||||||
|
|
||||||
|
cpuStatus := normalizeStatus(cpu.Status, false)
|
||||||
|
if strings.TrimSpace(cpu.Status) == "" {
|
||||||
|
cpuStatus = "Unknown"
|
||||||
|
}
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
cpuStatus,
|
||||||
|
cpu.StatusCheckedAt,
|
||||||
|
cpu.StatusChangedAt,
|
||||||
|
cpu.StatusAtCollect,
|
||||||
|
cpu.StatusHistory,
|
||||||
|
cpu.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorCPU{
|
result = append(result, ReanimatorCPU{
|
||||||
Socket: cpu.Socket,
|
Socket: cpu.Socket,
|
||||||
Model: cpu.Model,
|
Model: cpu.Model,
|
||||||
Cores: cpu.Cores,
|
Cores: cpu.Cores,
|
||||||
Threads: cpu.Threads,
|
Threads: cpu.Threads,
|
||||||
FrequencyMHz: cpu.FrequencyMHz,
|
FrequencyMHz: cpu.FrequencyMHz,
|
||||||
MaxFrequencyMHz: cpu.MaxFreqMHz,
|
MaxFrequencyMHz: cpu.MaxFreqMHz,
|
||||||
Manufacturer: manufacturer,
|
Manufacturer: manufacturer,
|
||||||
Status: "Unknown",
|
Status: cpuStatus,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertMemory converts memory modules to Reanimator format
|
// convertMemory converts memory modules to Reanimator format
|
||||||
func convertMemory(memory []models.MemoryDIMM) []ReanimatorMemory {
|
func convertMemory(memory []models.MemoryDIMM, collectedAt string) []ReanimatorMemory {
|
||||||
if len(memory) == 0 {
|
if len(memory) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -123,25 +145,40 @@ func convertMemory(memory []models.MemoryDIMM) []ReanimatorMemory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
mem.StatusCheckedAt,
|
||||||
|
mem.StatusChangedAt,
|
||||||
|
mem.StatusAtCollect,
|
||||||
|
mem.StatusHistory,
|
||||||
|
mem.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorMemory{
|
result = append(result, ReanimatorMemory{
|
||||||
Slot: mem.Slot,
|
Slot: mem.Slot,
|
||||||
Location: mem.Location,
|
Location: mem.Location,
|
||||||
Present: mem.Present,
|
Present: mem.Present,
|
||||||
SizeMB: mem.SizeMB,
|
SizeMB: mem.SizeMB,
|
||||||
Type: mem.Type,
|
Type: mem.Type,
|
||||||
MaxSpeedMHz: mem.MaxSpeedMHz,
|
MaxSpeedMHz: mem.MaxSpeedMHz,
|
||||||
CurrentSpeedMHz: mem.CurrentSpeedMHz,
|
CurrentSpeedMHz: mem.CurrentSpeedMHz,
|
||||||
Manufacturer: mem.Manufacturer,
|
Manufacturer: mem.Manufacturer,
|
||||||
SerialNumber: mem.SerialNumber,
|
SerialNumber: mem.SerialNumber,
|
||||||
PartNumber: mem.PartNumber,
|
PartNumber: mem.PartNumber,
|
||||||
Status: status,
|
Status: status,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertStorage converts storage devices to Reanimator format
|
// convertStorage converts storage devices to Reanimator format
|
||||||
func convertStorage(storage []models.Storage) []ReanimatorStorage {
|
func convertStorage(storage []models.Storage, collectedAt string) []ReanimatorStorage {
|
||||||
if len(storage) == 0 {
|
if len(storage) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -154,29 +191,60 @@ func convertStorage(storage []models.Storage) []ReanimatorStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
status := inferStorageStatus(stor)
|
status := inferStorageStatus(stor)
|
||||||
|
if strings.TrimSpace(stor.Status) != "" {
|
||||||
|
status = normalizeStatus(stor.Status, false)
|
||||||
|
}
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
stor.StatusCheckedAt,
|
||||||
|
stor.StatusChangedAt,
|
||||||
|
stor.StatusAtCollect,
|
||||||
|
stor.StatusHistory,
|
||||||
|
stor.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorStorage{
|
result = append(result, ReanimatorStorage{
|
||||||
Slot: stor.Slot,
|
Slot: stor.Slot,
|
||||||
Type: stor.Type,
|
Type: stor.Type,
|
||||||
Model: stor.Model,
|
Model: stor.Model,
|
||||||
SizeGB: stor.SizeGB,
|
SizeGB: stor.SizeGB,
|
||||||
SerialNumber: stor.SerialNumber,
|
SerialNumber: stor.SerialNumber,
|
||||||
Manufacturer: stor.Manufacturer,
|
Manufacturer: stor.Manufacturer,
|
||||||
Firmware: stor.Firmware,
|
Firmware: stor.Firmware,
|
||||||
Interface: stor.Interface,
|
Interface: stor.Interface,
|
||||||
Present: stor.Present,
|
Present: stor.Present,
|
||||||
Status: status,
|
Status: status,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertPCIeDevices converts PCIe devices, GPUs, and network adapters to Reanimator format
|
// convertPCIeDevices converts PCIe devices, GPUs, and network adapters to Reanimator format
|
||||||
func convertPCIeDevices(hw *models.HardwareConfig) []ReanimatorPCIe {
|
func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []ReanimatorPCIe {
|
||||||
result := make([]ReanimatorPCIe, 0)
|
result := make([]ReanimatorPCIe, 0)
|
||||||
|
gpuSlots := make(map[string]struct{}, len(hw.GPUs))
|
||||||
|
for _, gpu := range hw.GPUs {
|
||||||
|
slot := strings.ToLower(strings.TrimSpace(gpu.Slot))
|
||||||
|
if slot != "" {
|
||||||
|
gpuSlots[slot] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Convert regular PCIe devices
|
// Convert regular PCIe devices
|
||||||
for _, pcie := range hw.PCIeDevices {
|
for _, pcie := range hw.PCIeDevices {
|
||||||
|
slot := strings.ToLower(strings.TrimSpace(pcie.Slot))
|
||||||
|
if _, isDedicatedGPU := gpuSlots[slot]; isDedicatedGPU || isDisplayClass(pcie.DeviceClass) {
|
||||||
|
// Skip GPU-like PCIe entries to avoid duplicates:
|
||||||
|
// dedicated GPUs are exported from hw.GPUs with richer metadata.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
serialNumber := normalizedSerial(pcie.SerialNumber)
|
serialNumber := normalizedSerial(pcie.SerialNumber)
|
||||||
|
|
||||||
// Determine model (prefer PartNumber, fallback to DeviceClass)
|
// Determine model (prefer PartNumber, fallback to DeviceClass)
|
||||||
@@ -185,21 +253,37 @@ func convertPCIeDevices(hw *models.HardwareConfig) []ReanimatorPCIe {
|
|||||||
model = pcie.DeviceClass
|
model = pcie.DeviceClass
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status := normalizeStatus(pcie.Status, false)
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
pcie.StatusCheckedAt,
|
||||||
|
pcie.StatusChangedAt,
|
||||||
|
pcie.StatusAtCollect,
|
||||||
|
pcie.StatusHistory,
|
||||||
|
pcie.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorPCIe{
|
result = append(result, ReanimatorPCIe{
|
||||||
Slot: pcie.Slot,
|
Slot: pcie.Slot,
|
||||||
VendorID: pcie.VendorID,
|
VendorID: pcie.VendorID,
|
||||||
DeviceID: pcie.DeviceID,
|
DeviceID: pcie.DeviceID,
|
||||||
BDF: pcie.BDF,
|
BDF: pcie.BDF,
|
||||||
DeviceClass: pcie.DeviceClass,
|
DeviceClass: pcie.DeviceClass,
|
||||||
Manufacturer: pcie.Manufacturer,
|
Manufacturer: pcie.Manufacturer,
|
||||||
Model: model,
|
Model: model,
|
||||||
LinkWidth: pcie.LinkWidth,
|
LinkWidth: pcie.LinkWidth,
|
||||||
LinkSpeed: pcie.LinkSpeed,
|
LinkSpeed: pcie.LinkSpeed,
|
||||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||||
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
||||||
SerialNumber: serialNumber,
|
SerialNumber: serialNumber,
|
||||||
Firmware: "", // PCIeDevice doesn't have firmware in models
|
Firmware: "", // PCIeDevice doesn't have firmware in models
|
||||||
Status: "Unknown",
|
Status: status,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -210,21 +294,37 @@ func convertPCIeDevices(hw *models.HardwareConfig) []ReanimatorPCIe {
|
|||||||
// Determine device class
|
// Determine device class
|
||||||
deviceClass := "DisplayController"
|
deviceClass := "DisplayController"
|
||||||
|
|
||||||
|
status := normalizeStatus(gpu.Status, false)
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
gpu.StatusCheckedAt,
|
||||||
|
gpu.StatusChangedAt,
|
||||||
|
gpu.StatusAtCollect,
|
||||||
|
gpu.StatusHistory,
|
||||||
|
gpu.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorPCIe{
|
result = append(result, ReanimatorPCIe{
|
||||||
Slot: gpu.Slot,
|
Slot: gpu.Slot,
|
||||||
VendorID: gpu.VendorID,
|
VendorID: gpu.VendorID,
|
||||||
DeviceID: gpu.DeviceID,
|
DeviceID: gpu.DeviceID,
|
||||||
BDF: gpu.BDF,
|
BDF: gpu.BDF,
|
||||||
DeviceClass: deviceClass,
|
DeviceClass: deviceClass,
|
||||||
Manufacturer: gpu.Manufacturer,
|
Manufacturer: gpu.Manufacturer,
|
||||||
Model: gpu.Model,
|
Model: gpu.Model,
|
||||||
LinkWidth: gpu.CurrentLinkWidth,
|
LinkWidth: gpu.CurrentLinkWidth,
|
||||||
LinkSpeed: gpu.CurrentLinkSpeed,
|
LinkSpeed: gpu.CurrentLinkSpeed,
|
||||||
MaxLinkWidth: gpu.MaxLinkWidth,
|
MaxLinkWidth: gpu.MaxLinkWidth,
|
||||||
MaxLinkSpeed: gpu.MaxLinkSpeed,
|
MaxLinkSpeed: gpu.MaxLinkSpeed,
|
||||||
SerialNumber: serialNumber,
|
SerialNumber: serialNumber,
|
||||||
Firmware: gpu.Firmware,
|
Firmware: gpu.Firmware,
|
||||||
Status: normalizeStatus(gpu.Status, false),
|
Status: status,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,29 +336,52 @@ func convertPCIeDevices(hw *models.HardwareConfig) []ReanimatorPCIe {
|
|||||||
|
|
||||||
serialNumber := normalizedSerial(nic.SerialNumber)
|
serialNumber := normalizedSerial(nic.SerialNumber)
|
||||||
|
|
||||||
|
status := normalizeStatus(nic.Status, false)
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
nic.StatusCheckedAt,
|
||||||
|
nic.StatusChangedAt,
|
||||||
|
nic.StatusAtCollect,
|
||||||
|
nic.StatusHistory,
|
||||||
|
nic.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorPCIe{
|
result = append(result, ReanimatorPCIe{
|
||||||
Slot: nic.Slot,
|
Slot: nic.Slot,
|
||||||
VendorID: nic.VendorID,
|
VendorID: nic.VendorID,
|
||||||
DeviceID: nic.DeviceID,
|
DeviceID: nic.DeviceID,
|
||||||
BDF: "",
|
BDF: "",
|
||||||
DeviceClass: "NetworkController",
|
DeviceClass: "NetworkController",
|
||||||
Manufacturer: nic.Vendor,
|
Manufacturer: nic.Vendor,
|
||||||
Model: nic.Model,
|
Model: nic.Model,
|
||||||
LinkWidth: 0,
|
LinkWidth: 0,
|
||||||
LinkSpeed: "",
|
LinkSpeed: "",
|
||||||
MaxLinkWidth: 0,
|
MaxLinkWidth: 0,
|
||||||
MaxLinkSpeed: "",
|
MaxLinkSpeed: "",
|
||||||
SerialNumber: serialNumber,
|
SerialNumber: serialNumber,
|
||||||
Firmware: nic.Firmware,
|
Firmware: nic.Firmware,
|
||||||
Status: normalizeStatus(nic.Status, false),
|
Status: status,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isDisplayClass reports whether deviceClass names a display-type PCIe device.
// The match is case-insensitive and succeeds when the class contains any of
// the substrings "display", "vga" or "3d controller".
func isDisplayClass(deviceClass string) bool {
	normalized := strings.ToLower(strings.TrimSpace(deviceClass))
	for _, marker := range []string{"display", "vga", "3d controller"} {
		if strings.Contains(normalized, marker) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
// convertPowerSupplies converts power supplies to Reanimator format
|
// convertPowerSupplies converts power supplies to Reanimator format
|
||||||
func convertPowerSupplies(psus []models.PSU) []ReanimatorPSU {
|
func convertPowerSupplies(psus []models.PSU, collectedAt string) []ReanimatorPSU {
|
||||||
if len(psus) == 0 {
|
if len(psus) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -271,26 +394,291 @@ func convertPowerSupplies(psus []models.PSU) []ReanimatorPSU {
|
|||||||
}
|
}
|
||||||
|
|
||||||
status := normalizeStatus(psu.Status, false)
|
status := normalizeStatus(psu.Status, false)
|
||||||
|
meta := buildStatusMeta(
|
||||||
|
status,
|
||||||
|
psu.StatusCheckedAt,
|
||||||
|
psu.StatusChangedAt,
|
||||||
|
psu.StatusAtCollect,
|
||||||
|
psu.StatusHistory,
|
||||||
|
psu.ErrorDescription,
|
||||||
|
collectedAt,
|
||||||
|
)
|
||||||
|
|
||||||
result = append(result, ReanimatorPSU{
|
result = append(result, ReanimatorPSU{
|
||||||
Slot: psu.Slot,
|
Slot: psu.Slot,
|
||||||
Present: psu.Present,
|
Present: psu.Present,
|
||||||
Model: psu.Model,
|
Model: psu.Model,
|
||||||
Vendor: psu.Vendor,
|
Vendor: psu.Vendor,
|
||||||
WattageW: psu.WattageW,
|
WattageW: psu.WattageW,
|
||||||
SerialNumber: psu.SerialNumber,
|
SerialNumber: psu.SerialNumber,
|
||||||
PartNumber: psu.PartNumber,
|
PartNumber: psu.PartNumber,
|
||||||
Firmware: psu.Firmware,
|
Firmware: psu.Firmware,
|
||||||
Status: status,
|
Status: status,
|
||||||
InputType: psu.InputType,
|
InputType: psu.InputType,
|
||||||
InputPowerW: psu.InputPowerW,
|
InputPowerW: psu.InputPowerW,
|
||||||
OutputPowerW: psu.OutputPowerW,
|
OutputPowerW: psu.OutputPowerW,
|
||||||
InputVoltage: psu.InputVoltage,
|
InputVoltage: psu.InputVoltage,
|
||||||
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
|
StatusAtCollect: meta.StatusAtCollection,
|
||||||
|
StatusHistory: meta.StatusHistory,
|
||||||
|
ErrorDescription: meta.ErrorDescription,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type convertedStatusMeta struct {
|
||||||
|
StatusCheckedAt string
|
||||||
|
StatusChangedAt string
|
||||||
|
StatusAtCollection *ReanimatorStatusAtCollection
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry
|
||||||
|
ErrorDescription string
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildStatusMeta(
|
||||||
|
currentStatus string,
|
||||||
|
checkedAt time.Time,
|
||||||
|
changedAt time.Time,
|
||||||
|
statusAtCollection *models.StatusAtCollection,
|
||||||
|
history []models.StatusHistoryEntry,
|
||||||
|
errorDescription string,
|
||||||
|
collectedAt string,
|
||||||
|
) convertedStatusMeta {
|
||||||
|
meta := convertedStatusMeta{
|
||||||
|
StatusCheckedAt: formatOptionalRFC3339(checkedAt),
|
||||||
|
StatusChangedAt: formatOptionalRFC3339(changedAt),
|
||||||
|
ErrorDescription: strings.TrimSpace(errorDescription),
|
||||||
|
}
|
||||||
|
|
||||||
|
convertedHistory := make([]ReanimatorStatusHistoryEntry, 0, len(history))
|
||||||
|
for _, h := range history {
|
||||||
|
changed := formatOptionalRFC3339(h.ChangedAt)
|
||||||
|
if changed == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
convertedHistory = append(convertedHistory, ReanimatorStatusHistoryEntry{
|
||||||
|
Status: normalizeStatus(h.Status, true),
|
||||||
|
ChangedAt: changed,
|
||||||
|
Details: strings.TrimSpace(h.Details),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.Slice(convertedHistory, func(i, j int) bool {
|
||||||
|
return convertedHistory[i].ChangedAt < convertedHistory[j].ChangedAt
|
||||||
|
})
|
||||||
|
if len(convertedHistory) > 0 {
|
||||||
|
meta.StatusHistory = convertedHistory
|
||||||
|
if meta.StatusChangedAt == "" {
|
||||||
|
meta.StatusChangedAt = convertedHistory[len(convertedHistory)-1].ChangedAt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if statusAtCollection != nil {
|
||||||
|
at := formatOptionalRFC3339(statusAtCollection.At)
|
||||||
|
if at != "" && strings.TrimSpace(statusAtCollection.Status) != "" {
|
||||||
|
meta.StatusAtCollection = &ReanimatorStatusAtCollection{
|
||||||
|
Status: normalizeStatus(statusAtCollection.Status, true),
|
||||||
|
At: at,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if meta.StatusAtCollection == nil && strings.TrimSpace(currentStatus) != "" && collectedAt != "" {
|
||||||
|
meta.StatusAtCollection = &ReanimatorStatusAtCollection{
|
||||||
|
Status: currentStatus,
|
||||||
|
At: collectedAt,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if meta.StatusCheckedAt == "" && len(meta.StatusHistory) > 0 {
|
||||||
|
meta.StatusCheckedAt = meta.StatusHistory[len(meta.StatusHistory)-1].ChangedAt
|
||||||
|
}
|
||||||
|
if meta.StatusCheckedAt == "" && strings.TrimSpace(currentStatus) != "" && collectedAt != "" {
|
||||||
|
meta.StatusCheckedAt = collectedAt
|
||||||
|
}
|
||||||
|
|
||||||
|
return meta
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatOptionalRFC3339 renders t as an RFC 3339 timestamp normalized to UTC.
// The zero time.Time is treated as "not set" and produces an empty string.
func formatOptionalRFC3339(t time.Time) string {
	formatted := ""
	if !t.IsZero() {
		formatted = t.UTC().Format(time.RFC3339)
	}
	return formatted
}
|
||||||
|
|
||||||
|
func dedupeFirmware(items []ReanimatorFirmware) []ReanimatorFirmware {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
seen := make(map[string]struct{}, len(items))
|
||||||
|
result := make([]ReanimatorFirmware, 0, len(items))
|
||||||
|
for _, item := range items {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(item.DeviceName))
|
||||||
|
if key == "" {
|
||||||
|
key = strings.ToLower(strings.TrimSpace(item.Version))
|
||||||
|
}
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func dedupeCPUs(items []ReanimatorCPU) []ReanimatorCPU {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
seen := make(map[int]struct{}, len(items))
|
||||||
|
result := make([]ReanimatorCPU, 0, len(items))
|
||||||
|
for _, item := range items {
|
||||||
|
if _, ok := seen[item.Socket]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[item.Socket] = struct{}{}
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func dedupeMemory(items []ReanimatorMemory) []ReanimatorMemory {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
seen := make(map[string]struct{}, len(items))
|
||||||
|
result := make([]ReanimatorMemory, 0, len(items))
|
||||||
|
for _, item := range items {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(item.Slot))
|
||||||
|
if key == "" {
|
||||||
|
key = strings.ToLower(strings.TrimSpace(item.Location))
|
||||||
|
}
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func dedupeStorage(items []ReanimatorStorage) []ReanimatorStorage {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
seen := make(map[string]struct{}, len(items))
|
||||||
|
result := make([]ReanimatorStorage, 0, len(items))
|
||||||
|
for _, item := range items {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(item.SerialNumber))
|
||||||
|
if key == "" {
|
||||||
|
key = "slot:" + strings.ToLower(strings.TrimSpace(item.Slot))
|
||||||
|
}
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func dedupePSUs(items []ReanimatorPSU) []ReanimatorPSU {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
seen := make(map[string]struct{}, len(items))
|
||||||
|
result := make([]ReanimatorPSU, 0, len(items))
|
||||||
|
for _, item := range items {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(item.SerialNumber))
|
||||||
|
if key == "" {
|
||||||
|
key = "slot:" + strings.ToLower(strings.TrimSpace(item.Slot))
|
||||||
|
}
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result = append(result, item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func dedupePCIe(items []ReanimatorPCIe) []ReanimatorPCIe {
|
||||||
|
if len(items) < 2 {
|
||||||
|
return items
|
||||||
|
}
|
||||||
|
type scored struct {
|
||||||
|
item ReanimatorPCIe
|
||||||
|
score int
|
||||||
|
idx int
|
||||||
|
}
|
||||||
|
byKey := make(map[string]scored, len(items))
|
||||||
|
order := make([]string, 0, len(items))
|
||||||
|
for i, item := range items {
|
||||||
|
key := pcieDedupKey(item)
|
||||||
|
curr := scored{item: item, score: pcieQualityScore(item), idx: i}
|
||||||
|
existing, ok := byKey[key]
|
||||||
|
if !ok {
|
||||||
|
byKey[key] = curr
|
||||||
|
order = append(order, key)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if curr.score > existing.score {
|
||||||
|
byKey[key] = curr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result := make([]ReanimatorPCIe, 0, len(byKey))
|
||||||
|
for _, key := range order {
|
||||||
|
result = append(result, byKey[key].item)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func pcieDedupKey(item ReanimatorPCIe) string {
|
||||||
|
slot := strings.ToLower(strings.TrimSpace(item.Slot))
|
||||||
|
serial := strings.ToLower(strings.TrimSpace(item.SerialNumber))
|
||||||
|
bdf := strings.ToLower(strings.TrimSpace(item.BDF))
|
||||||
|
if slot != "" {
|
||||||
|
return "slot:" + slot
|
||||||
|
}
|
||||||
|
if serial != "" {
|
||||||
|
return "sn:" + serial
|
||||||
|
}
|
||||||
|
if bdf != "" {
|
||||||
|
return "bdf:" + bdf
|
||||||
|
}
|
||||||
|
return strings.ToLower(strings.TrimSpace(item.DeviceClass)) + "|" + strings.ToLower(strings.TrimSpace(item.Model))
|
||||||
|
}
|
||||||
|
|
||||||
|
func pcieQualityScore(item ReanimatorPCIe) int {
|
||||||
|
score := 0
|
||||||
|
if strings.TrimSpace(item.SerialNumber) != "" {
|
||||||
|
score += 4
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(item.Model) != "" && !isGenericPCIeModel(item.Model) {
|
||||||
|
score += 3
|
||||||
|
}
|
||||||
|
status := strings.ToLower(strings.TrimSpace(item.Status))
|
||||||
|
if status == "ok" || status == "warning" || status == "critical" {
|
||||||
|
score += 2
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(item.BDF) != "" {
|
||||||
|
score++
|
||||||
|
}
|
||||||
|
if strings.EqualFold(strings.TrimSpace(item.DeviceClass), "DisplayController") {
|
||||||
|
score++
|
||||||
|
}
|
||||||
|
return score
|
||||||
|
}
|
||||||
|
|
||||||
|
// isGenericPCIeModel reports whether model is a placeholder rather than a real
// product name. After trimming and lower-casing, the blank string, "unknown",
// "vga", "3d controller" and "display controller" count as generic.
func isGenericPCIeModel(model string) bool {
	normalized := strings.ToLower(strings.TrimSpace(model))
	for _, placeholder := range [...]string{"", "unknown", "vga", "3d controller", "display controller"} {
		if normalized == placeholder {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
// inferCPUManufacturer determines CPU manufacturer from model string
|
// inferCPUManufacturer determines CPU manufacturer from model string
|
||||||
func inferCPUManufacturer(model string) string {
|
func inferCPUManufacturer(model string) string {
|
||||||
upper := strings.ToUpper(model)
|
upper := strings.ToUpper(model)
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ func TestConvertCPUs(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertCPUs(cpus)
|
result := convertCPUs(cpus, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
if len(result) != 2 {
|
if len(result) != 2 {
|
||||||
t.Fatalf("expected 2 CPUs, got %d", len(result))
|
t.Fatalf("expected 2 CPUs, got %d", len(result))
|
||||||
@@ -245,7 +245,7 @@ func TestConvertMemory(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertMemory(memory)
|
result := convertMemory(memory, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
if len(result) != 2 {
|
if len(result) != 2 {
|
||||||
t.Fatalf("expected 2 memory modules, got %d", len(result))
|
t.Fatalf("expected 2 memory modules, got %d", len(result))
|
||||||
@@ -278,7 +278,7 @@ func TestConvertStorage(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertStorage(storage)
|
result := convertStorage(storage, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
if len(result) != 1 {
|
if len(result) != 1 {
|
||||||
t.Fatalf("expected 1 storage device (skipped one without serial), got %d", len(result))
|
t.Fatalf("expected 1 storage device (skipped one without serial), got %d", len(result))
|
||||||
@@ -329,7 +329,7 @@ func TestConvertPCIeDevices(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertPCIeDevices(hw)
|
result := convertPCIeDevices(hw, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
// Should have: 2 PCIe devices + 1 GPU + 1 NIC = 4 total
|
// Should have: 2 PCIe devices + 1 GPU + 1 NIC = 4 total
|
||||||
if len(result) != 4 {
|
if len(result) != 4 {
|
||||||
@@ -369,7 +369,7 @@ func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertPCIeDevices(hw)
|
result := convertPCIeDevices(hw, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
if len(result) != 1 {
|
if len(result) != 1 {
|
||||||
t.Fatalf("expected 1 PCIe device, got %d", len(result))
|
t.Fatalf("expected 1 PCIe device, got %d", len(result))
|
||||||
@@ -380,6 +380,74 @@ func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
|
||||||
|
hw := &models.HardwareConfig{
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{
|
||||||
|
Slot: "#GPU0",
|
||||||
|
DeviceClass: "3D Controller",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "#GPU0",
|
||||||
|
Model: "B200 180GB HBM3e",
|
||||||
|
Manufacturer: "NVIDIA",
|
||||||
|
SerialNumber: "1655024043371",
|
||||||
|
Status: "OK",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := convertPCIeDevices(hw, "2026-02-10T15:30:00Z")
|
||||||
|
if len(result) != 1 {
|
||||||
|
t.Fatalf("expected only dedicated GPU record without duplicate display PCIe, got %d", len(result))
|
||||||
|
}
|
||||||
|
if result[0].DeviceClass != "DisplayController" {
|
||||||
|
t.Fatalf("expected GPU record with DisplayController class, got %q", result[0].DeviceClass)
|
||||||
|
}
|
||||||
|
if result[0].Status != "OK" {
|
||||||
|
t.Fatalf("expected GPU status OK, got %q", result[0].Status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertPCIeDevices_MapsGPUStatusHistory(t *testing.T) {
|
||||||
|
hw := &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "#GPU6",
|
||||||
|
Model: "B200 180GB HBM3e",
|
||||||
|
Manufacturer: "NVIDIA",
|
||||||
|
SerialNumber: "1655024043204",
|
||||||
|
Status: "Critical",
|
||||||
|
StatusHistory: []models.StatusHistoryEntry{
|
||||||
|
{
|
||||||
|
Status: "Critical",
|
||||||
|
ChangedAt: time.Date(2026, 1, 12, 15, 5, 18, 0, time.UTC),
|
||||||
|
Details: "BIOS miss F_GPU6",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ErrorDescription: "BIOS miss F_GPU6",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := convertPCIeDevices(hw, "2026-02-10T15:30:00Z")
|
||||||
|
if len(result) != 1 {
|
||||||
|
t.Fatalf("expected 1 converted GPU, got %d", len(result))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result[0].StatusHistory) != 1 {
|
||||||
|
t.Fatalf("expected 1 history entry, got %d", len(result[0].StatusHistory))
|
||||||
|
}
|
||||||
|
if result[0].StatusHistory[0].ChangedAt != "2026-01-12T15:05:18Z" {
|
||||||
|
t.Fatalf("unexpected history changed_at: %q", result[0].StatusHistory[0].ChangedAt)
|
||||||
|
}
|
||||||
|
if result[0].StatusAtCollect == nil || result[0].StatusAtCollect.At != "2026-02-10T15:30:00Z" {
|
||||||
|
t.Fatalf("expected status_at_collection to be populated from collected_at")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestConvertPowerSupplies(t *testing.T) {
|
func TestConvertPowerSupplies(t *testing.T) {
|
||||||
psus := []models.PSU{
|
psus := []models.PSU{
|
||||||
{
|
{
|
||||||
@@ -398,7 +466,7 @@ func TestConvertPowerSupplies(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
result := convertPowerSupplies(psus)
|
result := convertPowerSupplies(psus, "2026-02-10T15:30:00Z")
|
||||||
|
|
||||||
if len(result) != 1 {
|
if len(result) != 1 {
|
||||||
t.Fatalf("expected 1 PSU (skipped empty), got %d", len(result))
|
t.Fatalf("expected 1 PSU (skipped empty), got %d", len(result))
|
||||||
@@ -506,3 +574,75 @@ func TestInferTargetHost(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertToReanimator_DeduplicatesAllSections(t *testing.T) {
|
||||||
|
input := &models.AnalysisResult{
|
||||||
|
Filename: "dup-test.json",
|
||||||
|
CollectedAt: time.Date(2026, 2, 10, 15, 30, 0, 0, time.UTC),
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
|
||||||
|
Firmware: []models.FirmwareInfo{
|
||||||
|
{DeviceName: "BMC", Version: "1.0"},
|
||||||
|
{DeviceName: "BMC", Version: "1.1"},
|
||||||
|
},
|
||||||
|
CPUs: []models.CPU{
|
||||||
|
{Socket: 0, Model: "CPU-A"},
|
||||||
|
{Socket: 0, Model: "CPU-A-DUP"},
|
||||||
|
},
|
||||||
|
Memory: []models.MemoryDIMM{
|
||||||
|
{Slot: "DIMM_A1", Present: true, SerialNumber: "MEM-1", Status: "OK"},
|
||||||
|
{Slot: "DIMM_A1", Present: true, SerialNumber: "MEM-1-DUP", Status: "OK"},
|
||||||
|
},
|
||||||
|
Storage: []models.Storage{
|
||||||
|
{Slot: "U.2-1", SerialNumber: "SSD-1", Model: "Disk1", Present: true},
|
||||||
|
{Slot: "U.2-2", SerialNumber: "SSD-1", Model: "Disk1-dup", Present: true},
|
||||||
|
},
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{Slot: "#GPU0", DeviceClass: "3D Controller", BDF: "17:00.0"},
|
||||||
|
{Slot: "SLOT-NIC1", DeviceClass: "NetworkController", BDF: "18:00.0"},
|
||||||
|
{Slot: "SLOT-NIC1", DeviceClass: "NetworkController", BDF: "18:00.1"},
|
||||||
|
},
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{Slot: "#GPU0", Model: "B200 180GB HBM3e", SerialNumber: "GPU-1", Status: "OK"},
|
||||||
|
},
|
||||||
|
PowerSupply: []models.PSU{
|
||||||
|
{Slot: "0", Present: true, SerialNumber: "PSU-1", Status: "OK"},
|
||||||
|
{Slot: "1", Present: true, SerialNumber: "PSU-1", Status: "OK"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
out, err := ConvertToReanimator(input)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ConvertToReanimator() failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(out.Hardware.Firmware) != 1 {
|
||||||
|
t.Fatalf("expected deduped firmware len=1, got %d", len(out.Hardware.Firmware))
|
||||||
|
}
|
||||||
|
if len(out.Hardware.CPUs) != 1 {
|
||||||
|
t.Fatalf("expected deduped cpus len=1, got %d", len(out.Hardware.CPUs))
|
||||||
|
}
|
||||||
|
if len(out.Hardware.Memory) != 1 {
|
||||||
|
t.Fatalf("expected deduped memory len=1, got %d", len(out.Hardware.Memory))
|
||||||
|
}
|
||||||
|
if len(out.Hardware.Storage) != 1 {
|
||||||
|
t.Fatalf("expected deduped storage len=1, got %d", len(out.Hardware.Storage))
|
||||||
|
}
|
||||||
|
if len(out.Hardware.PowerSupplies) != 1 {
|
||||||
|
t.Fatalf("expected deduped psu len=1, got %d", len(out.Hardware.PowerSupplies))
|
||||||
|
}
|
||||||
|
if len(out.Hardware.PCIeDevices) != 2 {
|
||||||
|
t.Fatalf("expected deduped pcie len=2 (gpu+nic), got %d", len(out.Hardware.PCIeDevices))
|
||||||
|
}
|
||||||
|
|
||||||
|
gpuCount := 0
|
||||||
|
for _, dev := range out.Hardware.PCIeDevices {
|
||||||
|
if dev.Slot == "#GPU0" {
|
||||||
|
gpuCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if gpuCount != 1 {
|
||||||
|
t.Fatalf("expected single #GPU0 record, got %d", gpuCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -36,78 +36,114 @@ type ReanimatorFirmware struct {
|
|||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReanimatorStatusAtCollection pairs a component status with the moment it was
// observed. It is serialized as {"status": ..., "at": ...}; both keys are
// always emitted.
type ReanimatorStatusAtCollection struct {
	// Status is the component status string.
	Status string `json:"status"`
	// At is the observation timestamp, carried as a string.
	At string `json:"at"`
}
|
||||||
|
|
||||||
|
// ReanimatorStatusHistoryEntry is one point in a component's status history:
// the status, the timestamp string at which it changed, and optional free-text
// details. Details is left out of the JSON when empty.
type ReanimatorStatusHistoryEntry struct {
	// Status is the status string recorded for this entry.
	Status string `json:"status"`
	// ChangedAt is the change timestamp, carried as a string.
	ChangedAt string `json:"changed_at"`
	// Details is optional explanatory text for the change.
	Details string `json:"details,omitempty"`
}
|
||||||
|
|
||||||
// ReanimatorCPU represents processor information
|
// ReanimatorCPU represents processor information
|
||||||
type ReanimatorCPU struct {
|
type ReanimatorCPU struct {
|
||||||
Socket int `json:"socket"`
|
Socket int `json:"socket"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Cores int `json:"cores,omitempty"`
|
Cores int `json:"cores,omitempty"`
|
||||||
Threads int `json:"threads,omitempty"`
|
Threads int `json:"threads,omitempty"`
|
||||||
FrequencyMHz int `json:"frequency_mhz,omitempty"`
|
FrequencyMHz int `json:"frequency_mhz,omitempty"`
|
||||||
MaxFrequencyMHz int `json:"max_frequency_mhz,omitempty"`
|
MaxFrequencyMHz int `json:"max_frequency_mhz,omitempty"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReanimatorMemory represents a memory module (DIMM)
|
// ReanimatorMemory represents a memory module (DIMM)
|
||||||
type ReanimatorMemory struct {
|
type ReanimatorMemory struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Location string `json:"location,omitempty"`
|
Location string `json:"location,omitempty"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
SizeMB int `json:"size_mb,omitempty"`
|
SizeMB int `json:"size_mb,omitempty"`
|
||||||
Type string `json:"type,omitempty"`
|
Type string `json:"type,omitempty"`
|
||||||
MaxSpeedMHz int `json:"max_speed_mhz,omitempty"`
|
MaxSpeedMHz int `json:"max_speed_mhz,omitempty"`
|
||||||
CurrentSpeedMHz int `json:"current_speed_mhz,omitempty"`
|
CurrentSpeedMHz int `json:"current_speed_mhz,omitempty"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
PartNumber string `json:"part_number,omitempty"`
|
PartNumber string `json:"part_number,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReanimatorStorage represents a storage device
|
// ReanimatorStorage represents a storage device
|
||||||
type ReanimatorStorage struct {
|
type ReanimatorStorage struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Type string `json:"type,omitempty"`
|
Type string `json:"type,omitempty"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
SizeGB int `json:"size_gb,omitempty"`
|
SizeGB int `json:"size_gb,omitempty"`
|
||||||
SerialNumber string `json:"serial_number"`
|
SerialNumber string `json:"serial_number"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
Firmware string `json:"firmware,omitempty"`
|
Firmware string `json:"firmware,omitempty"`
|
||||||
Interface string `json:"interface,omitempty"`
|
Interface string `json:"interface,omitempty"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReanimatorPCIe represents a PCIe device
|
// ReanimatorPCIe represents a PCIe device
|
||||||
type ReanimatorPCIe struct {
|
type ReanimatorPCIe struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
VendorID int `json:"vendor_id,omitempty"`
|
VendorID int `json:"vendor_id,omitempty"`
|
||||||
DeviceID int `json:"device_id,omitempty"`
|
DeviceID int `json:"device_id,omitempty"`
|
||||||
BDF string `json:"bdf,omitempty"`
|
BDF string `json:"bdf,omitempty"`
|
||||||
DeviceClass string `json:"device_class,omitempty"`
|
DeviceClass string `json:"device_class,omitempty"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
Model string `json:"model,omitempty"`
|
Model string `json:"model,omitempty"`
|
||||||
LinkWidth int `json:"link_width,omitempty"`
|
LinkWidth int `json:"link_width,omitempty"`
|
||||||
LinkSpeed string `json:"link_speed,omitempty"`
|
LinkSpeed string `json:"link_speed,omitempty"`
|
||||||
MaxLinkWidth int `json:"max_link_width,omitempty"`
|
MaxLinkWidth int `json:"max_link_width,omitempty"`
|
||||||
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
Firmware string `json:"firmware,omitempty"`
|
Firmware string `json:"firmware,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReanimatorPSU represents a power supply unit
|
// ReanimatorPSU represents a power supply unit
|
||||||
type ReanimatorPSU struct {
|
type ReanimatorPSU struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
Model string `json:"model,omitempty"`
|
Model string `json:"model,omitempty"`
|
||||||
Vendor string `json:"vendor,omitempty"`
|
Vendor string `json:"vendor,omitempty"`
|
||||||
WattageW int `json:"wattage_w,omitempty"`
|
WattageW int `json:"wattage_w,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
PartNumber string `json:"part_number,omitempty"`
|
PartNumber string `json:"part_number,omitempty"`
|
||||||
Firmware string `json:"firmware,omitempty"`
|
Firmware string `json:"firmware,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
InputType string `json:"input_type,omitempty"`
|
InputType string `json:"input_type,omitempty"`
|
||||||
InputPowerW int `json:"input_power_w,omitempty"`
|
InputPowerW int `json:"input_power_w,omitempty"`
|
||||||
OutputPowerW int `json:"output_power_w,omitempty"`
|
OutputPowerW int `json:"output_power_w,omitempty"`
|
||||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||||
|
StatusCheckedAt string `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt string `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *ReanimatorStatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,6 +43,19 @@ const (
|
|||||||
SeverityInfo Severity = "info"
|
SeverityInfo Severity = "info"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// StatusAtCollection captures a component's status together with the
// timestamp at which that status was observed. Both JSON keys are always
// emitted.
type StatusAtCollection struct {
	// Status is the component status string.
	Status string `json:"status"`
	// At is the moment the status refers to.
	At time.Time `json:"at"`
}
|
||||||
|
|
||||||
|
// StatusHistoryEntry represents one status transition point: the status, the
// time of the change, and optional free-text details. Details is left out of
// the JSON when empty.
type StatusHistoryEntry struct {
	// Status is the status string recorded for this transition.
	Status string `json:"status"`
	// ChangedAt is when the transition happened.
	ChangedAt time.Time `json:"changed_at"`
	// Details is optional explanatory text for the transition.
	Details string `json:"details,omitempty"`
}
|
||||||
|
|
||||||
// SensorReading represents a single sensor reading
|
// SensorReading represents a single sensor reading
|
||||||
type SensorReading struct {
|
type SensorReading struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
@@ -112,6 +125,13 @@ type CPU struct {
|
|||||||
TDP int `json:"tdp_w,omitempty"`
|
TDP int `json:"tdp_w,omitempty"`
|
||||||
PPIN string `json:"ppin,omitempty"`
|
PPIN string `json:"ppin,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// MemoryDIMM represents a memory module
|
// MemoryDIMM represents a memory module
|
||||||
@@ -129,6 +149,12 @@ type MemoryDIMM struct {
|
|||||||
PartNumber string `json:"part_number,omitempty"`
|
PartNumber string `json:"part_number,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
Ranks int `json:"ranks,omitempty"`
|
Ranks int `json:"ranks,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Storage represents a storage device
|
// Storage represents a storage device
|
||||||
@@ -144,6 +170,13 @@ type Storage struct {
|
|||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
Location string `json:"location,omitempty"` // Front/Rear
|
Location string `json:"location,omitempty"` // Front/Rear
|
||||||
BackplaneID int `json:"backplane_id,omitempty"`
|
BackplaneID int `json:"backplane_id,omitempty"`
|
||||||
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PCIeDevice represents a PCIe device
|
// PCIeDevice represents a PCIe device
|
||||||
@@ -161,6 +194,13 @@ type PCIeDevice struct {
|
|||||||
PartNumber string `json:"part_number,omitempty"`
|
PartNumber string `json:"part_number,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
MACAddresses []string `json:"mac_addresses,omitempty"`
|
MACAddresses []string `json:"mac_addresses,omitempty"`
|
||||||
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NIC represents a network interface card
|
// NIC represents a network interface card
|
||||||
@@ -189,6 +229,12 @@ type PSU struct {
|
|||||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||||
OutputVoltage float64 `json:"output_voltage,omitempty"`
|
OutputVoltage float64 `json:"output_voltage,omitempty"`
|
||||||
TemperatureC int `json:"temperature_c,omitempty"`
|
TemperatureC int `json:"temperature_c,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU represents a graphics processing unit
|
// GPU represents a graphics processing unit
|
||||||
@@ -220,6 +266,12 @@ type GPU struct {
|
|||||||
CurrentLinkWidth int `json:"current_link_width,omitempty"`
|
CurrentLinkWidth int `json:"current_link_width,omitempty"`
|
||||||
CurrentLinkSpeed string `json:"current_link_speed,omitempty"`
|
CurrentLinkSpeed string `json:"current_link_speed,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NetworkAdapter represents a network adapter with detailed info
|
// NetworkAdapter represents a network adapter with detailed info
|
||||||
@@ -238,4 +290,10 @@ type NetworkAdapter struct {
|
|||||||
PortType string `json:"port_type,omitempty"`
|
PortType string `json:"port_type,omitempty"`
|
||||||
MACAddresses []string `json:"mac_addresses,omitempty"`
|
MACAddresses []string `json:"mac_addresses,omitempty"`
|
||||||
Status string `json:"status,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
|
||||||
|
StatusCheckedAt time.Time `json:"status_checked_at,omitempty"`
|
||||||
|
StatusChangedAt time.Time `json:"status_changed_at,omitempty"`
|
||||||
|
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||||
|
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
101
internal/parser/vendors/inspur/gpu_status.go
vendored
101
internal/parser/vendors/inspur/gpu_status.go
vendored
@@ -2,6 +2,7 @@ package inspur
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -15,38 +16,96 @@ func applyGPUStatusFromEvents(hw *models.HardwareConfig, events []models.Event)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
faulty := make(map[int]bool)
|
gpuByIndex := make(map[int]*models.GPU)
|
||||||
for _, e := range events {
|
|
||||||
if !isGPUFaultEvent(e) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
matches := reFaultGPU.FindAllStringSubmatch(e.Description, -1)
|
|
||||||
for _, m := range matches {
|
|
||||||
if len(m) < 2 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
idx, err := strconv.Atoi(m[1])
|
|
||||||
if err == nil && idx >= 0 {
|
|
||||||
faulty[idx] = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range hw.GPUs {
|
for i := range hw.GPUs {
|
||||||
gpu := &hw.GPUs[i]
|
gpu := &hw.GPUs[i]
|
||||||
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
||||||
if ok && faulty[idx] {
|
if !ok {
|
||||||
gpu.Status = "Critical"
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
gpuByIndex[idx] = gpu
|
||||||
|
gpu.StatusHistory = nil
|
||||||
|
gpu.ErrorDescription = ""
|
||||||
|
}
|
||||||
|
|
||||||
if strings.TrimSpace(gpu.Status) == "" {
|
relevantEvents := make([]models.Event, 0)
|
||||||
|
for _, e := range events {
|
||||||
|
if !isGPUFaultEvent(e) || len(extractFaultyGPUSet(e.Description)) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
relevantEvents = append(relevantEvents, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(relevantEvents) == 0 {
|
||||||
|
for _, gpu := range gpuByIndex {
|
||||||
|
if strings.TrimSpace(gpu.Status) == "" {
|
||||||
|
gpu.Status = "OK"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(relevantEvents, func(i, j int) bool {
|
||||||
|
return relevantEvents[i].Timestamp.Before(relevantEvents[j].Timestamp)
|
||||||
|
})
|
||||||
|
|
||||||
|
currentStatus := make(map[int]string, len(gpuByIndex))
|
||||||
|
lastCriticalDetails := make(map[int]string, len(gpuByIndex))
|
||||||
|
for idx := range gpuByIndex {
|
||||||
|
currentStatus[idx] = "OK"
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, e := range relevantEvents {
|
||||||
|
faultySet := extractFaultyGPUSet(e.Description)
|
||||||
|
for idx, gpu := range gpuByIndex {
|
||||||
|
newStatus := "OK"
|
||||||
|
if faultySet[idx] {
|
||||||
|
newStatus = "Critical"
|
||||||
|
lastCriticalDetails[idx] = strings.TrimSpace(e.Description)
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentStatus[idx] != newStatus {
|
||||||
|
gpu.StatusHistory = append(gpu.StatusHistory, models.StatusHistoryEntry{
|
||||||
|
Status: newStatus,
|
||||||
|
ChangedAt: e.Timestamp,
|
||||||
|
Details: strings.TrimSpace(e.Description),
|
||||||
|
})
|
||||||
|
gpu.StatusChangedAt = e.Timestamp
|
||||||
|
currentStatus[idx] = newStatus
|
||||||
|
}
|
||||||
|
|
||||||
|
gpu.StatusCheckedAt = e.Timestamp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx, gpu := range gpuByIndex {
|
||||||
|
gpu.Status = currentStatus[idx]
|
||||||
|
if gpu.Status == "Critical" {
|
||||||
|
gpu.ErrorDescription = lastCriticalDetails[idx]
|
||||||
|
} else {
|
||||||
|
gpu.ErrorDescription = ""
|
||||||
|
}
|
||||||
|
if gpu.StatusCheckedAt.IsZero() && strings.TrimSpace(gpu.Status) == "" {
|
||||||
gpu.Status = "OK"
|
gpu.Status = "OK"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func extractFaultyGPUSet(description string) map[int]bool {
|
||||||
|
faulty := make(map[int]bool)
|
||||||
|
matches := reFaultGPU.FindAllStringSubmatch(description, -1)
|
||||||
|
for _, m := range matches {
|
||||||
|
if len(m) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
idx, err := strconv.Atoi(m[1])
|
||||||
|
if err == nil && idx >= 0 {
|
||||||
|
faulty[idx] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return faulty
|
||||||
|
}
|
||||||
|
|
||||||
func isGPUFaultEvent(e models.Event) bool {
|
func isGPUFaultEvent(e models.Event) bool {
|
||||||
desc := strings.ToLower(e.Description)
|
desc := strings.ToLower(e.Description)
|
||||||
if strings.Contains(desc, "bios miss f_gpu") {
|
if strings.Contains(desc, "bios miss f_gpu") {
|
||||||
|
|||||||
69
internal/parser/vendors/inspur/hgx_firmware_test.go
vendored
Normal file
69
internal/parser/vendors/inspur/hgx_firmware_test.go
vendored
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
package inspur
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAppendHGXFirmwareFromHWInfo_AppendsInventoryEntries(t *testing.T) {
|
||||||
|
hw := &models.HardwareConfig{
|
||||||
|
Firmware: []models.FirmwareInfo{
|
||||||
|
{DeviceName: "BIOS", Version: "1.0.0"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
content := []byte(`
|
||||||
|
{
|
||||||
|
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_FW_BMC_0",
|
||||||
|
"Id": "HGX_FW_BMC_0",
|
||||||
|
"Oem": {
|
||||||
|
"Nvidia": {
|
||||||
|
"ActiveFirmwareSlot": {"Version": "25.05-A"},
|
||||||
|
"InactiveFirmwareSlot": {"Version": "25.04-B"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Version": "25.05-A",
|
||||||
|
"WriteProtected": false
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_FW_GPU_SXM_1",
|
||||||
|
"Id": "HGX_FW_GPU_SXM_1",
|
||||||
|
"Version": "97.00.C5.00.0E",
|
||||||
|
"WriteProtected": false
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_Driver_GPU_SXM_1",
|
||||||
|
"Id": "HGX_Driver_GPU_SXM_1",
|
||||||
|
"Version": "",
|
||||||
|
"WriteProtected": false
|
||||||
|
}
|
||||||
|
`)
|
||||||
|
|
||||||
|
appendHGXFirmwareFromHWInfo(content, hw)
|
||||||
|
|
||||||
|
if len(hw.Firmware) != 5 {
|
||||||
|
t.Fatalf("expected 5 firmware entries after append, got %d", len(hw.Firmware))
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := make(map[string]string)
|
||||||
|
for _, fw := range hw.Firmware {
|
||||||
|
seen[fw.DeviceName] = fw.Version
|
||||||
|
}
|
||||||
|
|
||||||
|
if seen["HGX_FW_BMC_0"] != "25.05-A" {
|
||||||
|
t.Fatalf("expected HGX_FW_BMC_0 version 25.05-A, got %q", seen["HGX_FW_BMC_0"])
|
||||||
|
}
|
||||||
|
if seen["HGX_FW_BMC_0 Active Slot"] != "25.05-A" {
|
||||||
|
t.Fatalf("expected active slot version, got %q", seen["HGX_FW_BMC_0 Active Slot"])
|
||||||
|
}
|
||||||
|
if seen["HGX_FW_BMC_0 Inactive Slot"] != "25.04-B" {
|
||||||
|
t.Fatalf("expected inactive slot version, got %q", seen["HGX_FW_BMC_0 Inactive Slot"])
|
||||||
|
}
|
||||||
|
if seen["HGX_FW_GPU_SXM_1"] != "97.00.C5.00.0E" {
|
||||||
|
t.Fatalf("expected GPU FW entry, got %q", seen["HGX_FW_GPU_SXM_1"])
|
||||||
|
}
|
||||||
|
if _, ok := seen["HGX_Driver_GPU_SXM_1"]; ok {
|
||||||
|
t.Fatalf("did not expect empty version driver entry")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -24,6 +24,10 @@ func TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping(t *testing.T) {
|
|||||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
|
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
|
||||||
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_5/Assembly
|
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_5/Assembly
|
||||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN5","SerialNumber":"SXM5SN"}
|
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN5","SerialNumber":"SXM5SN"}
|
||||||
|
{"Id":"HGX_FW_GPU_SXM_1","Version":"FW1"}
|
||||||
|
{"Id":"HGX_FW_GPU_SXM_3","Version":"FW3"}
|
||||||
|
{"Id":"HGX_FW_GPU_SXM_5","Version":"FW5"}
|
||||||
|
{"Id":"HGX_InfoROM_GPU_SXM_3","Version":"IR3"}
|
||||||
`)
|
`)
|
||||||
|
|
||||||
enrichGPUsFromHGXHWInfo(content, hw)
|
enrichGPUsFromHGXHWInfo(content, hw)
|
||||||
@@ -37,6 +41,15 @@ func TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping(t *testing.T) {
|
|||||||
if hw.GPUs[2].SerialNumber != "SXM5SN" {
|
if hw.GPUs[2].SerialNumber != "SXM5SN" {
|
||||||
t.Fatalf("expected #GPU0 to map to SXM5 serial, got %q", hw.GPUs[2].SerialNumber)
|
t.Fatalf("expected #GPU0 to map to SXM5 serial, got %q", hw.GPUs[2].SerialNumber)
|
||||||
}
|
}
|
||||||
|
if hw.GPUs[0].Firmware != "FW3" {
|
||||||
|
t.Fatalf("expected #GPU6 firmware FW3, got %q", hw.GPUs[0].Firmware)
|
||||||
|
}
|
||||||
|
if hw.GPUs[0].VideoBIOS != "IR3" {
|
||||||
|
t.Fatalf("expected #GPU6 InfoROM in VideoBIOS IR3, got %q", hw.GPUs[0].VideoBIOS)
|
||||||
|
}
|
||||||
|
if hw.GPUs[2].Firmware != "FW5" {
|
||||||
|
t.Fatalf("expected #GPU0 firmware FW5, got %q", hw.GPUs[2].Firmware)
|
||||||
|
}
|
||||||
for _, g := range hw.GPUs {
|
for _, g := range hw.GPUs {
|
||||||
if g.Slot == "#CPU0_PE1_E_BMC" {
|
if g.Slot == "#CPU0_PE1_E_BMC" {
|
||||||
t.Fatalf("expected non-HGX BMC VGA entry to be filtered out")
|
t.Fatalf("expected non-HGX BMC VGA entry to be filtered out")
|
||||||
@@ -104,6 +117,44 @@ func TestApplyGPUStatusFromEvents_MarksFaultedGPU(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUStatusFromEvents_UsesLatestEventAsCurrentStatusAndKeepsHistory(t *testing.T) {
|
||||||
|
hw := &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{Slot: "#GPU1"},
|
||||||
|
{Slot: "#GPU3"},
|
||||||
|
{Slot: "#GPU6"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
events := []models.Event{
|
||||||
|
{
|
||||||
|
ID: "17FFB002",
|
||||||
|
Timestamp: time.Date(2026, 1, 12, 22, 51, 16, 0, time.FixedZone("UTC+8", 8*3600)),
|
||||||
|
Description: "PCIe Present mismatch BIOS miss F_GPU1 F_GPU3 F_GPU6",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "17FFB002",
|
||||||
|
Timestamp: time.Date(2026, 1, 12, 23, 5, 18, 0, time.FixedZone("UTC+8", 8*3600)),
|
||||||
|
Description: "PCIe Present mismatch BIOS miss F_GPU6",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
applyGPUStatusFromEvents(hw, events)
|
||||||
|
|
||||||
|
if hw.GPUs[0].Status != "OK" {
|
||||||
|
t.Fatalf("expected #GPU1 to recover to OK on latest event, got %q", hw.GPUs[0].Status)
|
||||||
|
}
|
||||||
|
if hw.GPUs[1].Status != "OK" {
|
||||||
|
t.Fatalf("expected #GPU3 to recover to OK on latest event, got %q", hw.GPUs[1].Status)
|
||||||
|
}
|
||||||
|
if hw.GPUs[2].Status != "Critical" {
|
||||||
|
t.Fatalf("expected #GPU6 to remain Critical, got %q", hw.GPUs[2].Status)
|
||||||
|
}
|
||||||
|
if len(hw.GPUs[0].StatusHistory) == 0 {
|
||||||
|
t.Fatalf("expected #GPU1 status history to be populated")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseIDLLog_ParsesStructuredJSONLine(t *testing.T) {
|
func TestParseIDLLog_ParsesStructuredJSONLine(t *testing.T) {
|
||||||
content := []byte(`{ "MESSAGE": "|2026-01-12T23:05:18+08:00|PCIE|Assert|Critical|17FFB002|PCIe Present mismatch BIOS miss F_GPU6 - Assert|" }`)
|
content := []byte(`{ "MESSAGE": "|2026-01-12T23:05:18+08:00|PCIE|Assert|Critical|17FFB002|PCIe Present mismatch BIOS miss F_GPU6 - Assert|" }`)
|
||||||
|
|
||||||
|
|||||||
185
internal/parser/vendors/inspur/hgx_hwinfo.go
vendored
185
internal/parser/vendors/inspur/hgx_hwinfo.go
vendored
@@ -15,6 +15,18 @@ type hgxGPUAssemblyInfo struct {
|
|||||||
Serial string
|
Serial string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hgxGPUFirmwareInfo carries the firmware-related versions collected for a
// single HGX GPU, keyed elsewhere by its SXM index.
type hgxGPUFirmwareInfo struct {
	// Firmware is the version reported by the HGX_FW_GPU_SXM_<n> inventory
	// entry; InfoROM is the version reported by HGX_InfoROM_GPU_SXM_<n>.
	// Either may be empty when the corresponding entry was not found.
	Firmware, InfoROM string
}
|
||||||
|
|
||||||
|
// hgxFirmwareInventoryEntry is one firmware inventory object extracted from
// the HGX hardware-info dump.
type hgxFirmwareInventoryEntry struct {
	// ID is the value of the object's "Id" field.
	ID string

	// Version is the object's plain (non-slot) "Version" value.
	Version string

	// ActiveVersion and InactiveVersion are the "Version" values found under
	// "ActiveFirmwareSlot" and "InactiveFirmwareSlot"; empty when absent.
	ActiveVersion, InactiveVersion string
}
|
||||||
|
|
||||||
// Logical GPU index mapping used by HGX B200 UI ordering.
|
// Logical GPU index mapping used by HGX B200 UI ordering.
|
||||||
// Example from real logs/UI:
|
// Example from real logs/UI:
|
||||||
// GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8, GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
|
// GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8, GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
|
||||||
@@ -31,6 +43,10 @@ var hgxLogicalToSXM = map[int]int{
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
|
reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
|
||||||
|
reHGXFWBlock = regexp.MustCompile(`(?s)"Id":\s*"HGX_FW_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
|
||||||
|
reHGXInfoROM = regexp.MustCompile(`(?s)"Id":\s*"HGX_InfoROM_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
|
||||||
|
reIDLine = regexp.MustCompile(`"Id":\s*"([^"]+)"`)
|
||||||
|
reVersion = regexp.MustCompile(`"Version":\s*"([^"]*)"`)
|
||||||
reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
|
reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -43,6 +59,7 @@ func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
|
|||||||
if len(bySXM) == 0 {
|
if len(bySXM) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
fwBySXM := parseHGXGPUFirmware(content)
|
||||||
|
|
||||||
normalizeHGXGPUInventory(hw, bySXM)
|
normalizeHGXGPUInventory(hw, bySXM)
|
||||||
|
|
||||||
@@ -72,6 +89,57 @@ func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
|
|||||||
if strings.TrimSpace(gpu.Manufacturer) == "" {
|
if strings.TrimSpace(gpu.Manufacturer) == "" {
|
||||||
gpu.Manufacturer = "NVIDIA"
|
gpu.Manufacturer = "NVIDIA"
|
||||||
}
|
}
|
||||||
|
if fw, ok := fwBySXM[sxm]; ok {
|
||||||
|
if strings.TrimSpace(gpu.Firmware) == "" && strings.TrimSpace(fw.Firmware) != "" {
|
||||||
|
gpu.Firmware = fw.Firmware
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(gpu.VideoBIOS) == "" && strings.TrimSpace(fw.InfoROM) != "" {
|
||||||
|
gpu.VideoBIOS = fw.InfoROM
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func appendHGXFirmwareFromHWInfo(content []byte, hw *models.HardwareConfig) {
|
||||||
|
if hw == nil || len(content) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
entries := parseHGXFirmwareInventory(content)
|
||||||
|
if len(entries) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
existing := make(map[string]bool, len(hw.Firmware))
|
||||||
|
for _, fw := range hw.Firmware {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(fw.DeviceName) + "|" + strings.TrimSpace(fw.Version))
|
||||||
|
existing[key] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
appendFW := func(name, version string) {
|
||||||
|
name = strings.TrimSpace(name)
|
||||||
|
version = strings.TrimSpace(version)
|
||||||
|
if name == "" || version == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
key := strings.ToLower(name + "|" + version)
|
||||||
|
if existing[key] {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
existing[key] = true
|
||||||
|
hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
|
||||||
|
DeviceName: name,
|
||||||
|
Version: version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, e := range entries {
|
||||||
|
appendFW(e.ID, e.Version)
|
||||||
|
|
||||||
|
if e.ActiveVersion != "" && e.InactiveVersion != "" && e.ActiveVersion != e.InactiveVersion {
|
||||||
|
appendFW(e.ID+" Active Slot", e.ActiveVersion)
|
||||||
|
appendFW(e.ID+" Inactive Slot", e.InactiveVersion)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,6 +165,123 @@ func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseHGXGPUFirmware(content []byte) map[int]hgxGPUFirmwareInfo {
|
||||||
|
result := make(map[int]hgxGPUFirmwareInfo)
|
||||||
|
|
||||||
|
matchesFW := reHGXFWBlock.FindAllSubmatch(content, -1)
|
||||||
|
for _, m := range matchesFW {
|
||||||
|
if len(m) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
||||||
|
if err != nil || sxmIdx <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
version := strings.TrimSpace(string(m[2]))
|
||||||
|
if version == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
current := result[sxmIdx]
|
||||||
|
if current.Firmware == "" {
|
||||||
|
current.Firmware = version
|
||||||
|
}
|
||||||
|
result[sxmIdx] = current
|
||||||
|
}
|
||||||
|
|
||||||
|
matchesInfoROM := reHGXInfoROM.FindAllSubmatch(content, -1)
|
||||||
|
for _, m := range matchesInfoROM {
|
||||||
|
if len(m) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
||||||
|
if err != nil || sxmIdx <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
version := strings.TrimSpace(string(m[2]))
|
||||||
|
if version == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
current := result[sxmIdx]
|
||||||
|
if current.InfoROM == "" {
|
||||||
|
current.InfoROM = version
|
||||||
|
}
|
||||||
|
result[sxmIdx] = current
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseHGXFirmwareInventory(content []byte) []hgxFirmwareInventoryEntry {
|
||||||
|
lines := strings.Split(string(content), "\n")
|
||||||
|
result := make([]hgxFirmwareInventoryEntry, 0)
|
||||||
|
|
||||||
|
var current *hgxFirmwareInventoryEntry
|
||||||
|
section := ""
|
||||||
|
|
||||||
|
flush := func() {
|
||||||
|
if current == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if current.Version == "" && current.ActiveVersion == "" && current.InactiveVersion == "" {
|
||||||
|
current = nil
|
||||||
|
section = ""
|
||||||
|
return
|
||||||
|
}
|
||||||
|
result = append(result, *current)
|
||||||
|
current = nil
|
||||||
|
section = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, line := range lines {
|
||||||
|
if m := reIDLine.FindStringSubmatch(line); len(m) > 1 {
|
||||||
|
flush()
|
||||||
|
id := strings.TrimSpace(m[1])
|
||||||
|
if strings.HasPrefix(id, "HGX_") {
|
||||||
|
current = &hgxFirmwareInventoryEntry{ID: id}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if current == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Contains(line, `"ActiveFirmwareSlot"`) {
|
||||||
|
section = "active"
|
||||||
|
}
|
||||||
|
if strings.Contains(line, `"InactiveFirmwareSlot"`) {
|
||||||
|
section = "inactive"
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := reVersion.FindStringSubmatch(line); len(m) > 1 {
|
||||||
|
version := strings.TrimSpace(m[1])
|
||||||
|
if version == "" {
|
||||||
|
section = ""
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch section {
|
||||||
|
case "active":
|
||||||
|
if current.ActiveVersion == "" {
|
||||||
|
current.ActiveVersion = version
|
||||||
|
}
|
||||||
|
case "inactive":
|
||||||
|
if current.InactiveVersion == "" {
|
||||||
|
current.InactiveVersion = version
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Keep top-level version from the last seen plain "Version" in current entry.
|
||||||
|
current.Version = version
|
||||||
|
}
|
||||||
|
section = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flush()
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
func extractLogicalGPUIndex(slot string) (int, bool) {
|
func extractLogicalGPUIndex(slot string) (int, bool) {
|
||||||
m := reSlotGPU.FindStringSubmatch(slot)
|
m := reSlotGPU.FindStringSubmatch(slot)
|
||||||
if len(m) < 2 {
|
if len(m) < 2 {
|
||||||
|
|||||||
1
internal/parser/vendors/inspur/parser.go
vendored
1
internal/parser/vendors/inspur/parser.go
vendored
@@ -161,6 +161,7 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
|||||||
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
|
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
|
||||||
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
|
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
|
||||||
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
|
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
|
||||||
|
appendHGXFirmwareFromHWInfo(f.Content, result.Hardware)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
|
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
|
||||||
|
|||||||
Reference in New Issue
Block a user