Compare commits
107 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
96e65d8f65 | ||
|
|
30409eef67 | ||
|
|
65e65968cf | ||
|
|
380c199705 | ||
|
|
d650a6ba1c | ||
|
|
d8d3d8c524 | ||
|
|
057a222288 | ||
|
|
f11a43f690 | ||
|
|
476630190d | ||
|
|
9007f1b360 | ||
|
|
0acdc2b202 | ||
|
|
47bb0ee939 | ||
|
|
5815100e2f | ||
|
|
1eb639e6bf | ||
|
|
a9f58b3cf4 | ||
|
|
d8ffe3d3a5 | ||
|
|
9df29b1be9 | ||
|
|
62d6ad6f66 | ||
|
|
f09344e288 | ||
| 19d857b459 | |||
| 8d80048117 | |||
| 21ea129933 | |||
| 9c5512d238 | |||
| 206496efae | |||
| 7d1a02cb72 | |||
| 070971685f | |||
| 78806f9fa0 | |||
| 4940cd9645 | |||
| 736b77f055 | |||
| 0252264ddc | |||
| 25e3b8bb42 | |||
| bb4505a249 | |||
| 2fa4a1235a | |||
| fe5da1dbd7 | |||
| 612058ed16 | |||
| e0146adfff | |||
| 9a30705c9a | |||
| 8dbbec3610 | |||
| 4c60ebbf1d | |||
| c52fea2fec | |||
| dae4744eb3 | |||
| b6ff47fea8 | |||
| 1d282c4196 | |||
| f35cabac48 | |||
| a2c9e9a57f | |||
| b918363252 | |||
| 6c19a58b24 | |||
| 9aadf2f1e9 | |||
|
|
ddab93a5ee | ||
|
|
000199fbdc | ||
|
|
68592da9f5 | ||
|
|
b1dde592ae | ||
|
|
693b7346ab | ||
|
|
a4a1a19a94 | ||
|
|
66fb90233f | ||
|
|
7a1285db99 | ||
|
|
144d298efa | ||
|
|
a6c90b6e77 | ||
|
|
2e348751f3 | ||
|
|
15dc86a0e4 | ||
|
|
752b063613 | ||
|
|
6f66a8b2a1 | ||
|
|
ce30f943df | ||
|
|
810c4b5ff9 | ||
|
|
5d9e9d73de | ||
| 38cc051f23 | |||
|
|
fcd57c1ba9 | ||
|
|
82ee513835 | ||
| de5521a4e5 | |||
| a82b55b144 | |||
| 758fa66282 | |||
| b33cca5fcc | |||
| 514da76ddb | |||
| c13788132b | |||
| 5e49adaf05 | |||
| c7b2a7ab29 | |||
| 0af3cee9b6 | |||
| 8715fcace4 | |||
| 1b1bc74fc7 | |||
| 77e25ddc02 | |||
| bcce975fd6 | |||
| 8b065c6cca | |||
| aa22034944 | |||
|
|
7d9135dc63 | ||
|
|
80e726d756 | ||
| 92134a6cc1 | |||
| ae588ae75a | |||
| b64a8d8709 | |||
|
|
f9230e12f3 | ||
|
|
bb48b03677 | ||
|
|
c89ee0118f | ||
|
|
60c52b18b1 | ||
|
|
f6a10d4eac | ||
|
|
53849032fe | ||
|
|
c54abf11b7 | ||
|
|
596eda709c | ||
|
|
d38d0c9d30 | ||
|
|
aa3c82d9ba | ||
|
|
5a982d7ca8 | ||
|
|
601e21f184 | ||
|
|
c8772d97ed | ||
|
|
8e99c36888 | ||
| 241e4e3605 | |||
| eeed509b43 | |||
|
|
70cd541d9e | ||
|
|
21f4e5a67e | ||
| c7422e95aa |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -62,3 +62,10 @@ go.work.sum
|
||||
|
||||
# Distribution binaries
|
||||
dist/
|
||||
|
||||
# Release artifacts
|
||||
release/
|
||||
releases/
|
||||
releases/**/SHA256SUMS.txt
|
||||
releases/**/*.tar.gz
|
||||
releases/**/*.zip
|
||||
|
||||
9
.gitmodules
vendored
Normal file
9
.gitmodules
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
[submodule "third_party/pciids"]
|
||||
path = third_party/pciids
|
||||
url = https://github.com/pciutils/pciids.git
|
||||
[submodule "bible"]
|
||||
path = bible
|
||||
url = https://git.mchus.pro/mchus/bible.git
|
||||
[submodule "internal/chart"]
|
||||
path = internal/chart
|
||||
url = https://git.mchus.pro/reanimator/chart.git
|
||||
11
AGENTS.md
Normal file
11
AGENTS.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# LOGPile — Instructions for Codex
|
||||
|
||||
## Shared Engineering Rules
|
||||
Read `bible/` — shared rules for all projects (CSV, logging, DB, tables, background tasks, code style).
|
||||
Start with `bible/rules/patterns/` for specific contracts.
|
||||
|
||||
## Project Architecture
|
||||
Read `bible-local/` — LOGPile specific architecture.
|
||||
Read order: `bible-local/README.md` → `01-overview.md` → `02-architecture.md` → `04-data-models.md` → relevant file(s) for the task.
|
||||
|
||||
Every architectural decision specific to this project must be recorded in `bible-local/10-decisions.md`.
|
||||
198
CLAUDE.md
198
CLAUDE.md
@@ -1,193 +1,11 @@
|
||||
# BMC Analyzer - Инструкции для Claude Code
|
||||
# LOGPile — Instructions for Claude
|
||||
|
||||
## Описание проекта
|
||||
## Shared Engineering Rules
|
||||
Read `bible/` — shared rules for all projects (CSV, logging, DB, tables, background tasks, code style).
|
||||
Start with `bible/rules/patterns/` for specific contracts.
|
||||
|
||||
Приложение для анализа диагностической информации с BMC серверов (IPMI).
|
||||
Представляет собой standalone Go-бинарник со встроенным веб-интерфейсом.
|
||||
## Project Architecture
|
||||
Read `bible-local/` — LOGPile specific architecture.
|
||||
Read order: `bible-local/README.md` → `01-overview.md` → `02-architecture.md` → `04-data-models.md` → relevant file(s) for the task.
|
||||
|
||||
### Функциональность
|
||||
|
||||
**Входные данные:**
|
||||
- Архив (tar.gz/zip) с диагностическими данными IPMI сервера
|
||||
|
||||
**Обработка:**
|
||||
- Парсинг System Event Log (SEL) - журнал событий IPMI
|
||||
- Парсинг FRU (Field Replaceable Unit) - серийные номера компонентов
|
||||
- Парсинг конфигурации сервера (CPU, RAM, диски, и т.д.)
|
||||
|
||||
**Выходные данные:**
|
||||
- Веб-интерфейс с человекочитаемой информацией
|
||||
- Экспорт логов в TXT/JSON
|
||||
- Экспорт конфигурации в JSON
|
||||
- Экспорт серийных номеров в CSV
|
||||
|
||||
## Архитектура
|
||||
|
||||
- **Тип:** Standalone бинарник с embedded веб-сервером
|
||||
- **Язык:** Go
|
||||
- **UI:** Embedded HTML + CSS + Vanilla JS (или Alpine.js)
|
||||
- **Порт:** localhost:8080 (по умолчанию)
|
||||
|
||||
## Структура проекта
|
||||
|
||||
```
|
||||
bmc-analyzer/
|
||||
├── cmd/bmc-analyzer/main.go # Точка входа
|
||||
├── internal/
|
||||
│ ├── parser/ # Парсинг архивов и IPMI данных
|
||||
│ ├── models/ # Модели данных
|
||||
│ ├── analyzer/ # Логика анализа
|
||||
│ ├── exporter/ # Экспорт данных
|
||||
│ └── server/ # HTTP сервер и handlers
|
||||
├── web/ # Embedded веб-интерфейс
|
||||
│ ├── static/ # CSS, JS, изображения
|
||||
│ └── templates/ # HTML шаблоны
|
||||
├── testdata/ # Примеры архивов для тестов
|
||||
├── go.mod
|
||||
├── Makefile
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## Технический стек
|
||||
|
||||
### Backend
|
||||
- Go 1.21+
|
||||
- Стандартная библиотека (net/http, archive/tar, compress/gzip)
|
||||
- embed для встраивания веб-ресурсов
|
||||
- Возможно: fiber или gin для роутинга (на ваше усмотрение)
|
||||
|
||||
### Frontend
|
||||
- Vanilla JavaScript или Alpine.js (минимализм)
|
||||
- CSS (можно Tailwind CSS через CDN)
|
||||
- Без сборщиков - всё embedded в бинарник
|
||||
|
||||
### Парсинг IPMI
|
||||
- SEL формат: обычно текстовый вывод `ipmitool sel list` или бинарный
|
||||
- FRU формат: вывод `ipmitool fru print`
|
||||
- Конфигурация: различные текстовые файлы из архива
|
||||
|
||||
## Этапы разработки
|
||||
|
||||
### 1. Базовая структура ✓
|
||||
- [x] Создана структура директорий
|
||||
- [ ] go.mod инициализирован
|
||||
- [ ] Makefile создан
|
||||
|
||||
### 2. Парсер архивов
|
||||
- [ ] Распаковка tar.gz
|
||||
- [ ] Распаковка zip
|
||||
- [ ] Определение типов файлов внутри архива
|
||||
|
||||
### 3. Парсеры IPMI данных
|
||||
- [ ] SEL parser (System Event Log)
|
||||
- [ ] FRU parser (серийные номера)
|
||||
- [ ] Config parser (конфигурация сервера)
|
||||
|
||||
### 4. Модели данных
|
||||
- [ ] Event (события из SEL)
|
||||
- [ ] Hardware (конфигурация)
|
||||
- [ ] SerialNumber (серийники компонентов)
|
||||
|
||||
### 5. Веб-сервер
|
||||
- [ ] HTTP сервер с embedded файлами
|
||||
- [ ] Upload handler для архивов
|
||||
- [ ] API endpoints для получения данных
|
||||
- [ ] Handlers для экспорта
|
||||
|
||||
### 6. Веб-интерфейс
|
||||
- [ ] Главная страница с upload формой
|
||||
- [ ] Отображение событий (timeline/таблица)
|
||||
- [ ] Отображение конфигурации
|
||||
- [ ] Таблица серийных номеров
|
||||
- [ ] Кнопки экспорта
|
||||
|
||||
### 7. Экспортеры
|
||||
- [ ] CSV экспорт (серийники)
|
||||
- [ ] JSON экспорт (конфиг, события)
|
||||
- [ ] TXT отчет (логи)
|
||||
|
||||
### 8. Тестирование и сборка
|
||||
- [ ] Unit тесты для парсеров
|
||||
- [ ] Интеграционные тесты
|
||||
- [ ] Cross-platform сборка (Linux, Windows, Mac)
|
||||
|
||||
## Примеры использования
|
||||
|
||||
```bash
|
||||
# Простой запуск
|
||||
./bmc-analyzer
|
||||
|
||||
# С указанием порта
|
||||
./bmc-analyzer --port 9000
|
||||
|
||||
# С предзагрузкой файла
|
||||
./bmc-analyzer --file /path/to/bmc-archive.tar.gz
|
||||
|
||||
# Кросс-компиляция
|
||||
make build-all
|
||||
```
|
||||
|
||||
## Формат данных IPMI
|
||||
|
||||
### SEL (System Event Log)
|
||||
```
|
||||
SEL Record ID : 0001
|
||||
Record Type : 02
|
||||
Timestamp : 01/15/2025 14:23:45
|
||||
Generator ID : 0020
|
||||
EvM Revision : 04
|
||||
Sensor Type : Temperature
|
||||
Sensor Number : 01
|
||||
Event Type : Threshold
|
||||
Event Direction : Assertion Event
|
||||
Event Data : 010000
|
||||
Description : Upper Critical - going high
|
||||
```
|
||||
|
||||
### FRU (Field Replaceable Unit)
|
||||
```
|
||||
FRU Device Description : Builtin FRU Device (ID 0)
|
||||
Board Mfg Date : Mon Jan 1 00:00:00 1996
|
||||
Board Mfg : Supermicro
|
||||
Board Product : X11DPH-T
|
||||
Board Serial : WM194S001234
|
||||
Board Part Number : X11DPH-TQ
|
||||
```
|
||||
|
||||
## API Endpoints (планируемые)
|
||||
|
||||
```
|
||||
POST /api/upload # Загрузить архив
|
||||
GET /api/events # Получить список событий
|
||||
GET /api/config # Получить конфигурацию
|
||||
GET /api/serials # Получить серийные номера
|
||||
GET /api/export/csv # Экспорт в CSV
|
||||
GET /api/export/json # Экспорт в JSON
|
||||
GET /api/export/txt # Экспорт текстового отчета
|
||||
DELETE /api/clear # Очистить загруженные данные
|
||||
```
|
||||
|
||||
## Следующие шаги
|
||||
|
||||
1. Инициализировать Go модуль
|
||||
2. Создать базовую структуру пакетов
|
||||
3. Реализовать парсер архивов (tar.gz)
|
||||
4. Создать простой HTTP сервер с upload формой
|
||||
5. Реализовать парсинг SEL логов
|
||||
6. Добавить веб-интерфейс для отображения данных
|
||||
|
||||
## Примечания
|
||||
|
||||
- Все файлы веб-интерфейса должны быть embedded в бинарник через `//go:embed`
|
||||
- Приоритет на простоту и минимум зависимостей
|
||||
- Безопасность: валидация загружаемых архивов (размер, типы файлов)
|
||||
- UI должен быть простым и функциональным, не перегруженным
|
||||
- Поддержка русского языка в интерфейсе
|
||||
|
||||
## Вопросы для уточнения
|
||||
|
||||
1. Какие конкретно производители BMC используются? (Supermicro, Dell iDRAC, HP iLO, etc.)
|
||||
2. Есть ли примеры реальных архивов для тестирования?
|
||||
3. Нужна ли поддержка разных форматов SEL (текстовый vs бинарный)?
|
||||
4. Какие метрики/события наиболее важны для анализа?
|
||||
5. Нужна ли фильтрация событий по severity (Critical, Warning, Info)?
|
||||
Every architectural decision specific to this project must be recorded in `bible-local/10-decisions.md`.
|
||||
|
||||
19
Makefile
19
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: build run clean test build-all
|
||||
.PHONY: build run clean test build-all update-pci-ids
|
||||
|
||||
BINARY_NAME=logpile
|
||||
VERSION=$(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
|
||||
@@ -6,7 +6,8 @@ COMMIT=$(shell git rev-parse --short HEAD 2>/dev/null || echo "none")
|
||||
LDFLAGS=-ldflags "-X main.version=$(VERSION) -X main.commit=$(COMMIT)"
|
||||
|
||||
build:
|
||||
go build $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/logpile
|
||||
@if [ "$(SKIP_PCI_IDS_UPDATE)" != "1" ]; then ./scripts/update-pci-ids.sh --best-effort; fi
|
||||
CGO_ENABLED=0 go build $(LDFLAGS) -o bin/$(BINARY_NAME) ./cmd/logpile
|
||||
|
||||
run: build
|
||||
./bin/$(BINARY_NAME)
|
||||
@@ -19,11 +20,12 @@ test:
|
||||
|
||||
# Cross-platform builds
|
||||
build-all: clean
|
||||
GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/logpile
|
||||
GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/logpile
|
||||
GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/logpile
|
||||
GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/logpile
|
||||
GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/logpile
|
||||
@if [ "$(SKIP_PCI_IDS_UPDATE)" != "1" ]; then ./scripts/update-pci-ids.sh --best-effort; fi
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-amd64 ./cmd/logpile
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-linux-arm64 ./cmd/logpile
|
||||
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-amd64 ./cmd/logpile
|
||||
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-darwin-arm64 ./cmd/logpile
|
||||
CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o bin/$(BINARY_NAME)-windows-amd64.exe ./cmd/logpile
|
||||
|
||||
dev:
|
||||
go run ./cmd/logpile
|
||||
@@ -33,3 +35,6 @@ fmt:
|
||||
|
||||
lint:
|
||||
golangci-lint run
|
||||
|
||||
update-pci-ids:
|
||||
./scripts/update-pci-ids.sh --sync-submodule
|
||||
|
||||
33
README.md
33
README.md
@@ -1,18 +1,29 @@
|
||||
# logpile
|
||||
# LOGPile
|
||||
|
||||
BMC Log analyzer
|
||||
Standalone Go application for BMC diagnostics analysis with an embedded web UI.
|
||||
|
||||
## Запуск из исходников
|
||||
## What it does
|
||||
|
||||
- Parses vendor diagnostic archives into a normalized hardware inventory
|
||||
- Collects live BMC data via Redfish
|
||||
- Exports normalized data as CSV, raw re-analysis bundles, and Reanimator JSON
|
||||
- Runs as a single Go binary with embedded UI assets
|
||||
|
||||
## Documentation
|
||||
|
||||
- Shared engineering rules: [`bible/README.md`](bible/README.md)
|
||||
- Project architecture and API contracts: [`bible-local/README.md`](bible-local/README.md)
|
||||
- Agent entrypoints: [`AGENTS.md`](AGENTS.md), [`CLAUDE.md`](CLAUDE.md)
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
# Сборка
|
||||
make build
|
||||
|
||||
# Запуск веб-сервера
|
||||
./bin/logpile serve
|
||||
|
||||
# Открыть в браузере
|
||||
open http://localhost:8080
|
||||
./bin/logpile
|
||||
```
|
||||
|
||||
Требования: Go 1.22+
|
||||
Default port: `8082`
|
||||
|
||||
## License
|
||||
|
||||
MIT (see `LICENSE`)
|
||||
|
||||
1
bible
Submodule
1
bible
Submodule
Submodule bible added at 0c829182a1
43
bible-local/01-overview.md
Normal file
43
bible-local/01-overview.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# 01 — Overview
|
||||
|
||||
## Purpose
|
||||
|
||||
LOGPile is a standalone Go application for BMC diagnostics analysis with an embedded web UI.
|
||||
It runs as a single binary and normalizes hardware data from archives or live Redfish collection.
|
||||
|
||||
## Operating modes
|
||||
|
||||
| Mode | Entry point | Outcome |
|
||||
|------|-------------|---------|
|
||||
| Archive upload | `POST /api/upload` | Parse a supported archive, raw export bundle, or JSON snapshot into `AnalysisResult` |
|
||||
| Live collection | `POST /api/collect` | Collect from a live BMC via Redfish and store the result in memory |
|
||||
| Batch convert | `POST /api/convert` | Convert multiple supported input files into Reanimator JSON in a ZIP artifact |
|
||||
|
||||
All modes converge on the same normalized hardware model and exporter pipeline.
|
||||
|
||||
## In scope
|
||||
|
||||
- Single-binary desktop/server utility with embedded UI
|
||||
- Vendor archive parsing and live Redfish collection
|
||||
- Canonical hardware inventory across UI and exports
|
||||
- Reopenable raw export bundles for future re-analysis
|
||||
- Reanimator export and batch conversion workflows
|
||||
- Embedded `pci.ids` lookup for vendor/device name enrichment
|
||||
|
||||
## Current vendor coverage
|
||||
|
||||
- Dell TSR
|
||||
- H3C SDS G5/G6
|
||||
- Inspur / Kaytus
|
||||
- NVIDIA HGX Field Diagnostics
|
||||
- NVIDIA Bug Report
|
||||
- Unraid
|
||||
- XigmaNAS
|
||||
- Generic fallback parser
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Persistent storage or multi-user state
|
||||
- Production IPMI collection
|
||||
- Authentication/authorization on the built-in HTTP server
|
||||
- Long-term server-side job history beyond in-memory process lifetime
|
||||
98
bible-local/02-architecture.md
Normal file
98
bible-local/02-architecture.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# 02 — Architecture
|
||||
|
||||
## Runtime stack
|
||||
|
||||
| Layer | Implementation |
|
||||
|-------|----------------|
|
||||
| Language | Go 1.22+ |
|
||||
| HTTP | `net/http` + `http.ServeMux` |
|
||||
| UI | Embedded templates and static assets via `go:embed` |
|
||||
| State | In-memory only |
|
||||
| Build | `CGO_ENABLED=0`, single binary |
|
||||
|
||||
Default port: `8082`
|
||||
|
||||
Audit result rendering is delegated to embedded `reanimator/chart`, vendored as git submodule `internal/chart`.
|
||||
LOGPile remains responsible for upload, collection, parsing, normalization, and Reanimator export generation.
|
||||
|
||||
## Code map
|
||||
|
||||
```text
|
||||
cmd/logpile/main.go entrypoint and CLI flags
|
||||
internal/server/ HTTP handlers, jobs, upload/export flows
|
||||
internal/ingest/ source-family orchestration for upload and raw replay
|
||||
internal/collector/ live collection and Redfish replay
|
||||
internal/analyzer/ shared analysis helpers
|
||||
internal/parser/ archive extraction and parser dispatch
|
||||
internal/exporter/ CSV and Reanimator conversion
|
||||
internal/chart/ vendored `reanimator/chart` viewer submodule
|
||||
internal/models/ stable data contracts
|
||||
web/ embedded UI assets
|
||||
```
|
||||
|
||||
## Server state
|
||||
|
||||
`internal/server.Server` stores:
|
||||
|
||||
| Field | Purpose |
|
||||
|------|---------|
|
||||
| `result` | Current `AnalysisResult` shown in UI and used by exports |
|
||||
| `detectedVendor` | Parser/collector identity for the current dataset |
|
||||
| `rawExport` | Reopenable raw-export package associated with current result |
|
||||
| `jobManager` | Shared async job state for collect and convert flows |
|
||||
| `collectors` | Registered live collectors (`redfish`, `ipmi`) |
|
||||
| `convertOutput` | Temporary ZIP artifacts for batch convert downloads |
|
||||
|
||||
State is replaced only on successful upload or successful live collection.
|
||||
Failed or canceled jobs do not overwrite the previous dataset.
|
||||
|
||||
## Main flows
|
||||
|
||||
### Upload
|
||||
|
||||
1. `POST /api/upload` receives multipart field `archive`
|
||||
2. `internal/ingest.Service` resolves the source family
|
||||
3. JSON inputs are checked for raw-export package or `AnalysisResult` snapshot
|
||||
4. Non-JSON archives go through the archive parser family
|
||||
5. Archive metadata is normalized onto `AnalysisResult`
|
||||
6. Result becomes the current in-memory dataset
|
||||
|
||||
### Live collect
|
||||
|
||||
1. `POST /api/collect` validates request fields
|
||||
2. Server creates an async job and returns `202 Accepted`
|
||||
3. Selected collector gathers raw data
|
||||
4. For Redfish, collector runs minimal discovery, matches Redfish profiles, and builds an acquisition plan
|
||||
5. Collector applies profile tuning hints (for example crawl breadth, prefetch, bounded plan-B passes)
|
||||
6. Collector saves `raw_payloads.redfish_tree` plus acquisition diagnostics
|
||||
7. Result is normalized, source metadata applied, and state replaced on success
|
||||
|
||||
### Batch convert
|
||||
|
||||
1. `POST /api/convert` accepts multiple files
|
||||
2. Each supported file is analyzed independently
|
||||
3. Successful results are converted to Reanimator JSON
|
||||
4. Outputs are packaged into a temporary ZIP artifact
|
||||
5. Client polls job status and downloads the artifact when ready
|
||||
|
||||
## Redfish design rule
|
||||
|
||||
Live Redfish collection and offline Redfish re-analysis must use the same replay path.
|
||||
The collector first captures `raw_payloads.redfish_tree`, then the replay logic builds the normalized result.
|
||||
|
||||
Redfish is being split into two coordinated phases:
|
||||
- acquisition: profile-driven snapshot collection strategy
|
||||
- analysis: replay over the saved snapshot with the same profile framework
|
||||
|
||||
## PCI IDs lookup
|
||||
|
||||
Lookup order:
|
||||
|
||||
1. Embedded `internal/parser/vendors/pciids/pci.ids`
|
||||
2. `./pci.ids`
|
||||
3. `/usr/share/hwdata/pci.ids`
|
||||
4. `/usr/share/misc/pci.ids`
|
||||
5. `/opt/homebrew/share/pciids/pci.ids`
|
||||
6. Extra paths from `LOGPILE_PCI_IDS_PATH`
|
||||
|
||||
Later sources override earlier ones for the same IDs.
|
||||
199
bible-local/03-api.md
Normal file
199
bible-local/03-api.md
Normal file
@@ -0,0 +1,199 @@
|
||||
# 03 — API Reference
|
||||
|
||||
## Conventions
|
||||
|
||||
- All endpoints are under `/api/`
|
||||
- JSON responses are used unless the endpoint downloads a file
|
||||
- Async jobs share the same status model: `queued`, `running`, `success`, `failed`, `canceled`
|
||||
- Export filenames use `YYYY-MM-DD (MODEL) - SERIAL.<ext>` when board metadata exists
|
||||
- Embedded chart viewer routes live under `/chart/` and return HTML/CSS, not JSON
|
||||
|
||||
## Input endpoints
|
||||
|
||||
### `POST /api/upload`
|
||||
|
||||
Uploads one file in multipart field `archive`.
|
||||
|
||||
Accepted inputs:
|
||||
- supported archive/log formats from the parser registry
|
||||
- `.json` `AnalysisResult` snapshots
|
||||
- raw-export JSON packages
|
||||
- raw-export ZIP bundles
|
||||
|
||||
Result:
|
||||
- parses or replays the input
|
||||
- stores the result as current in-memory state
|
||||
- returns parsed summary JSON
|
||||
|
||||
Related helper:
|
||||
- `GET /api/file-types` returns `archive_extensions`, `upload_extensions`, and `convert_extensions`
|
||||
|
||||
### `POST /api/collect`
|
||||
|
||||
Starts a live collection job.
|
||||
|
||||
Request body:
|
||||
|
||||
```json
|
||||
{
|
||||
"host": "bmc01.example.local",
|
||||
"protocol": "redfish",
|
||||
"port": 443,
|
||||
"username": "admin",
|
||||
"auth_type": "password",
|
||||
"password": "secret",
|
||||
"tls_mode": "insecure"
|
||||
}
|
||||
```
|
||||
|
||||
Supported values:
|
||||
- `protocol`: `redfish` or `ipmi`
|
||||
- `auth_type`: `password` or `token`
|
||||
- `tls_mode`: `strict` or `insecure`
|
||||
|
||||
Responses:
|
||||
- `202` on accepted job creation
|
||||
- `400` on malformed JSON
|
||||
- `422` on validation errors
|
||||
|
||||
Optional request field:
|
||||
- `power_on_if_host_off`: when `true`, Redfish collection may power on the host before collection if preflight found it powered off
|
||||
|
||||
### `POST /api/collect/probe`
|
||||
|
||||
Checks live BMC API connectivity and returns the host power state before collection starts.
|
||||
|
||||
Typical request body is the same as `POST /api/collect`.
|
||||
|
||||
Typical response fields:
|
||||
- `reachable`
|
||||
- `protocol`
|
||||
- `host_power_state`
|
||||
- `host_powered_on`
|
||||
- `power_control_available`
|
||||
- `message`
|
||||
|
||||
### `GET /api/collect/{id}`
|
||||
|
||||
Returns async collection job status, progress, timestamps, and accumulated logs.
|
||||
|
||||
### `POST /api/collect/{id}/cancel`
|
||||
|
||||
Requests cancellation for a running collection job.
|
||||
|
||||
### `POST /api/convert`
|
||||
|
||||
Starts a batch conversion job that accepts multiple files under `files[]` or `files`.
|
||||
Each supported file is parsed independently and converted to Reanimator JSON.
|
||||
|
||||
Response fields:
|
||||
- `job_id`
|
||||
- `status`
|
||||
- `accepted`
|
||||
- `skipped`
|
||||
- `total_files`
|
||||
|
||||
### `GET /api/convert/{id}`
|
||||
|
||||
Returns batch convert job status using the same async job envelope as collection.
|
||||
|
||||
### `GET /api/convert/{id}/download`
|
||||
|
||||
Downloads the ZIP artifact produced by a successful convert job.
|
||||
|
||||
## Read endpoints
|
||||
|
||||
### `GET /api/status`
|
||||
|
||||
Returns source metadata for the current dataset.
|
||||
If nothing is loaded, response is `{ "loaded": false }`.
|
||||
|
||||
Typical fields:
|
||||
- `loaded`
|
||||
- `filename`
|
||||
- `vendor`
|
||||
- `source_type`
|
||||
- `protocol`
|
||||
- `target_host`
|
||||
- `source_timezone`
|
||||
- `collected_at`
|
||||
- `stats`
|
||||
|
||||
### `GET /api/config`
|
||||
|
||||
Returns the main UI configuration payload, including:
|
||||
- source metadata
|
||||
- `hardware.board`
|
||||
- `hardware.firmware`
|
||||
- canonical `hardware.devices`
|
||||
- computed specification lines
|
||||
|
||||
### `GET /api/events`
|
||||
|
||||
Returns events sorted newest first.
|
||||
|
||||
### `GET /api/sensors`
|
||||
|
||||
Returns parsed sensors plus synthesized PSU voltage sensors when telemetry is available.
|
||||
|
||||
### `GET /api/serials`
|
||||
|
||||
Returns serial-oriented inventory built from canonical devices.
|
||||
|
||||
### `GET /api/firmware`
|
||||
|
||||
Returns firmware-oriented inventory built from canonical devices.
|
||||
|
||||
### `GET /api/parse-errors`
|
||||
|
||||
Returns normalized parse and collection issues combined from:
|
||||
- Redfish fetch errors in `raw_payloads`
|
||||
- raw-export collect logs
|
||||
- derived partial-inventory warnings
|
||||
|
||||
### `GET /api/parsers`
|
||||
|
||||
Returns registered parser metadata.
|
||||
|
||||
### `GET /api/file-types`
|
||||
|
||||
Returns supported file extensions for upload and batch convert.
|
||||
|
||||
## Viewer endpoints
|
||||
|
||||
### `GET /chart/current`
|
||||
|
||||
Renders the current in-memory dataset as Reanimator HTML using embedded `reanimator/chart`.
|
||||
The server first converts the current result to Reanimator JSON, then passes that snapshot to the viewer.
|
||||
|
||||
### `GET /chart/static/...`
|
||||
|
||||
Serves embedded `reanimator/chart` static assets.
|
||||
|
||||
## Export endpoints
|
||||
|
||||
### `GET /api/export/csv`
|
||||
|
||||
Downloads serial-number CSV.
|
||||
|
||||
### `GET /api/export/json`
|
||||
|
||||
Downloads a raw-export artifact for reopen and re-analysis.
|
||||
Current implementation emits a ZIP bundle containing:
|
||||
- `raw_export.json`
|
||||
- `collect.log`
|
||||
- `parser_fields.json`
|
||||
|
||||
### `GET /api/export/reanimator`
|
||||
|
||||
Downloads Reanimator JSON built from the current normalized result.
|
||||
|
||||
## Management endpoints
|
||||
|
||||
### `DELETE /api/clear`
|
||||
|
||||
Clears current in-memory dataset, raw export state, and temporary convert artifacts.
|
||||
|
||||
### `POST /api/shutdown`
|
||||
|
||||
Gracefully shuts down the process after responding.
|
||||
87
bible-local/04-data-models.md
Normal file
87
bible-local/04-data-models.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# 04 — Data Models
|
||||
|
||||
## Core contract: `AnalysisResult`
|
||||
|
||||
`internal/models/models.go` defines the shared result passed between parsers, collectors, server handlers, and exporters.
|
||||
|
||||
Stability rule:
|
||||
- do not rename or remove JSON fields from `AnalysisResult`
|
||||
- additive fields are allowed
|
||||
- UI and exporter compatibility depends on this shape remaining stable
|
||||
|
||||
Key fields:
|
||||
|
||||
| Field | Meaning |
|
||||
|------|---------|
|
||||
| `filename` | Original upload name or synthesized live source name |
|
||||
| `source_type` | `archive` or `api` |
|
||||
| `protocol` | `redfish`, `ipmi`, or empty for archive uploads |
|
||||
| `target_host` | Hostname or IP for live collection |
|
||||
| `source_timezone` | Source timezone/offset if known |
|
||||
| `collected_at` | Canonical collection/upload time |
|
||||
| `raw_payloads` | Raw source data used for replay or diagnostics |
|
||||
| `events` | Parsed event timeline |
|
||||
| `fru` | FRU-derived inventory details |
|
||||
| `sensors` | Sensor readings |
|
||||
| `hardware` | Normalized hardware inventory |
|
||||
|
||||
## `HardwareConfig`
|
||||
|
||||
Main sections:
|
||||
|
||||
```text
|
||||
hardware.board
|
||||
hardware.devices
|
||||
hardware.cpus
|
||||
hardware.memory
|
||||
hardware.storage
|
||||
hardware.volumes
|
||||
hardware.pcie_devices
|
||||
hardware.gpus
|
||||
hardware.network_adapters
|
||||
hardware.network_cards
|
||||
hardware.power_supplies
|
||||
hardware.firmware
|
||||
```
|
||||
|
||||
`network_cards` is legacy/alternate source data.
|
||||
`hardware.devices` is the canonical cross-section inventory.
|
||||
|
||||
## Canonical inventory: `hardware.devices`
|
||||
|
||||
`hardware.devices` is the single source of truth for device-oriented UI and Reanimator export.
|
||||
|
||||
Required rules:
|
||||
|
||||
1. UI hardware views must read from `hardware.devices`
|
||||
2. Reanimator conversion must derive device sections from `hardware.devices`
|
||||
3. UI/export mismatches are bugs, not accepted divergence
|
||||
4. New shared device fields belong in `HardwareDevice` first
|
||||
|
||||
Deduplication priority:
|
||||
|
||||
| Priority | Key |
|
||||
|----------|-----|
|
||||
| 1 | usable `serial_number` |
|
||||
| 2 | `bdf` |
|
||||
| 3 | keep records separate |
|
||||
|
||||
## Raw payloads
|
||||
|
||||
`raw_payloads` is authoritative for replayable sources.
|
||||
|
||||
Current important payloads:
|
||||
- `redfish_tree`
|
||||
- `redfish_fetch_errors`
|
||||
- `source_timezone`
|
||||
|
||||
Normalized hardware fields are derived output, not the long-term source of truth.
|
||||
|
||||
## Raw export package
|
||||
|
||||
`/api/export/json` produces a reopenable raw-export artifact.
|
||||
|
||||
Design rules:
|
||||
- raw source stays authoritative
|
||||
- uploads of raw-export artifacts must re-analyze from raw source
|
||||
- parsed snapshots inside the bundle are diagnostic only
|
||||
154
bible-local/05-collectors.md
Normal file
154
bible-local/05-collectors.md
Normal file
@@ -0,0 +1,154 @@
|
||||
# 05 — Collectors
|
||||
|
||||
Collectors live in `internal/collector/`.
|
||||
|
||||
Core files:
|
||||
- `registry.go` for protocol registration
|
||||
- `redfish.go` for live collection
|
||||
- `redfish_replay.go` for replay from raw payloads
|
||||
- `redfish_replay_gpu.go` for profile-driven GPU replay collectors and GPU fallback helpers
|
||||
- `redfish_replay_storage.go` for profile-driven storage replay collectors and storage recovery helpers
|
||||
- `redfish_replay_inventory.go` for replay inventory collectors (PCIe, NIC, BMC MAC, NIC enrichment)
|
||||
- `redfish_replay_fru.go` for board fallback helpers and Assembly/FRU replay extraction
|
||||
- `redfish_replay_profiles.go` for profile-driven replay helpers and vendor-aware recovery helpers
|
||||
- `redfishprofile/` for Redfish profile matching and acquisition/analysis hooks
|
||||
- `ipmi_mock.go` for the placeholder IPMI implementation
|
||||
- `types.go` for request/progress contracts
|
||||
|
||||
## Redfish collector
|
||||
|
||||
Status: active production path.
|
||||
|
||||
Request fields passed from the server:
|
||||
- `host`
|
||||
- `port`
|
||||
- `username`
|
||||
- `auth_type`
|
||||
- credential field (`password` or token)
|
||||
- `tls_mode`
|
||||
- optional `power_on_if_host_off`
|
||||
|
||||
### Core rule
|
||||
|
||||
Live collection and replay must stay behaviorally aligned.
|
||||
If the collector adds a fallback, probe, or normalization rule, replay must mirror it.
|
||||
|
||||
### Preflight and host power
|
||||
|
||||
- `Probe()` may be used before collection to verify API connectivity and current host `PowerState`
|
||||
- if the host is off and the user chose power-on, the collector may issue `ComputerSystem.Reset`
|
||||
with `ResetType=On`
|
||||
- power-on attempts are bounded and logged
|
||||
- after a successful power-on, the collector waits an extra stabilization window, then checks
|
||||
`PowerState` again and only starts collection if the host is still on
|
||||
- if the collector powered on the host itself for collection, it must attempt to power it back off
|
||||
after collection completes
|
||||
- if the host was already on before collection, the collector must not power it off afterward
|
||||
- if power-on fails, collection still continues against the powered-off host
|
||||
- all power-control decisions and attempts must be visible in the collection log so they are
|
||||
preserved in raw-export bundles
|
||||
|
||||
### Discovery model
|
||||
|
||||
The collector does not rely on one fixed vendor tree.
|
||||
It discovers and follows Redfish resources dynamically from root collections such as:
|
||||
- `Systems`
|
||||
- `Chassis`
|
||||
- `Managers`
|
||||
|
||||
After minimal discovery the collector builds `MatchSignals` and selects a Redfish profile mode:
|
||||
- `matched` when one or more profiles score with high confidence
|
||||
- `fallback` when vendor/platform confidence is low; in this mode the collector aggregates safe additive profile probes to maximize snapshot completeness
|
||||
|
||||
Profile modules may contribute:
|
||||
- primary acquisition seeds
|
||||
- bounded `PlanBPaths` for secondary recovery
|
||||
- critical paths
|
||||
- acquisition notes/diagnostics
|
||||
- tuning hints such as snapshot document cap, prefetch behavior, and expensive post-probe toggles
|
||||
- post-probe policy for numeric collection recovery, direct NVMe `Disk.Bay` recovery, and sensor post-probe enablement
|
||||
- recovery policy for critical collection member retry, slow numeric plan-B probing, and profile-specific plan-B activation
|
||||
- scoped path policy for discovered `Systems/*`, `Chassis/*`, and `Managers/*` branches when a profile needs extra seeds/critical targets beyond the vendor-neutral core set
|
||||
- prefetch policy for which critical paths are eligible for adaptive prefetch and which path shapes are explicitly excluded
|
||||
|
||||
Model- or topology-specific `CriticalPaths` and profile `PlanBPaths` must live in the profile
|
||||
module that owns the behavior. The collector core may execute those paths, but it should not
|
||||
hardcode vendor-specific recovery targets.
|
||||
The same rule applies to expensive post-probe decisions: the collector core may execute bounded
|
||||
post-probe loops, but profiles own whether those loops are enabled for a given platform shape.
|
||||
The same rule applies to critical recovery passes: the collector core may run bounded plan-B
|
||||
loops, but profiles own whether member retry, slow numeric recovery, and profile-specific plan-B
|
||||
passes are enabled.
|
||||
When a profile needs extra discovered-path branches such as storage controller subtrees, it must
|
||||
provide them as scoped suffix policy rather than by hardcoding platform-shaped suffixes into the
|
||||
collector core baseline seed list.
|
||||
The same applies to prefetch shaping: the collector core may execute adaptive prefetch, but
|
||||
profiles own the include/exclude rules for which critical paths should participate.
|
||||
The same applies to critical inventory shaping: the collector core should keep only a minimal
|
||||
vendor-neutral critical baseline, while profiles own additional system/chassis/manager critical
|
||||
suffixes and top-level critical targets.
|
||||
Resolved live acquisition plans should be built inside `redfishprofile/`, not by hand in
|
||||
`redfish.go`. The collector core should receive discovered resources plus the selected profile
|
||||
plan and then execute the resolved seed/critical paths.
|
||||
When profile behavior depends on what discovery actually returned, use a post-discovery
|
||||
refinement hook in `redfishprofile/` instead of hardcoding guessed absolute paths in the static
|
||||
plan. MSI GPU chassis refinement is the reference example.
|
||||
|
||||
Live Redfish collection must expose profile-match diagnostics:
|
||||
- collector logs must include the selected modules and score for every known module
|
||||
- job status responses must carry structured `active_modules` and `module_scores`
|
||||
- the collect page should render active modules as chips from structured status data, not by
|
||||
parsing log lines
|
||||
|
||||
On replay, profile-derived analysis directives may enable vendor-specific inventory linking
|
||||
helpers such as processor-GPU fallback, chassis-ID alias resolution, and bounded storage recovery.
|
||||
Replay should now resolve a structured analysis plan inside `redfishprofile/`, analogous to the
|
||||
live acquisition plan. The replay core may execute collectors against the resolved directives, but
|
||||
snapshot-aware vendor decisions should live in profile analysis hooks, not in `redfish_replay.go`.
|
||||
GPU and storage replay executors should consume the resolved analysis plan directly, not a raw
|
||||
`AnalysisDirectives` struct, so the boundary between planning and execution stays explicit.
|
||||
|
||||
Profile matching and acquisition tuning must be regression-tested against repo-owned compact
|
||||
fixtures under `internal/collector/redfishprofile/testdata/`, derived from representative
|
||||
raw-export snapshots, for at least MSI and Supermicro shapes.
|
||||
When multiple raw-export snapshots exist for the same platform, profile selection must remain
|
||||
stable across those sibling fixtures unless the topology actually changes.
|
||||
Analysis-plan metadata should be stored in replay raw payloads so vendor hook activation is
|
||||
debuggable offline.
|
||||
|
||||
### Stored raw data
|
||||
|
||||
Important raw payloads:
|
||||
- `raw_payloads.redfish_tree`
|
||||
- `raw_payloads.redfish_fetch_errors`
|
||||
- `raw_payloads.redfish_profiles`
|
||||
- `raw_payloads.source_timezone` when available
|
||||
|
||||
### Snapshot crawler rules
|
||||
|
||||
- bounded by `LOGPILE_REDFISH_SNAPSHOT_MAX_DOCS`
|
||||
- prioritized toward high-value inventory paths
|
||||
- tolerant of expected vendor-specific failures
|
||||
- normalizes `@odata.id` values before queueing
|
||||
|
||||
### Redfish implementation guidance
|
||||
|
||||
When changing collection logic:
|
||||
|
||||
1. Prefer profile modules over ad-hoc vendor branches in the collector core
|
||||
2. Keep expensive probing bounded
|
||||
3. Deduplicate by serial, then BDF, then location/model fallbacks
|
||||
4. Preserve replay determinism from saved raw payloads
|
||||
5. Add tests for both the motivating topology and a negative case
|
||||
|
||||
### Known vendor fallbacks
|
||||
|
||||
- empty standard drive collections may trigger bounded `Disk.Bay` probing
|
||||
- `Storage.Links.Enclosures[*]` may be followed to recover physical drives
|
||||
- `PowerSubsystem/PowerSupplies` is preferred over legacy `Power` when available
|
||||
|
||||
## IPMI collector
|
||||
|
||||
Status: mock scaffold only.
|
||||
|
||||
It remains registered for protocol completeness, but it is not a real collection path.
|
||||
149
bible-local/06-parsers.md
Normal file
149
bible-local/06-parsers.md
Normal file
@@ -0,0 +1,149 @@
|
||||
# 06 — Parsers
|
||||
|
||||
## Framework
|
||||
|
||||
Parsers live in `internal/parser/` and vendor implementations live in `internal/parser/vendors/`.
|
||||
|
||||
Core behavior:
|
||||
- registration uses `init()` side effects
|
||||
- all registered parsers run `Detect()`
|
||||
- the highest-confidence parser wins
|
||||
- generic fallback stays last and low-confidence
|
||||
|
||||
`VendorParser` contract:
|
||||
|
||||
```go
|
||||
type VendorParser interface {
|
||||
Name() string
|
||||
Vendor() string
|
||||
Version() string
|
||||
Detect(files []ExtractedFile) int
|
||||
Parse(files []ExtractedFile) (*models.AnalysisResult, error)
|
||||
}
|
||||
```
|
||||
|
||||
## Adding a parser
|
||||
|
||||
1. Create `internal/parser/vendors/<vendor>/`
|
||||
2. Start from `internal/parser/vendors/template/parser.go.template`
|
||||
3. Implement `Detect()` and `Parse()`
|
||||
4. Add a blank import in `internal/parser/vendors/vendors.go`
|
||||
5. Add at least one positive and one negative detection test
|
||||
|
||||
## Data quality rules
|
||||
|
||||
### System firmware only in `hardware.firmware`
|
||||
|
||||
`hardware.firmware` must contain system-level firmware only.
|
||||
Device-bound firmware belongs on the device record and must not be duplicated at the top level.
|
||||
|
||||
### Strip embedded MAC addresses from model names
|
||||
|
||||
If a source embeds ` - XX:XX:XX:XX:XX:XX` in a model/name field, remove that suffix before storing it.
|
||||
|
||||
### Use `pci.ids` for empty or generic PCI model names
|
||||
|
||||
When `vendor_id` and `device_id` are known but the model name is missing or generic, resolve the name via `internal/parser/vendors/pciids`.
|
||||
|
||||
## Active vendor coverage
|
||||
|
||||
| Vendor ID | Input family | Notes |
|
||||
|-----------|--------------|-------|
|
||||
| `dell` | TSR ZIP archives | Broad hardware, firmware, sensors, lifecycle events |
|
||||
| `h3c_g5` | H3C SDS G5 bundles | INI/XML/CSV-driven hardware and event parsing |
|
||||
| `h3c_g6` | H3C SDS G6 bundles | Similar flow with G6-specific files |
|
||||
| `inspur` | onekeylog archives | FRU/SDR plus optional Redis enrichment |
|
||||
| `nvidia` | HGX Field Diagnostics | GPU- and fabric-heavy diagnostic input |
|
||||
| `nvidia_bug_report` | `nvidia-bug-report-*.log.gz` | dmidecode, lspci, NVIDIA driver sections |
|
||||
| `unraid` | Unraid diagnostics/log bundles | Server and storage-focused parsing |
|
||||
| `xigmanas` | XigmaNAS plain logs | FreeBSD/NAS-oriented inventory |
|
||||
| `generic` | fallback | Low-confidence text fallback when nothing else matches |
|
||||
|
||||
## Practical guidance
|
||||
|
||||
- Be conservative with high detect scores
|
||||
- Prefer filling missing fields over overwriting stronger source data
|
||||
- Keep parser version constants current when behavior changes
|
||||
- Any new vendor-specific filtering or dedup logic must ship with tests for that vendor format
|
||||
|
||||
### Unraid (`unraid`)

**Archive format:** Unraid diagnostics archive contents (text-heavy diagnostics directories).
|
||||
|
||||
**Detection:** Combines filename/path markers (`diagnostics-*`, `unraid-*.txt`, `vars.txt`)
|
||||
with content markers (e.g. `Unraid kernel build`, parity data markers).
|
||||
|
||||
**Extracted data (current):**
|
||||
- Board / BIOS metadata (from motherboard/system files)
|
||||
- CPU summary (from `lscpu.txt`)
|
||||
- Memory modules (from diagnostics memory file)
|
||||
- Storage devices (from `vars.txt` + SMART files)
|
||||
- Syslog events
|
||||
|
||||
---
|
||||
|
||||
### H3C SDS G5 (`h3c_g5`)
|
||||
|
||||
**Status:** Ready (v1.0.0). Tested on H3C UniServer R4900 G5 SDS archives.
|
||||
|
||||
**Archive format:** `.sds` (tar archive)
|
||||
|
||||
**Detection:** `hardware_info.ini`, `hardware.info`, `firmware_version.ini`, `user/test*.csv`, plus H3C markers.
|
||||
|
||||
**Extracted data (current):**
|
||||
- Board/FRU inventory (`FRUInfo.ini`, `board_info.ini`)
|
||||
- Firmware list (`firmware_version.ini`)
|
||||
- CPU inventory (`hardware_info.ini`)
|
||||
- Memory DIMM inventory (`hardware_info.ini`)
|
||||
- Storage inventory (`hardware.info`, `storage_disk.ini`, `NVMe_info.txt`, RAID text enrichments)
|
||||
- Logical RAID volumes (`raid.json`, `Storage_RAID-*.txt`)
|
||||
- Sensor snapshot (`sensor_info.ini`)
|
||||
- SEL events (`user/test.csv`, `user/test1.csv`, fallback `Sel.json` / `sel_list.txt`)
|
||||
|
||||
---
|
||||
|
||||
### H3C SDS G6 (`h3c_g6`)
|
||||
|
||||
**Status:** Ready (v1.0.0). Tested on H3C UniServer R4700 G6 SDS archives.
|
||||
|
||||
**Archive format:** `.sds` (tar archive)
|
||||
|
||||
**Detection:** `CPUDetailInfo.xml`, `MemoryDetailInfo.xml`, `firmware_version.json`, `Sel.json`, plus H3C markers.
|
||||
|
||||
**Extracted data (current):**
|
||||
- Board/FRU inventory (`FRUInfo.ini`, `board_info.ini`)
|
||||
- Firmware list (`firmware_version.json`)
|
||||
- CPU inventory (`CPUDetailInfo.xml`)
|
||||
- Memory DIMM inventory (`MemoryDetailInfo.xml`)
|
||||
- Storage inventory + capacity/model/interface (`storage_disk.ini`, `Storage_RAID-*.txt`, `NVMe_info.txt`)
|
||||
- Logical RAID volumes (`raid.json`, fallback from `Storage_RAID-*.txt` when available)
|
||||
- Sensor snapshot (`sensor_info.ini`)
|
||||
- SEL events (`user/Sel.json`, fallback `user/sel_list.txt`)
|
||||
|
||||
---
|
||||
|
||||
### Generic text fallback (`generic`)
|
||||
|
||||
**Status:** Ready (v1.0.0).
|
||||
|
||||
**Confidence:** 15 (lowest — wins only when no other parser scores higher)
|
||||
|
||||
**Purpose:** Fallback for any text file or single `.gz` file not matching a specific vendor.
|
||||
|
||||
**Behavior:**
|
||||
- If filename matches `nvidia-bug-report-*.log.gz`: extracts driver version and GPU list.
|
||||
- Otherwise: confirms file is text (not binary) and records a basic "Text File" event.
|
||||
|
||||
---
|
||||
|
||||
## Supported vendor matrix
|
||||
|
||||
| Vendor | ID | Status | Tested on |
|
||||
|--------|----|--------|-----------|
|
||||
| Dell TSR | `dell` | Ready | TSR nested zip archives |
|
||||
| Inspur / Kaytus | `inspur` | Ready | KR4268X2 onekeylog |
|
||||
| NVIDIA HGX Field Diag | `nvidia` | Ready | Various HGX servers |
|
||||
| NVIDIA Bug Report | `nvidia_bug_report` | Ready | H100 systems |
|
||||
| Unraid | `unraid` | Ready | Unraid diagnostics archives |
|
||||
| XigmaNAS | `xigmanas` | Ready | FreeBSD NAS logs |
|
||||
| H3C SDS G5 | `h3c_g5` | Ready | H3C UniServer R4900 G5 SDS archives |
|
||||
| H3C SDS G6 | `h3c_g6` | Ready | H3C UniServer R4700 G6 SDS archives |
|
||||
| Generic fallback | `generic` | Ready | Any text file |
|
||||
93
bible-local/07-exporters.md
Normal file
93
bible-local/07-exporters.md
Normal file
@@ -0,0 +1,93 @@
|
||||
# 07 — Exporters
|
||||
|
||||
## Export surfaces
|
||||
|
||||
| Endpoint | Output | Purpose |
|
||||
|----------|--------|---------|
|
||||
| `GET /api/export/csv` | CSV | Serial-number export |
|
||||
| `GET /api/export/json` | raw-export ZIP bundle | Reopen and re-analyze later |
|
||||
| `GET /api/export/reanimator` | JSON | Reanimator hardware payload |
|
||||
| `POST /api/convert` | async ZIP artifact | Batch archive-to-Reanimator conversion |
|
||||
|
||||
## Raw export
|
||||
|
||||
Raw export is not a final report dump.
|
||||
It is a replayable artifact that preserves enough source data for future parser improvements.
|
||||
|
||||
Current bundle contents:
|
||||
- `raw_export.json`
|
||||
- `collect.log`
|
||||
- `parser_fields.json`
|
||||
|
||||
Design rules:
|
||||
- raw source is authoritative
|
||||
- uploads of raw export must replay from raw source
|
||||
- parsed snapshots inside the bundle are diagnostic only
|
||||
|
||||
## Reanimator export
|
||||
|
||||
Implementation files:
|
||||
- `internal/exporter/reanimator_models.go`
|
||||
- `internal/exporter/reanimator_converter.go`
|
||||
- `internal/server/handlers.go`
|
||||
- `bible-local/docs/hardware-ingest-contract.md`
|
||||
|
||||
Conversion rules:
|
||||
- canonical source is merged canonical inventory derived from `hardware.devices` plus legacy hardware slices
|
||||
- output must conform to the strict Reanimator ingest contract in `docs/hardware-ingest-contract.md`
|
||||
- local mirror currently tracks upstream contract `v2.7`
|
||||
- timestamps are RFC3339
|
||||
- status is normalized to Reanimator-friendly values
|
||||
- missing component serial numbers must stay absent; LOGPile must not synthesize fake serials for Reanimator export
|
||||
- CPU `firmware` field means CPU microcode, not generic processor firmware inventory
|
||||
- `NULL`-style board manufacturer/product values are treated as absent
|
||||
- optional component telemetry/health fields are exported when LOGPile already has the data
|
||||
- partial `hardware.devices` must not suppress components still present only in legacy parser/collector fields
|
||||
- `present` is not serialized for exported components; presence is expressed by the existence of the component record itself
|
||||
- Reanimator ingest may apply its own server-side fallback serial rules for CPU and PCIe when LOGPile leaves serials absent
|
||||
|
||||
## Inclusion rules
|
||||
|
||||
Included:
|
||||
- PCIe-class devices when the component itself is present, even if serial number is missing
|
||||
- contract `v2.7` component telemetry and health fields when source data exists
|
||||
- hardware sensors grouped into `fans`, `power`, `temperatures`, `other` only when the sensor has a real numeric reading
|
||||
- sensor `location` is not exported; LOGPile keeps only sensor `name` plus measured values and status
|
||||
- Redfish linked metric docs that carry component telemetry: `ProcessorMetrics`, `MemoryMetrics`, `DriveMetrics`, `EnvironmentMetrics`, `Metrics`
|
||||
- `pcie_devices.slot` is treated as the canonical PCIe address; `bdf` is used only as an internal fallback/dedupe key and is not serialized in the payload
|
||||
- `event_logs` are exported only from normalized parser/collector events that can be mapped to contract sources `host` / `bmc` / `redfish` without synthesizing content
|
||||
- `manufactured_year_week` is exported only as a reliable passthrough when the parser/collector already extracted a valid `YYYY-Www` value
|
||||
|
||||
Excluded:
|
||||
- storage endpoints from `pcie_devices`; disks and NVMe drives export only through `hardware.storage`
|
||||
- fake serial numbers for PCIe-class devices; any fallback serial generation belongs to Reanimator ingest, not LOGPile
|
||||
- sensors without a real numeric reading
|
||||
- events with internal-only or unmappable sources such as LOGPile internal warnings
|
||||
- memory with missing serial number
|
||||
- memory with `present=false` or `status=Empty`
|
||||
- CPUs with `present=false`
|
||||
- storage without `serial_number`
|
||||
- storage with `present=false`
|
||||
- power supplies without `serial_number`
|
||||
- power supplies with `present=false`
|
||||
- non-present network adapters
|
||||
- non-present PCIe / GPU devices
|
||||
- device-bound firmware duplicated at top-level firmware list
|
||||
- any field not present in the strict ingest contract
|
||||
|
||||
## Batch convert
|
||||
|
||||
`POST /api/convert` accepts multiple supported files and produces a ZIP with:
|
||||
- one `*.reanimator.json` file per successful input
|
||||
- `convert-summary.txt`
|
||||
|
||||
Behavior:
|
||||
- unsupported filenames are skipped
|
||||
- each file is parsed independently
|
||||
- one bad file must not fail the whole batch if at least one conversion succeeds
|
||||
- result artifact is temporary and deleted after download
|
||||
|
||||
## CSV export
|
||||
|
||||
`GET /api/export/csv` uses the same merged canonical inventory as Reanimator export,
|
||||
with legacy network-card fallback kept only for records that still have no canonical device match.
|
||||
81
bible-local/08-build-release.md
Normal file
81
bible-local/08-build-release.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# 08 — Build & Release
|
||||
|
||||
## CLI flags
|
||||
|
||||
Defined in `cmd/logpile/main.go`:
|
||||
|
||||
| Flag | Default | Purpose |
|
||||
|------|---------|---------|
|
||||
| `--port` | `8082` | HTTP server port |
|
||||
| `--file` | empty | Preload archive file |
|
||||
| `--version` | `false` | Print version and exit |
|
||||
| `--no-browser` | `false` | Do not auto-open browser |
|
||||
| `--hold-on-crash` | `true` on Windows | Keep console open after fatal crash |
|
||||
|
||||
## Common commands
|
||||
|
||||
```bash
|
||||
make build
|
||||
make build-all
|
||||
make test
|
||||
make fmt
|
||||
make update-pci-ids
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `make build` outputs `bin/logpile`
|
||||
- `make build-all` builds the supported cross-platform binaries
|
||||
- `make build` and `make build-all` run `scripts/update-pci-ids.sh --best-effort` unless `SKIP_PCI_IDS_UPDATE=1`
|
||||
|
||||
## PCI IDs
|
||||
|
||||
Source submodule: `third_party/pciids`
|
||||
Embedded copy: `internal/parser/vendors/pciids/pci.ids`
|
||||
|
||||
Typical setup after clone:
|
||||
|
||||
```bash
|
||||
git submodule update --init third_party/pciids
|
||||
```
|
||||
|
||||
## Release script
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
./scripts/release.sh
|
||||
```
|
||||
|
||||
Current behavior:
|
||||
|
||||
1. Reads version from `git describe --tags`
|
||||
2. Refuses a dirty tree unless `ALLOW_DIRTY=1`
|
||||
3. Sets stable Go cache/toolchain environment
|
||||
4. Creates `releases/{VERSION}/`
|
||||
5. Creates a release-notes template if missing
|
||||
6. Builds `darwin-arm64` and `windows-amd64`
|
||||
7. Packages any already-present binaries from `bin/`
|
||||
8. Generates `SHA256SUMS.txt`
|
||||
|
||||
Important limitations:
|
||||
- `scripts/release.sh` does not run `make build-all` for you
|
||||
- if you want Linux or additional macOS archives in the release directory, build them before running the script
|
||||
|
||||
Toolchain note:
|
||||
- `scripts/release.sh` defaults `GOTOOLCHAIN=local` to use the already installed Go toolchain and avoid implicit network downloads during release builds
|
||||
- if you intentionally want another toolchain, pass it explicitly, for example `GOTOOLCHAIN=go1.24.0 ./scripts/release.sh`
|
||||
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
./bin/logpile
|
||||
./bin/logpile --port 9090
|
||||
./bin/logpile --no-browser
|
||||
./bin/logpile --version
|
||||
```
|
||||
|
||||
## macOS Gatekeeper
|
||||
|
||||
```bash
|
||||
xattr -d com.apple.quarantine /path/to/logpile-darwin-arm64
|
||||
```
|
||||
54
bible-local/09-testing.md
Normal file
54
bible-local/09-testing.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# 09 — Testing
|
||||
|
||||
## Baseline
|
||||
|
||||
Required before merge:
|
||||
|
||||
```bash
|
||||
go test ./...
|
||||
```
|
||||
|
||||
## Test locations
|
||||
|
||||
| Area | Location |
|
||||
|------|----------|
|
||||
| Collectors and replay | `internal/collector/*_test.go` |
|
||||
| HTTP handlers and jobs | `internal/server/*_test.go` |
|
||||
| Exporters | `internal/exporter/*_test.go` |
|
||||
| Vendor parsers | `internal/parser/vendors/<vendor>/*_test.go` |
|
||||
|
||||
## General rules
|
||||
|
||||
- Prefer table-driven tests
|
||||
- No network access in unit tests
|
||||
- Cover happy path and realistic failure/partial-data cases
|
||||
- New vendor parsers need both detection and parse coverage
|
||||
|
||||
## Mandatory coverage for dedup/filter/classify logic
|
||||
|
||||
Any new deduplication, filtering, or classification function must have:
|
||||
|
||||
1. A true-positive case
|
||||
2. A true-negative case
|
||||
3. A regression case for the vendor or topology that motivated the change
|
||||
|
||||
This is mandatory for inventory logic, firmware filtering, and similar code paths where silent data drift is likely.
|
||||
|
||||
## Mandatory coverage for expensive path selection
|
||||
|
||||
Any function that decides whether to crawl or probe an expensive path must have:
|
||||
|
||||
1. A positive selection case
|
||||
2. A negative exclusion case
|
||||
3. A topology-level count/integration case
|
||||
|
||||
The goal is to catch runaway I/O regressions before they ship.
|
||||
|
||||
## Useful focused commands
|
||||
|
||||
```bash
|
||||
go test ./internal/exporter/...
|
||||
go test ./internal/collector/...
|
||||
go test ./internal/server/...
|
||||
go test ./internal/parser/vendors/...
|
||||
```
|
||||
920
bible-local/10-decisions.md
Normal file
920
bible-local/10-decisions.md
Normal file
@@ -0,0 +1,920 @@
|
||||
# 10 — Architectural Decision Log (ADL)
|
||||
|
||||
> **Rule:** Every significant architectural decision **must be recorded here** before or alongside
|
||||
> the code change. This applies to humans and AI assistants alike.
|
||||
>
|
||||
> Format: date · title · context · decision · consequences
|
||||
|
||||
---
|
||||
|
||||
## ADL-001 — In-memory only state (no database)
|
||||
|
||||
**Date:** project start
|
||||
**Context:** LOGPile is designed as a standalone diagnostic tool, not a persistent service.
|
||||
**Decision:** All parsed/collected data lives in `Server.result` (in-memory). No database, no files written.
|
||||
**Consequences:**
|
||||
- Data is lost on process restart — intentional.
|
||||
- Simple deployment: single binary, no setup required.
|
||||
- JSON export is the persistence mechanism for users who want to save results.
|
||||
|
||||
---
|
||||
|
||||
## ADL-002 — Vendor parser auto-registration via init()
|
||||
|
||||
**Date:** project start
|
||||
**Context:** Need an extensible parser registry without a central factory function.
|
||||
**Decision:** Each vendor parser registers itself in its package's `init()` function.
|
||||
`vendors/vendors.go` holds blank imports to trigger registration.
|
||||
**Consequences:**
|
||||
- Adding a new parser requires only: implement interface + add one blank import.
|
||||
- No central list to maintain (other than the import file).
|
||||
- `go test ./...` will include new parsers automatically.
|
||||
|
||||
---
|
||||
|
||||
## ADL-003 — Highest-confidence parser wins
|
||||
|
||||
**Date:** project start
|
||||
**Context:** Multiple parsers may partially match an archive (e.g. generic + specific vendor).
|
||||
**Decision:** Run all parsers' `Detect()`, select the one returning the highest score (0–100).
|
||||
**Consequences:**
|
||||
- Generic fallback (score 15) only activates when no vendor parser scores higher.
|
||||
- Parsers must be conservative with high scores (70+) to avoid false positives.
|
||||
|
||||
---
|
||||
|
||||
## ADL-004 — Canonical hardware.devices as single source of truth
|
||||
|
||||
**Date:** v1.5.0
|
||||
**Context:** UI tabs and Reanimator exporter were reading from different sub-fields of
|
||||
`AnalysisResult`, causing potential drift.
|
||||
**Decision:** Introduce `hardware.devices` as the canonical inventory repository.
|
||||
All UI tabs and all exporters must read exclusively from this repository.
|
||||
**Consequences:**
|
||||
- Any UI vs Reanimator discrepancy is classified as a bug, not a "known difference".
|
||||
- Deduplication logic runs once in the repository builder (serial → bdf → distinct).
|
||||
- New hardware attributes must be added to canonical schema first, then mapped to consumers.
|
||||
|
||||
---
|
||||
|
||||
## ADL-005 — No hardcoded PCI model strings; use pci.ids
|
||||
|
||||
**Date:** v1.5.0
|
||||
**Context:** NVIDIA and other vendors release new GPU models frequently; hardcoded maps
|
||||
required code changes for each new model ID.
|
||||
**Decision:** Use the `pciutils/pciids` database (git submodule, embedded at build time).
|
||||
PCI vendor/device ID → human-readable model name via lookup.
|
||||
**Consequences:**
|
||||
- New GPU models can be supported by updating `pci.ids` without code changes.
|
||||
- `make build` auto-syncs `pci.ids` from submodule before compilation.
|
||||
- External override via `LOGPILE_PCI_IDS_PATH` env var.
|
||||
|
||||
---
|
||||
|
||||
## ADL-006 — Reanimator export uses canonical hardware.devices (not raw sub-fields)
|
||||
|
||||
**Date:** v1.5.0
|
||||
**Context:** Early Reanimator exporter read from `Hardware.GPUs`, `Hardware.NICs`, etc.
|
||||
directly, diverging from UI data.
|
||||
**Decision:** Reanimator exporter must use `hardware.devices` — the same source as the UI.
|
||||
Exporter groups/filters canonical records by section; does not rebuild from sub-fields.
|
||||
**Consequences:**
|
||||
- Guarantees UI and export consistency.
|
||||
- Exporter code is simpler — mainly a filter+map, not a data reconstruction.
|
||||
|
||||
---
|
||||
|
||||
## ADL-007 — Documentation language is English
|
||||
|
||||
**Date:** 2026-02-20
|
||||
**Context:** Codebase documentation was mixed Russian/English, reducing clarity for
|
||||
international contributors and AI assistants.
|
||||
**Decision:** All maintained project documentation (`docs/bible/`, `README.md`,
|
||||
`CLAUDE.md`, and new technical docs) must be written in English.
|
||||
**Consequences:**
|
||||
- Bible is authoritative in English.
|
||||
- AI assistants get consistent, unambiguous context.
|
||||
|
||||
---
|
||||
|
||||
## ADL-008 — Bible is the single source of truth for architecture docs
|
||||
|
||||
**Date:** 2026-02-23
|
||||
**Context:** Architecture information was duplicated across `README.md`, `CLAUDE.md`,
|
||||
and the Bible, creating drift risk and stale guidance for humans and AI agents.
|
||||
**Decision:** Keep architecture and technical design documentation only in `docs/bible/`.
|
||||
Top-level `README.md` and `CLAUDE.md` must remain minimal pointers/instructions.
|
||||
**Consequences:**
|
||||
- Reduces documentation drift and duplicate updates.
|
||||
- AI assistants are directed to one authoritative source before making changes.
|
||||
- Documentation updates that affect architecture must include Bible changes (and ADL entries when significant).
|
||||
|
||||
---
|
||||
|
||||
## ADL-009 — Redfish analysis is performed from raw snapshot replay (unified tunnel)
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** Live Redfish collection and raw export re-analysis used different parsing paths,
|
||||
which caused drift and made bug fixes difficult to validate consistently.
|
||||
**Decision:** Redfish live collection must produce a `raw_payloads.redfish_tree` snapshot first,
|
||||
then run the same replay analyzer used for imported raw exports.
|
||||
**Consequences:**
|
||||
- Same `redfish_tree` input produces the same parsed result in live and offline modes.
|
||||
- Debugging parser issues can be done against exported raw bundles without live BMC access.
|
||||
- Snapshot completeness becomes critical; collector seeds/limits are part of analyzer correctness.
|
||||
|
||||
---
|
||||
|
||||
## ADL-010 — Raw export is a self-contained re-analysis package (not a final result dump)
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** Exporting only normalized `AnalysisResult` loses raw source fidelity and prevents
|
||||
future parser improvements from being applied to already collected data.
|
||||
**Decision:** `Export Raw Data` produces a self-contained raw package (JSON or ZIP bundle)
|
||||
that the application can reopen and re-analyze. Parsed data in the package is optional and not
|
||||
the source of truth on import.
|
||||
**Consequences:**
|
||||
- Re-opening an export always re-runs analysis from raw source (`redfish_tree` or uploaded file bytes).
|
||||
- Raw bundles include collection context and diagnostics for debugging (`collect.log`, `parser_fields.json`).
|
||||
- Endpoint compatibility is preserved (`/api/export/json`) while actual payload format may be a bundle.
|
||||
|
||||
---
|
||||
|
||||
## ADL-011 — Redfish snapshot crawler is bounded, prioritized, and failure-tolerant
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** Full Redfish trees on modern GPU systems are large, noisy, and contain many
|
||||
vendor-specific or non-fetchable links. Unbounded crawling and naive queue design caused hangs
|
||||
and incomplete snapshots.
|
||||
**Decision:** Use a bounded snapshot crawler with:
|
||||
- explicit document cap (`LOGPILE_REDFISH_SNAPSHOT_MAX_DOCS`)
|
||||
- priority seed paths (PCIe/Fabrics/Firmware/Storage/PowerSubsystem/ThermalSubsystem)
|
||||
- normalized `@odata.id` paths (strip `#fragment`)
|
||||
- noisy expected error filtering (404/405/410/501 hidden from UI)
|
||||
- queue capacity sized to crawl cap to avoid producer/consumer deadlock
|
||||
**Consequences:**
|
||||
- Snapshot collection remains stable on large BMC trees.
|
||||
- Most high-value inventory paths are reached before the cap.
|
||||
- UI progress remains useful while debug logs retain low-level fetch failures.
|
||||
|
||||
---
|
||||
|
||||
## ADL-012 — Vendor-specific storage inventory probing is allowed as fallback
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** Some Supermicro BMCs expose empty standard `Storage/.../Drives` collections while
|
||||
real disk inventory exists under vendor-specific `Disk.Bay` endpoints and enclosure links.
|
||||
**Decision:** When standard drive collections are empty, collector/replay may probe vendor-style
|
||||
`.../Drives/Disk.Bay.*` endpoints and follow `Storage.Links.Enclosures[*]` to recover physical drives.
|
||||
**Consequences:**
|
||||
- Higher storage inventory coverage on Supermicro HBA/HA-RAID/MRVL/NVMe backplane implementations.
|
||||
- Replay must mirror the same probing behavior to preserve deterministic results.
|
||||
- Probing remains bounded (finite candidate set) to avoid runaway requests.
|
||||
|
||||
---
|
||||
|
||||
## ADL-013 — PowerSubsystem is preferred over legacy Power on newer Redfish implementations
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** X14+/newer Redfish implementations increasingly expose authoritative PSU data in
|
||||
`PowerSubsystem/PowerSupplies`, while legacy `/Power` may be incomplete or schema-shifted.
|
||||
**Decision:** Prefer `Chassis/*/PowerSubsystem/PowerSupplies` as the primary PSU source and use
|
||||
legacy `Chassis/*/Power` as fallback.
|
||||
**Consequences:**
|
||||
- Better compatibility with newer BMC firmware generations.
|
||||
- Legacy systems remain supported without special-case collector selection.
|
||||
- Snapshot priority seeds must include `PowerSubsystem` resources.
|
||||
|
||||
---
|
||||
|
||||
## ADL-014 — Threshold logic lives on the server; UI reflects status only
|
||||
|
||||
**Date:** 2026-02-24
|
||||
**Context:** Duplicating threshold math in frontend and backend creates drift and inconsistent
|
||||
highlighting (e.g. PSU mains voltage range checks).
|
||||
**Decision:** Business threshold evaluation (e.g. PSU voltage nominal range) must be computed on
|
||||
the server; frontend only renders status/flags returned by the API.
|
||||
**Consequences:**
|
||||
- Single source of truth for threshold policies.
|
||||
- UI can evolve visually without re-implementing domain logic.
|
||||
- API payloads may carry richer status semantics over time.
|
||||
|
||||
---
|
||||
|
||||
## ADL-015 — Supermicro crashdump archive parser removed from active registry
|
||||
|
||||
**Date:** 2026-03-01
|
||||
**Context:** The Supermicro crashdump parser (`SMC Crash Dump Parser`) produced low-value
|
||||
results for current workflows and was explicitly rejected as a supported archive path.
|
||||
**Decision:** Remove `supermicro` vendor parser from active registration and project source.
|
||||
Do not include it in `/api/parsers` output or parser documentation matrix.
|
||||
**Consequences:**
|
||||
- Supermicro crashdump archives (`CDump.txt` format) are no longer parsed by a dedicated vendor parser.
|
||||
- Such archives fall back to other matching parsers (typically `generic`) unless a new replacement parser is added.
|
||||
- Reintroduction requires a new parser package and an explicit registry import in `vendors/vendors.go`.
|
||||
|
||||
---
|
||||
|
||||
## ADL-016 — Device-bound firmware must not appear in hardware.firmware
|
||||
|
||||
**Date:** 2026-03-01
|
||||
**Context:** Dell TSR `DCIM_SoftwareIdentity` lists firmware for every component (NICs,
|
||||
PSUs, disks, backplanes) in addition to system-level firmware. Naively importing all entries
|
||||
into `Hardware.Firmware` caused device firmware to appear twice in Reanimator: once in the
|
||||
device's own record and again in the top-level firmware list.
|
||||
**Decision:**
|
||||
- `Hardware.Firmware` contains only system-level firmware (BIOS, BMC/iDRAC, CPLD,
|
||||
Lifecycle Controller, storage controllers, BOSS).
|
||||
- Device-bound entries (NIC, PSU, Disk, Backplane, GPU) must not be added to
|
||||
`Hardware.Firmware`.
|
||||
- Parsers must store the FQDD (or equivalent slot identifier) in `FirmwareInfo.Description`
|
||||
so the Reanimator exporter can filter by FQDD prefix.
|
||||
- The exporter's `isDeviceBoundFirmwareFQDD()` function performs this filter.
|
||||
**Consequences:**
|
||||
- Any new parser that ingests a per-device firmware inventory must follow the same rule.
|
||||
- Device firmware is accessible only via the device's own record, not the firmware list.
|
||||
|
||||
---
|
||||
|
||||
## ADL-017 — Vendor-embedded MAC addresses must be stripped from model name fields
|
||||
|
||||
**Date:** 2026-03-01
|
||||
**Context:** Dell TSR embeds MAC addresses directly in `ProductName` and `ElementName`
|
||||
fields (e.g. `"NVIDIA ConnectX-6 Lx 2x 25G SFP28 OCP3.0 SFF - C4:70:BD:DB:56:08"`).
|
||||
This caused model names to contain MAC addresses in NIC model, NIC firmware device name,
|
||||
and potentially other fields.
|
||||
**Decision:** Strip any ` - XX:XX:XX:XX:XX:XX` suffix from all model/name string fields
|
||||
at parse time before storing in any model struct. Use the regex
|
||||
`\s+-\s+([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}$`.
|
||||
**Consequences:**
|
||||
- Model names are clean and consistent across all devices.
|
||||
- All parsers must apply this stripping to any field used as a device name or model.
|
||||
- Confirmed affected fields in Dell: `DCIM_NICView.ProductName`, `DCIM_SoftwareIdentity.ElementName`.
|
||||
|
||||
---
|
||||
|
||||
## ADL-018 — NVMe bay probe must be restricted to storage-capable chassis types
|
||||
|
||||
**Date:** 2026-03-12
|
||||
**Context:** `shouldAdaptiveNVMeProbe` was introduced in `2fa4a12` to recover NVMe drives on
|
||||
Supermicro BMCs that expose empty `Drives` collections but serve disks at direct `Disk.Bay.N`
|
||||
paths. The function returns `true` for any chassis with an empty `Members` array. On
|
||||
Supermicro HGX systems (SYS-A21GE-NBRT and similar) ~35 sub-chassis (GPU, NVSwitch,
|
||||
PCIeRetimer, ERoT, IRoT, BMC, FPGA) all carry `ChassisType=Module/Component/Zone` and
|
||||
expose empty `/Drives` collections. Without filtering, each triggered 384 HTTP requests →
|
||||
13 440 requests ≈ 22 minutes of pure I/O waste per collection.
|
||||
**Decision:** Before probing `Disk.Bay.N` candidates for a chassis, check its `ChassisType`
|
||||
via `chassisTypeCanHaveNVMe`. Skip if type is `Module`, `Component`, or `Zone`. Keep probing
|
||||
for `Enclosure`, `RackMount`, and any unrecognised type (fail-safe).
|
||||
**Consequences:**
|
||||
- On HGX systems the NVMe post-probe time goes from ~22 min to effectively zero.
|
||||
- NVMe backplane recovery (`Enclosure` type) is unaffected.
|
||||
- Any new chassis type that hosts NVMe storage is covered by the default `true` path.
|
||||
- `chassisTypeCanHaveNVMe` and the candidate-selection loop must have unit tests covering
|
||||
both the excluded types and the storage-capable types (see `TestChassisTypeCanHaveNVMe`
|
||||
and `TestNVMePostProbeSkipsNonStorageChassis`).
|
||||
|
||||
## ADL-019 — Redfish post-probe recovery is profile-owned acquisition policy
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Numeric collection post-probe and direct NVMe `Disk.Bay` recovery were still
|
||||
controlled by collector-core heuristics, which kept platform-specific acquisition behavior in
|
||||
`redfish.go` and made vendor/topology refactoring incomplete.
|
||||
**Decision:** Move expensive Redfish post-probe enablement into profile-owned acquisition policy.
|
||||
The collector core may execute bounded post-probe loops, but profiles must explicitly enable:
|
||||
- numeric collection post-probe
|
||||
- direct NVMe `Disk.Bay` recovery
|
||||
- sensor collection post-probe
|
||||
**Consequences:**
|
||||
- Generic collector flow no longer implicitly turns on storage/NVMe recovery for every platform.
|
||||
- Supermicro-specific direct NVMe recovery and generic numeric collection recovery are now
|
||||
regression-tested through profile fixtures.
|
||||
- Future platform storage/post-probe behavior must be added through profile tuning, not new
|
||||
vendor-shaped `if` branches in collector core.
|
||||
|
||||
## ADL-020 — Redfish critical plan-B activation is profile-owned recovery policy
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** `critical plan-B` and `profile plan-B` were still effectively always-on collector
|
||||
behavior once paths were present, including critical collection member retry and slow numeric
|
||||
child probing. That kept acquisition recovery semantics in `redfish.go` instead of the profile
|
||||
layer.
|
||||
**Decision:** Move plan-B activation into profile-owned recovery policy. Profiles must explicitly
|
||||
enable:
|
||||
- critical collection member retry
|
||||
- slow numeric probing during critical plan-B
|
||||
- profile-specific plan-B pass
|
||||
**Consequences:**
|
||||
- Recovery behavior is now observable in raw Redfish diagnostics alongside other tuning.
|
||||
- Generic/fallback recovery remains available through profile policy instead of implicit collector
|
||||
defaults.
|
||||
- Future platform-specific plan-B behavior must be introduced through profile tuning and tests,
|
||||
not through new unconditional collector branches.
|
||||
|
||||
## ADL-021 — Extra discovered-path storage seeds must be profile-scoped, not core-baseline
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** The collector core baseline seed list still contained storage-specific discovered-path
|
||||
suffixes such as `SimpleStorage` and `Storage/IntelVROC/*`. These are useful on some platforms,
|
||||
but they are acquisition extensions layered on top of discovered `Systems/*` resources, not part
|
||||
of the minimal vendor-neutral Redfish baseline.
|
||||
**Decision:** Move such discovered-path expansions into profile-owned scoped path policy. The
|
||||
collector core keeps the vendor-neutral baseline; profiles may add extra system/chassis/manager
|
||||
suffixes that are expanded over discovered members during acquisition planning.
|
||||
**Consequences:**
|
||||
- Platform-shaped storage discovery no longer lives in `redfish.go` baseline seed construction.
|
||||
- Extra discovered-path branches are visible in plan diagnostics and fixture regression tests.
|
||||
- Future model/vendor storage path expansions must be added through scoped profile policy instead
|
||||
of editing the shared baseline seed list.
|
||||
|
||||
## ADL-022 — Adaptive prefetch eligibility is profile-owned policy
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** The adaptive prefetch executor was still driven by hardcoded include/exclude path
|
||||
rules in `redfish.go`. That made GPU/storage/network prefetch shaping part of collector-core
|
||||
knowledge rather than profile-owned acquisition policy.
|
||||
**Decision:** Move prefetch eligibility rules into profile tuning. The collector core still runs
|
||||
adaptive prefetch, but profiles provide:
|
||||
- `IncludeSuffixes` for critical paths eligible for prefetch
|
||||
- `ExcludeContains` for path shapes that must never be prefetched
|
||||
**Consequences:**
|
||||
- Prefetch behavior is now visible in raw Redfish diagnostics and test fixtures.
|
||||
- Platform- or topology-specific prefetch shaping no longer requires editing collector-core
|
||||
string lists.
|
||||
- Future prefetch tuning must be introduced through profiles and regression tests.
|
||||
|
||||
## ADL-023 — Core critical baseline is roots-only; critical shaping is profile-owned
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** `redfishCriticalEndpoints(...)` still encoded a broad set of system/chassis/manager
|
||||
critical branches directly in collector core. This mixed minimal crawl invariants with profile-
|
||||
specific acquisition shaping.
|
||||
**Decision:** Reduce collector-core critical baseline to vendor-neutral roots only:
|
||||
- `/redfish/v1`
|
||||
- discovered `Systems/*`
|
||||
- discovered `Chassis/*`
|
||||
- discovered `Managers/*`
|
||||
|
||||
Profiles now own additional critical shaping through:
|
||||
- scoped critical suffix policy for discovered resources
|
||||
- explicit top-level `CriticalPaths`
|
||||
**Consequences:**
|
||||
- Critical inventory breadth is now explained by the acquisition plan, not hidden in collector
|
||||
helper defaults.
|
||||
- Generic profile still provides the previous broad critical coverage, so behavior stays stable.
|
||||
- Future critical-path tuning must be implemented in profiles and regression-tested there.
|
||||
|
||||
## ADL-024 — Live Redfish execution plans are resolved inside redfishprofile
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Even after moving seeds, scoped paths, critical shaping, recovery, and prefetch
|
||||
policy into profiles, `redfish.go` still manually merged discovered resources with those policy
|
||||
fragments. That left acquisition-plan resolution logic in collector core.
|
||||
**Decision:** Introduce `redfishprofile.ResolveAcquisitionPlan(...)` as the boundary between
|
||||
profile planning and collector execution. `redfishprofile` now resolves:
|
||||
- baseline seeds
|
||||
- baseline critical roots
|
||||
- scoped path expansions
|
||||
- explicit profile seed/critical/plan-B paths
|
||||
|
||||
The collector core consumes the resolved plan and executes it.
|
||||
**Consequences:**
|
||||
- Acquisition planning logic is now testable in `redfishprofile` without going through the live
|
||||
collector.
|
||||
- `redfish.go` no longer owns path-resolution helpers for seeds/critical planning.
|
||||
- This creates a clean next step toward true per-profile acquisition hooks beyond static policy
|
||||
fragments.
|
||||
|
||||
## ADL-025 — Post-discovery acquisition refinement belongs to profile hooks
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Some acquisition behavior depends not only on vendor/model hints, but on what the
|
||||
lightweight Redfish discovery actually returned. Static absolute path lists in profile plans are
|
||||
too rigid for such cases and reintroduce guessed platform knowledge.
|
||||
**Decision:** Add a post-discovery acquisition refinement hook to Redfish profiles. Profiles may
|
||||
mutate the resolved execution plan after discovered `Systems/*`, `Chassis/*`, and `Managers/*`
|
||||
are known.
|
||||
|
||||
First concrete use:
|
||||
- MSI now derives GPU chassis seeds and `.../Sensors` critical/plan-B paths from discovered
|
||||
`Chassis/GPU*` resources instead of hardcoded `GPU1..GPU4` absolute paths in the static plan.
|
||||
Additional use:
|
||||
- Supermicro now derives `UpdateService/Oem/Supermicro/FirmwareInventory` critical/plan-B paths
|
||||
from resource hints instead of carrying that absolute path in the static plan.
|
||||
Additional use:
|
||||
- Dell now derives `Managers/iDRAC.Embedded.*` acquisition paths from discovered manager
|
||||
resources instead of carrying `Managers/iDRAC.Embedded.1` as a static absolute path.
|
||||
**Consequences:**
|
||||
- Profile modules can react to actual discovery results without pushing conditional logic back
|
||||
into `redfish.go`.
|
||||
- Diagnostics still show the final refined plan because the collector stores the refined plan,
|
||||
not only the pre-refinement template.
|
||||
- Future vendor-specific discovery-dependent acquisition behavior should be implemented through
|
||||
this hook rather than new collector-core branches.
|
||||
|
||||
## ADL-026 — Replay analysis uses a resolved profile plan, not ad-hoc directives only
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Replay still relied on a flat `AnalysisDirectives` struct assembled centrally,
|
||||
while vendor-specific conditions often depended on the actual snapshot shape. That made analysis
|
||||
behavior harder to explain and kept too much vendor logic in generic replay collectors.
|
||||
**Decision:** Introduce `redfishprofile.ResolveAnalysisPlan(...)` for replay. The resolved
|
||||
analysis plan contains:
|
||||
- active match result
|
||||
- resolved analysis directives
|
||||
- analysis notes explaining snapshot-aware hook activation
|
||||
|
||||
Profiles may refine this plan using the snapshot and discovered resources before replay collectors
|
||||
run.
|
||||
|
||||
First concrete uses:
|
||||
- MSI enables processor-GPU fallback and MSI chassis lookup only when the snapshot actually
|
||||
contains GPU processors and `Chassis/GPU*`
|
||||
- HGX enables processor-GPU alias fallback from actual HGX/GPU_SXM topology signals in the snapshot
|
||||
- Supermicro enables NVMe backplane and known-controller recovery from actual snapshot paths
|
||||
**Consequences:**
|
||||
- Replay behavior is now closer to the acquisition architecture: a resolved profile plan feeds the
|
||||
executor.
|
||||
- `redfish_analysis_plan` is stored in raw payload metadata for offline debugging.
|
||||
- Future analysis-side vendor logic should move into profile refinement hooks instead of growing the
|
||||
central directive builder.
|
||||
|
||||
## ADL-027 — Replay GPU/storage executors consume resolved analysis plans
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Even after introducing `ResolveAnalysisPlan(...)`, replay GPU/storage collectors still
|
||||
accepted a raw `AnalysisDirectives` struct. That preserved an implicit shortcut from the old design
|
||||
and weakened the plan/executor boundary.
|
||||
**Decision:** Replay GPU/storage executors now accept `redfishprofile.ResolvedAnalysisPlan`
|
||||
directly. The executor reads resolved directives from the plan instead of being passed a standalone
|
||||
directive bundle.
|
||||
**Consequences:**
|
||||
- GPU and storage replay execution now follows the same architectural pattern as acquisition:
|
||||
resolve plan first, execute second.
|
||||
- Future profile-owned execution helpers can use plan notes or additional resolved fields without
|
||||
changing the executor API again.
|
||||
- Remaining replay areas should migrate the same way instead of continuing to accept raw directive
|
||||
structs.
|
||||
|
||||
## ADL-028 — isDeviceBoundFirmwareName must cover vendor-specific naming patterns per vendor
|
||||
|
||||
**Date:** 2026-03-12
|
||||
**Context:** `isDeviceBoundFirmwareName` was written to filter Dell-style device firmware names
|
||||
(`"GPU SomeDevice"`, `"NIC OnboardLAN"`). When Supermicro Redfish FirmwareInventory was added
|
||||
(`6c19a58`), no Supermicro-specific patterns were added. Supermicro names a NIC entry
|
||||
`"NIC1 System Slot0 AOM-DP805-IO"` — a digit follows the type prefix directly, bypassing the
|
||||
`"nic "` (space-terminated) check. 29 device-bound entries leaked into `hardware.firmware` on
|
||||
SYS-A21GE-NBRT (HGX B200). Commit `9c5512d` attempted a fix by adding `_fw_gpu_` patterns,
|
||||
but checked `DeviceName` which contains `"Software Inventory"` (from the Redfish `Name` field),
|
||||
not the firmware inventory ID. The patterns were dead code from the moment they were committed.
|
||||
**Decision:**
|
||||
- `isDeviceBoundFirmwareName` must be extended for each new vendor whose FirmwareInventory
|
||||
naming convention differs from the existing patterns.
|
||||
- When adding HGX/Supermicro patterns, check that the pattern matches the field value that
|
||||
`collectFirmwareInventory` actually stores — trace the data path from Redfish doc to
|
||||
`FirmwareInfo.DeviceName` before writing the condition.
|
||||
- `TestIsDeviceBoundFirmwareName` must contain at least one case per vendor format.
|
||||
**Consequences:**
|
||||
- New vendors with FirmwareInventory support require a test covering both device-bound names
|
||||
(must return true) and system-level names (must return false) before the code ships.
|
||||
- The dead `_fw_gpu_` / `_fw_nvswitch_` / `_inforom_gpu_` patterns were replaced with
|
||||
correct prefix+digit checks (`"gpu" + digit`, `"nic" + digit`) and explicit string checks
|
||||
(`"nvmecontroller"`, `"power supply"`, `"software inventory"`).
|
||||
|
||||
## ADL-029 — Dell TSR device-bound firmware filtered via FQDD; InfiniBand routed to NetworkAdapters
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:** Dell TSR `sysinfo_DCIM_SoftwareIdentity.xml` lists firmware for every installed
|
||||
component. `parseSoftwareIdentityXML` dumped all of these into `hardware.firmware` without
|
||||
filtering, so device-bound entries such as `"Mellanox Network Adapter"` (FQDD `InfiniBand.Slot.1-1`)
|
||||
and `"PERC H755 Front"` (FQDD `RAID.SL.3-1`) appeared in the reanimator export alongside system
|
||||
firmware like BIOS and iDRAC. Confirmed on PowerEdge R6625 (8VS2LG4).
|
||||
|
||||
Additionally, `DCIM_InfiniBandView` was not handled in the parser switch, so Mellanox ConnectX-6
|
||||
appeared only as a PCIe device with `model: "16x or x16"` (from `DataBusWidth` fallback).
|
||||
`parseControllerView` called `addFirmware` with description `"storage controller"` instead of the
|
||||
FQDD, so the FQDD-based filter in the exporter could not remove it.
|
||||
|
||||
**Decision:**
|
||||
1. `isDeviceBoundFirmwareFQDD` extended with `"infiniband."` and `"fc."` prefixes; `"raid.backplane."`
|
||||
broadened to `"raid."` to cover `RAID.SL.*`, `RAID.Integrated.*`, etc.
|
||||
2. `DCIM_InfiniBandView` routed to `parseNICView` → device appears as `NetworkAdapter` with correct
|
||||
firmware, MAC address, and VendorID/DeviceID.
|
||||
3. `"InfiniBand."` added to `pcieFQDDNoisePrefix` to suppress the duplicate `DCIM_PCIDeviceView`
|
||||
entry (DataBusWidth-only, no useful data).
|
||||
4. `parseControllerView` now passes `fqdd` as the `addFirmware` description so the FQDD filter
|
||||
removes the entry in the exporter.
|
||||
5. `parsePCIeDeviceView` now prioritises `props["description"]` (chip model, e.g. `"MT28908 Family
|
||||
[ConnectX-6]"`) over `props["devicedescription"]` (location string) for `pcie.Description`.
|
||||
6. `convertPCIeDevices` model fallback order: `PartNumber → Description → DeviceClass`.
|
||||
|
||||
**Consequences:**
|
||||
- `hardware.firmware` contains only system-level entries; NIC/RAID/storage-controller firmware
|
||||
lives on the respective device record.
|
||||
- `TestParseDellInfiniBandView` and `TestIsDeviceBoundFirmwareFQDD` guard the regression.
|
||||
- Any future Dell TSR device class whose FQDD prefix is not yet in the prefix list may still leak;
|
||||
extend `isDeviceBoundFirmwareFQDD` and add a test case when encountered.
|
||||
|
||||
---
|
||||
|
||||
## ADL-030 — pci.ids enrichment: chip model and vendor resolved from PCI IDs when source data is generic or missing
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
Dell TSR `DCIM_InfiniBandView.ProductName` reports a generic marketing name ("Mellanox Network
|
||||
Adapter") instead of the precise chip identifier ("MT28908 Family [ConnectX-6]"). The actual
|
||||
chip model is available in `pci.ids` by VendorID:DeviceID (15B3:101B). Vendor name may also be
|
||||
absent when no `VendorName` / `Manufacturer` property is present.
|
||||
|
||||
The general rule was established: *if model is not found in source data but PCI IDs are known,
|
||||
resolve model from `pci.ids`*. This rule applies broadly across all export paths.
|
||||
|
||||
**Decision (two-layer enrichment):**
|
||||
1. **Parser layer (Dell, `parseNICView`):** When `VendorID != 0 && DeviceID != 0`, prefer
|
||||
`pciids.DeviceName(vendorID, deviceID)` over the product name from logs. This makes the chip
|
||||
identifier the primary model for NIC/InfiniBand adapters (more specific than marketing name).
|
||||
Fill `Vendor` from `pciids.VendorName(vendorID)` when the vendor field is otherwise empty.
|
||||
Same fallback applied in `parsePCIeDeviceView` for empty `Description`.
|
||||
2. **Exporter layer (`convertPCIeFromDevices`):** General rule — when `d.Model == ""` after all
|
||||
legacy fallbacks and `VendorID != 0 && DeviceID != 0`, set `model = pciids.DeviceName(...)`.
|
||||
Also fill empty `manufacturer` from `pciids.VendorName(...)`. This covers all parsers/sources.
|
||||
|
||||
**Consequences:**
|
||||
- Mellanox InfiniBand slot now reports `model: "MT28908 Family [ConnectX-6]"` and
|
||||
`manufacturer: "Mellanox Technologies"` in the reanimator export.
|
||||
- For NICs where pci.ids has no entry, the original product name is kept (pci.ids returns "").
|
||||
- `TestParseDellInfiniBandView` asserts the model and vendor from pci.ids.
|
||||
|
||||
---
|
||||
|
||||
## ADL-031 — CPUAffinity parsed into NUMANode for PCIe, NIC, and controller devices
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
Dell TSR DCIM view classes report `CPUAffinity` for NIC, InfiniBand, PCIe, and controller
|
||||
devices. Values are "1", "2" (NUMA node index), or "Not Applicable" (for devices that bridge
|
||||
both CPUs or have no CPU affinity). This data is needed for topology-aware diagnostics.
|
||||
|
||||
**Decision:**
|
||||
- Add `NUMANode int` (JSON: `"numa_node,omitempty"`) to `models.PCIeDevice`,
|
||||
`models.NetworkAdapter`, `models.HardwareDevice`, and `ReanimatorPCIe`.
|
||||
- Parse from `props["cpuaffinity"]` using `parseIntLoose`: numeric values ("1", "2") map
|
||||
directly; "Not Applicable" returns 0 (omitted via `omitempty`).
|
||||
- Thread through `buildDevicesFromLegacy` (PCIe and NIC sections) and `convertPCIeFromDevices`.
|
||||
- `parseControllerView` also parses CPUAffinity since RAID controllers have NUMA affinity.
|
||||
|
||||
**Consequences:**
|
||||
- `numa_node: 1` or `2` appears in reanimator export for devices with known affinity.
|
||||
- Value 0 / absent means "not reported" — covers both "Not Applicable" and sources that don't
|
||||
provide CPUAffinity at all.
|
||||
- `TestParseDellCPUAffinity` verifies numeric values parsed correctly and "Not Applicable"→0.
|
||||
|
||||
---
|
||||
|
||||
## ADL-032 — Reanimator export must match ingest contract exactly
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
LOGPile's Reanimator export had drifted from the strict ingest contract. It emitted fields that
|
||||
Reanimator does not currently accept (`status_at_collection`, `numa_node`),
|
||||
while missing fields and sections now present in the contract (`hardware.sensors`,
|
||||
`pcie_devices[].mac_addresses`). Memory export rules also diverged from the ingest side: empty or
|
||||
serial-less DIMMs were still exported.
|
||||
|
||||
**Decision:**
|
||||
- Treat the Reanimator ingest contract as the authoritative schema for `GET /api/export/reanimator`.
|
||||
- Emit only fields present in the current upstream contract revision.
|
||||
- Add `hardware.sensors`, `pcie_devices[].mac_addresses`, `pcie_devices[].numa_node`, and
|
||||
upstream-approved component telemetry/health fields.
|
||||
- Leave out fields that are still not part of the upstream contract.
|
||||
- Map internal `source_type=archive` to external `source_type=logfile`.
|
||||
- Skip memory entries that are empty, not present, or missing serial numbers.
|
||||
- Generate CPU and PCIe serials only in the forms allowed by the contract.
|
||||
- Mirror the applied contract in `bible-local/docs/hardware-ingest-contract.md`.
|
||||
|
||||
**Consequences:**
|
||||
- Some previously exported diagnostic fields are intentionally dropped from the Reanimator payload
|
||||
until the upstream contract adds them.
|
||||
- Internal models may retain richer fields than the current export schema.
|
||||
- `hardware.devices` is canonical only after merge with legacy hardware slices; partial parser-owned
|
||||
canonical records must not hide CPUs, memory, storage, NICs, or PSUs still stored in legacy
|
||||
fields.
|
||||
- CSV and Reanimator exports must use the same merged canonical inventory to avoid divergent export
|
||||
contents across surfaces.
|
||||
- Future exporter changes must update both the code and the mirrored contract document together.
|
||||
|
||||
---
|
||||
|
||||
## ADL-033 — Component presence is implicit; Redfish linked metrics are part of replay correctness
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
The upstream ingest contract allows `present`, but current export semantics do not need to send
|
||||
`present=true` for populated components. At the same time, several important Redfish component
|
||||
telemetry fields were only available through linked metric resources such as `ProcessorMetrics`,
|
||||
`MemoryMetrics`, and `DriveMetrics`. Without collecting and replaying these linked documents,
|
||||
live collection and raw snapshot replay still underreported component health fields.
|
||||
|
||||
**Decision:**
|
||||
- Do not serialize `present=true` in Reanimator export. Presence is represented by the presence of
|
||||
the component record itself.
|
||||
- Do not export component records marked `present=false`.
|
||||
- Interpret CPU `firmware` in Reanimator payload as CPU microcode.
|
||||
- Treat Redfish linked metric resources `ProcessorMetrics`, `MemoryMetrics`, `DriveMetrics`,
|
||||
`EnvironmentMetrics`, and generic `Metrics` as part of analyzer correctness when they are linked
|
||||
from component resources.
|
||||
- Replay logic must merge these linked metric resources back into CPU, memory, storage, PCIe, GPU,
|
||||
NIC, and PSU component `Details` the same way live collection expects them to be used.
|
||||
|
||||
**Consequences:**
|
||||
- Reanimator payloads are smaller and avoid redundant `present=true` noise while still excluding
|
||||
empty slots and absent components.
|
||||
- Any future exporter change that reintroduces serialized component presence needs an explicit
|
||||
contract review.
|
||||
- Raw Redfish snapshot completeness now includes linked per-component metric resources, not only
|
||||
top-level inventory members.
|
||||
- CPU microcode is no longer expected in top-level `hardware.firmware`; it belongs on the CPU
|
||||
component record.
|
||||
|
||||
---

<!-- Add new decisions at the end of this file using the format above -->
|
||||
|
||||
## ADL-025 — Missing serial numbers must remain absent in Reanimator export
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
LOGPile previously generated synthetic serial numbers for components that had no real serial in
|
||||
source data, especially CPUs and PCIe-class devices. This made the payload look richer, but the
|
||||
serials were not authoritative and could mislead downstream consumers. Reanimator can already
|
||||
accept missing serials and generate its own internal fallback identifiers when needed.
|
||||
|
||||
**Decision:**
|
||||
- Do not synthesize fake serial numbers in LOGPile's Reanimator export.
|
||||
- If a component has no real serial in parsed source data, export the serial field as absent.
|
||||
- This applies to CPUs, PCIe devices, GPUs, NICs, and any other component class unless an
|
||||
upstream contract explicitly requires a deterministic exporter-generated identifier.
|
||||
- Any fallback serial generation defined by the upstream contract is ingest-side Reanimator behavior,
|
||||
not LOGPile exporter behavior.
|
||||
|
||||
**Consequences:**
|
||||
- Exported payloads carry only source-backed serial numbers.
|
||||
- Fake identifiers such as `BOARD-...-CPU-...` or synthetic PCIe serials are no longer considered
|
||||
acceptable exporter behavior.
|
||||
- Any future attempt to reintroduce generated serials requires an explicit contract review and a
|
||||
new ADL entry.
|
||||
|
||||
---
|
||||
|
||||
## ADL-026 — Live Redfish collection uses explicit preflight host-power confirmation
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
Live Redfish inventory can be incomplete when the managed host is powered off. At the same time,
|
||||
LOGPile must not silently power on a host without explicit user choice. The collection workflow
|
||||
therefore needs a preflight step that verifies connectivity, shows current host power state to the
|
||||
user, and only powers on the host when the user explicitly chose that path.
|
||||
|
||||
**Decision:**
|
||||
- Add a dedicated live preflight API step before collection starts.
|
||||
- UI first runs connectivity and power-state check, then offers:
|
||||
- collect as-is
|
||||
- power on and collect
|
||||
- if the host is off and the user does not answer within 5 seconds, default to collecting without
|
||||
powering the host on
|
||||
- Redfish collection may power on the host only when the request explicitly sets
|
||||
`power_on_if_host_off=true`
|
||||
- when LOGPile powers on the host for collection, it must try to power the host back off after
|
||||
collection completes
|
||||
- if LOGPile did not power the host on itself, it must never power the host off
|
||||
- all preflight and power-control steps must be logged into the collection log and therefore into
|
||||
the raw-export bundle
|
||||
|
||||
**Consequences:**
|
||||
- Live collection becomes a two-step UX: probe first, collect second.
|
||||
- Raw bundles preserve operator-visible evidence of power-state decisions and power-control attempts.
|
||||
- Power-on failures do not block collection entirely; they only downgrade completeness expectations.
|
||||
|
||||
---
|
||||
|
||||
## ADL-027 — Sensors without numeric readings are not exported
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
Some parsed sensor records carry only a name, unit, or status, but no actual numeric reading. Such
|
||||
records are not useful as telemetry in Reanimator export and create noisy, low-value sensor lists.
|
||||
|
||||
**Decision:**
|
||||
- Do not export temperature, power, fan, or other sensor records unless they carry a real numeric
|
||||
measurement value.
|
||||
- Presence of a sensor name or health/status alone is not sufficient for export.
|
||||
|
||||
**Consequences:**
|
||||
- Exported sensor groups contain only actionable telemetry.
|
||||
- Parsers and collectors may still keep non-numeric sensor artifacts internally for diagnostics, but
|
||||
Reanimator export must filter them out.
|
||||
|
||||
---
|
||||
|
||||
## ADL-028 — Reanimator PCIe export excludes storage endpoints and synthetic serials
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
Some Redfish and archive sources expose NVMe drives both as storage inventory and as PCIe-visible
|
||||
endpoints. Exporting such drives in both `hardware.storage` and `hardware.pcie_devices` creates
|
||||
duplicates without adding useful topology value. At the same time, PCIe-class export still had old
|
||||
fallback behavior that generated synthetic serial numbers when source serials were absent.
|
||||
|
||||
**Decision:**
|
||||
- Export disks and NVMe drives only through `hardware.storage`.
|
||||
- Do not export storage endpoints as `hardware.pcie_devices`, even if the source inventory exposes
|
||||
them as PCIe/NVMe devices.
|
||||
- Keep real PCIe storage controllers such as RAID and HBA adapters in `hardware.pcie_devices`.
|
||||
- Do not synthesize PCIe/GPU/NIC serial numbers in LOGPile; missing serials stay absent.
|
||||
- Treat placeholder names such as `Network Device View` as non-authoritative and prefer resolved
|
||||
device names when stronger data exists.
|
||||
|
||||
**Consequences:**
|
||||
- Reanimator payloads no longer duplicate NVMe drives between storage and PCIe sections.
|
||||
- PCIe export remains topology-focused while storage export remains component-focused.
|
||||
- Missing PCIe-class serials no longer produce fake `BOARD-...-PCIE-...` identifiers.
|
||||
|
||||
---
|
||||
|
||||
## ADL-029 — Local exporter guidance tracks upstream contract v2.7 terminology
|
||||
|
||||
**Date:** 2026-03-15
|
||||
**Context:**
|
||||
The upstream Reanimator hardware ingest contract moved to `v2.7` and clarified several points that
|
||||
matter for LOGPile documentation: ingest-side serial fallback rules, canonical PCIe addressing via
|
||||
`slot`, the optional `event_logs` section, and the shared `manufactured_year_week` field.
|
||||
|
||||
**Decision:**
|
||||
- Keep the local mirrored contract file as an exact copy of the upstream `v2.7` document.
|
||||
- Describe CPU/PCIe serial fallback as Reanimator ingest behavior, not LOGPile exporter behavior.
|
||||
- Treat `pcie_devices.slot` as the canonical address on the LOGPile side as well; `bdf` may remain
|
||||
an internal fallback/dedupe key but is not serialized in the payload.
|
||||
- Export `event_logs` only from normalized parser/collector events that can be mapped to contract
|
||||
sources `host` / `bmc` / `redfish` without synthesizing message content.
|
||||
- Export `manufactured_year_week` only as a reliable passthrough when a parser/collector already
|
||||
extracted a valid `YYYY-Www` value.
|
||||
|
||||
**Consequences:**
|
||||
- Local bible wording no longer conflicts with upstream contract terminology.
|
||||
- Reanimator payloads use contract-native PCIe addressing and no longer expose `bdf` as a parallel
|
||||
coordinate.
|
||||
- LOGPile event export remains strictly source-derived; internal warnings such as LOGPile analysis
|
||||
notes do not leak into Reanimator `event_logs`.
|
||||
|
||||
---
|
||||
|
||||
## ADL-030 — Audit result rendering is delegated to embedded reanimator/chart
|
||||
|
||||
**Date:** 2026-03-16
|
||||
**Context:**
|
||||
LOGPile already owns file upload, Redfish collection, archive parsing, normalization, and
|
||||
Reanimator export. Maintaining a second host-side audit renderer for the same data created
|
||||
presentation drift and duplicated UI logic.
|
||||
|
||||
**Decision:**
|
||||
- Use vendored `reanimator/chart` as the only audit result viewer.
|
||||
- Keep LOGPile responsible for service flows: upload, live collection, batch convert, raw export,
|
||||
Reanimator export, and parse-error reporting.
|
||||
- Render the current dataset by converting it to Reanimator JSON and passing that snapshot to
|
||||
embedded `chart` under `/chart/current`.
|
||||
|
||||
**Consequences:**
|
||||
- Reanimator JSON becomes the single presentation contract for the audit surface.
|
||||
- The host UI becomes a service shell around the viewer instead of maintaining its own
|
||||
field-by-field tabs.
|
||||
- `internal/chart` must be updated explicitly as a git submodule when the viewer changes.
|
||||
|
||||
---
|
||||
|
||||
## ADL-031 — Redfish uses profile-driven acquisition and unified ingest entrypoints
|
||||
|
||||
**Date:** 2026-03-17
|
||||
**Context:**
|
||||
Redfish collection had accumulated platform-specific probing in the shared collector path, while
|
||||
upload and raw-export replay still entered analysis through direct handler branches. This made
|
||||
vendor/model tuning harder to contain and increased regression risk when one topology needed a
|
||||
special acquisition strategy.
|
||||
|
||||
**Decision:**
|
||||
- Introduce `internal/ingest.Service` as the internal source-family entrypoint for archive parsing
|
||||
and Redfish raw replay.
|
||||
- Introduce `internal/collector/redfishprofile/` for Redfish profile matching and modular hooks.
|
||||
- Split Redfish behavior into coordinated phases:
|
||||
- acquisition planning during live collection
|
||||
- analysis hooks during snapshot replay
|
||||
- Use score-based profile matching. If confidence is low, enter fallback acquisition mode and
|
||||
aggregate only safe additive profile probes.
|
||||
- Allow profile modules to provide bounded acquisition tuning hints such as crawl cap, prefetch
|
||||
behavior, and expensive post-probe toggles.
|
||||
- Allow profile modules to own model-specific `CriticalPaths` and bounded `PlanBPaths` so vendor
|
||||
recovery targets stop leaking into the collector core.
|
||||
- Expose Redfish profile matching as structured diagnostics during live collection: logs must
|
||||
contain all module scores, and collect job status must expose active modules for the UI.
|
||||
|
||||
**Consequences:**
|
||||
- Server handlers stop owning parser-vs-replay branching details directly.
|
||||
- Vendor/model-specific Redfish logic gets an explicit module boundary.
|
||||
- Unknown-vendor Redfish collection becomes slower but more complete by design.
|
||||
- Tactical Redfish fixes should move into profile modules instead of widening generic replay logic.
|
||||
- Repo-owned compact fixtures under `internal/collector/redfishprofile/testdata/`, derived from
|
||||
representative raw-export snapshots, are used to lock profile matching and acquisition tuning
|
||||
for known MSI and Supermicro-family shapes.
|
||||
|
||||
---
|
||||
|
||||
## ADL-032 — MSI ghost GPU filter: exclude GPUs with temperature=0 on powered-on host
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:**
|
||||
MSI/AMI BMC caches GPU inventory from the host via Host Interface (in-band). When GPUs are
|
||||
removed without a reboot the old entries remain in `Chassis/GPU*` and
|
||||
`Systems/Self/Processors/GPU*` with `Status.Health: OK, State: Enabled`. The BMC has no
|
||||
out-of-band mechanism to detect physical absence. A physically present GPU always reports
|
||||
an ambient temperature (>0°C) even when idle; a stale cached entry returns `Reading: 0`.
|
||||
|
||||
**Decision:**
|
||||
- Add `EnableMSIGhostGPUFilter` directive (enabled by MSI profile's `refineAnalysis`
|
||||
alongside `EnableProcessorGPUFallback`).
|
||||
- In `collectGPUsFromProcessors`: for each processor GPU, resolve its chassis path and read
|
||||
`Chassis/GPU{n}/Sensors/GPU{n}_Temperature`. If `PowerState=On` and `Reading=0` → skip.
|
||||
- Filter only applies when host is powered on; when host is off all temperatures are 0 and
|
||||
the signal is ambiguous.
|
||||
|
||||
**Consequences:**
|
||||
- Ghost GPUs from previous hardware configurations no longer appear in the inventory.
|
||||
- Filter is MSI-profile-owned and does not affect HGX, Supermicro, or generic paths.
|
||||
- Any new MSI GPU chassis that uses a different temperature sensor path will bypass the filter
|
||||
(safe default: include rather than wrongly exclude).
|
||||
|
||||
---
|
||||
|
||||
## ADL-033 — Reanimator export collected_at uses inventory LastModifiedTime with 30-day fallback
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:**
|
||||
For Redfish sources the BMC Manager `DateTime` reflects when the BMC clock read the time, not
|
||||
when the hardware inventory was last known-good. `InventoryData/Status.LastModifiedTime`
|
||||
(AMI/MSI OEM endpoint) records the actual timestamp of the last successful host-pushed
|
||||
inventory cycle and is a better proxy for "when was this hardware configuration last confirmed".
|
||||
|
||||
**Decision:**
|
||||
- `inferInventoryLastModifiedTime` reads `LastModifiedTime` from the snapshot and sets
|
||||
`AnalysisResult.InventoryLastModifiedAt`.
|
||||
- `reanimatorCollectedAt()` in the exporter selects `InventoryLastModifiedAt` when it is set
|
||||
and no older than 30 days; otherwise falls back to `CollectedAt`.
|
||||
- Fallback rationale: inventory older than 30 days is likely from a long-running server with
|
||||
no recent reboot; using the actual collection date is more useful for the downstream consumer.
|
||||
- The inventory timestamp is also logged during replay and live collection for diagnostics.
|
||||
|
||||
**Consequences:**
|
||||
- Reanimator export `collected_at` reflects the last confirmed inventory cycle on AMI/MSI BMCs.
|
||||
- On non-AMI BMCs or when `InventoryData/Status` is absent, behavior is unchanged.
|
||||
- If inventory is stale (>30 days), collection date is used as before.
|
||||
|
||||
---
|
||||
|
||||
## ADL-034 — Redfish inventory invalidated before host power-on
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:**
|
||||
When a host is powered on by the collector (`power_on_if_host_off=true`), the BMC still holds
|
||||
inventory from the previous boot. If hardware changed between shutdowns, the new boot will push
|
||||
fresh inventory — but only if the BMC accepts it (CRC mismatch triggers re-population). Without
|
||||
explicit invalidation, unchanged CRCs can cause the BMC to skip re-processing even after a
|
||||
hardware change.
|
||||
|
||||
**Decision:**
|
||||
- Before any power-on attempt, `invalidateRedfishInventory` POSTs to
|
||||
`{systemPath}/Oem/Ami/Inventory/Crc` with all groups zeroed (`CPU`, `DIMM`, `PCIE`,
|
||||
`CERTIFICATES`, `SECUREBOOT`).
|
||||
- Best-effort: a 404/405 response (non-AMI BMC) is logged but does not fail or abort collection.
|
||||
- The invalidation is logged at `INFO` level and surfaced as a collect progress message.
|
||||
|
||||
**Consequences:**
|
||||
- On AMI/MSI BMCs: the next boot will push a full fresh inventory regardless of whether
|
||||
CRCs appear unchanged, eliminating ghost components from prior hardware configurations.
|
||||
- On non-AMI BMCs: the POST fails immediately (endpoint does not exist), nothing changes.
|
||||
- Invalidation runs only when `power_on_if_host_off=true` and host is confirmed off.
|
||||
|
||||
---
|
||||
|
||||
## ADL-035 — Redfish hardware event log collection from Systems LogServices
|
||||
|
||||
**Date:** 2026-03-18
|
||||
**Context:** Redfish BMCs expose event logs via `LogServices/{svc}/Entries`. On MSI/AMI this includes the IPMI SEL with hardware events (temperature, power, drive failures, etc.). Live collection previously collected only inventory/sensor snapshots; event history was unavailable in Reanimator.
|
||||
**Decision:**
|
||||
- After tree-walk, fetch hardware log entries separately via `collectRedfishLogEntries()` (not part of tree-walk to avoid bloat).
|
||||
- Only `Systems/{sys}/LogServices` is queried — Managers LogServices (BMC audit/journal) are excluded.
|
||||
- Log services with Id/Name containing "audit", "journal", "bmc", "security", "manager", "debug" are skipped.
|
||||
- Entries older than 7 days (client-side filter) are discarded. Pages are followed until an out-of-window entry is found (assumes newest-first ordering, typical for BMCs).
|
||||
- Entries with `EntryType: "Oem"` or `MessageId` containing user/auth/login keywords are filtered as non-hardware.
|
||||
- Raw entries stored in `rawPayloads["redfish_log_entries"]` as `[]map[string]interface{}`.
|
||||
- Parsed to `models.Event` in `parseRedfishLogEntries()` during replay — same path for live and offline.
|
||||
- Max 200 entries per log service, 500 total to limit BMC load.
|
||||
**Consequences:**
|
||||
- Hardware event history (last 7 days) visible in Reanimator `EventLogs` section.
|
||||
- No impact on existing inventory pipeline or offline archive replay (archives without `redfish_log_entries` key silently skip parsing).
|
||||
- Adds extra HTTP requests during live collection (sequential, after tree-walk completes).
|
||||
### New file: `bible-local/README.md` (42 lines)
|
||||
# LOGPile Bible
|
||||
|
||||
`bible-local/` is the project-specific source of truth for LOGPile.
|
||||
Keep top-level docs minimal and put maintained architecture/API contracts here.
|
||||
|
||||
## Rules
|
||||
|
||||
- Documentation language: English only
|
||||
- Update relevant bible files in the same change as the code
|
||||
- Record significant architectural decisions in [`10-decisions.md`](10-decisions.md)
|
||||
- Do not duplicate shared rules from `bible/`
|
||||
|
||||
## Read order
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| [01-overview.md](01-overview.md) | Product scope, modes, non-goals |
|
||||
| [02-architecture.md](02-architecture.md) | Runtime structure, state, main flows |
|
||||
| [04-data-models.md](04-data-models.md) | Stable data contracts and canonical inventory |
|
||||
| [03-api.md](03-api.md) | HTTP endpoints and response contracts |
|
||||
| [05-collectors.md](05-collectors.md) | Live collection behavior |
|
||||
| [06-parsers.md](06-parsers.md) | Archive parser framework and vendor coverage |
|
||||
| [07-exporters.md](07-exporters.md) | Raw export, Reanimator export, batch convert |
|
||||
| [docs/hardware-ingest-contract.md](docs/hardware-ingest-contract.md) | Reanimator ingest schema mirrored locally |
|
||||
| [08-build-release.md](08-build-release.md) | Build and release workflow |
|
||||
| [09-testing.md](09-testing.md) | Test expectations and regression rules |
|
||||
| [10-decisions.md](10-decisions.md) | Architectural Decision Log |
|
||||
|
||||
## Fast orientation
|
||||
|
||||
- Entry point: `cmd/logpile/main.go`
|
||||
- HTTP layer: `internal/server/`
|
||||
- Core contracts: `internal/models/models.go`
|
||||
- Live collection: `internal/collector/`
|
||||
- Archive parsing: `internal/parser/`
|
||||
- Export conversion: `internal/exporter/`
|
||||
- Frontend consumer: `web/static/js/app.js`
|
||||
|
||||
## Maintenance rule
|
||||
|
||||
If a document becomes stale, either fix it immediately or delete it.
|
||||
Stale docs are worse than missing docs.
|
||||
### New file: `bible-local/docs/hardware-ingest-contract.md` (793 lines)
|
||||
---
|
||||
title: Hardware Ingest JSON Contract
|
||||
version: "2.7"
|
||||
updated: "2026-03-15"
|
||||
maintainer: Reanimator Core
|
||||
audience: external-integrators, ai-agents
|
||||
language: ru
|
||||
---
|
||||
|
||||
# Интеграция с Reanimator: контракт JSON-импорта аппаратного обеспечения
|
||||
|
||||
Версия: **2.7** · Дата: **2026-03-15**
|
||||
|
||||
Документ описывает формат JSON для передачи данных об аппаратном обеспечении серверов в систему **Reanimator** (управление жизненным циклом аппаратного обеспечения).
|
||||
Предназначен для разработчиков смежных систем (Redfish-коллекторов, агентов мониторинга, CMDB-экспортёров) и может быть включён в документацию интегрируемых проектов.
|
||||
|
||||
> Актуальная версия документа: https://git.mchus.pro/reanimator/core/src/branch/main/bible-local/docs/hardware-ingest-contract.md
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Версия | Дата | Изменения |
|
||||
|--------|------|-----------|
|
||||
| 2.7 | 2026-03-15 | Явно запрещён синтез данных в `event_logs`; интеграторы не должны придумывать серийные номера компонентов, если источник их не отдал |
|
||||
| 2.6 | 2026-03-15 | Добавлена необязательная секция `event_logs` для dedup/upsert логов `host` / `bmc` / `redfish` вне history timeline |
|
||||
| 2.5 | 2026-03-15 | Добавлено общее необязательное поле `manufactured_year_week` для компонентных секций (`YYYY-Www`) |
|
||||
| 2.4 | 2026-03-15 | Добавлена первая волна component telemetry: health/life поля для `cpus`, `memory`, `storage`, `pcie_devices`, `power_supplies` |
|
||||
| 2.3 | 2026-03-15 | Добавлены component telemetry поля: `pcie_devices.temperature_c`, `pcie_devices.power_w`, `power_supplies.temperature_c` |
|
||||
| 2.2 | 2026-03-15 | Добавлено поле `numa_node` у `pcie_devices` для topology/affinity |
|
||||
| 2.1 | 2026-03-15 | Добавлена секция `sensors` (fans, power, temperatures, other); поле `mac_addresses` у `pcie_devices`; расширен список значений `device_class` |
|
||||
| 2.0 | 2026-02-01 | История статусов (`status_history`, `status_changed_at`); поля telemetry у PSU; async job response |
|
||||
| 1.0 | 2026-01-01 | Начальная версия контракта |
|
||||
|
||||
---
|
||||
|
||||
## Принципы
|
||||
|
||||
1. **Snapshot** — JSON описывает состояние сервера на момент сбора. Может включать историю изменений статуса компонентов.
|
||||
2. **Идемпотентность** — повторная отправка идентичного payload не создаёт дублей (дедупликация по хешу).
|
||||
3. **Частичность** — можно передавать только те секции, данные по которым доступны. Пустой массив и отсутствие секции эквивалентны.
|
||||
4. **Строгая схема** — endpoint использует строгий JSON-декодер; неизвестные поля приводят к `400 Bad Request`.
|
||||
5. **Event-driven** — импорт создаёт события в timeline (LOG_COLLECTED, INSTALLED, REMOVED, FIRMWARE_CHANGED и др.).
|
||||
6. **Без синтеза со стороны интегратора** — сборщик передаёт только фактически собранные значения. Нельзя придумывать `serial_number`, `component_ref`, `message`, `message_id` или другие идентификаторы/атрибуты, если источник их не предоставил или парсер не смог их надёжно извлечь.
|
||||
|
||||
---
|
||||
|
||||
## Endpoint
|
||||
|
||||
```
|
||||
POST /ingest/hardware
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
**Ответ при приёме (202 Accepted):**
|
||||
```json
|
||||
{
|
||||
"status": "accepted",
|
||||
"job_id": "job_01J..."
|
||||
}
|
||||
```
|
||||
|
||||
Импорт выполняется асинхронно. Результат доступен по:
|
||||
```
|
||||
GET /ingest/hardware/jobs/{job_id}
|
||||
```
|
||||
|
||||
**Ответ при успехе задачи:**
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"bundle_id": "lb_01J...",
|
||||
"asset_id": "mach_01J...",
|
||||
"collected_at": "2026-02-10T15:30:00Z",
|
||||
"duplicate": false,
|
||||
"summary": {
|
||||
"parts_observed": 15,
|
||||
"parts_created": 2,
|
||||
"parts_updated": 13,
|
||||
"installations_created": 2,
|
||||
"installations_closed": 1,
|
||||
"timeline_events_created": 9,
|
||||
"failure_events_created": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Ответ при дубликате:**
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"duplicate": true,
|
||||
"message": "LogBundle with this content hash already exists"
|
||||
}
|
||||
```
|
||||
|
||||
**Ответ при ошибке (400 Bad Request):**
|
||||
```json
|
||||
{
|
||||
"status": "error",
|
||||
"error": "validation_failed",
|
||||
"details": {
|
||||
"field": "hardware.board.serial_number",
|
||||
"message": "serial_number is required"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Частые причины `400`:
|
||||
- Неверный формат `collected_at` (требуется RFC3339).
|
||||
- Пустой `hardware.board.serial_number`.
|
||||
- Наличие неизвестного JSON-поля на любом уровне.
|
||||
- Тело запроса превышает допустимый размер.
|
||||
|
||||
---
|
||||
|
||||
## Структура верхнего уровня
|
||||
|
||||
```json
|
||||
{
|
||||
"filename": "redfish://10.10.10.103",
|
||||
"source_type": "api",
|
||||
"protocol": "redfish",
|
||||
"target_host": "10.10.10.103",
|
||||
"collected_at": "2026-02-10T15:30:00Z",
|
||||
"hardware": {
|
||||
"board": { ... },
|
||||
"firmware": [ ... ],
|
||||
"cpus": [ ... ],
|
||||
"memory": [ ... ],
|
||||
"storage": [ ... ],
|
||||
"pcie_devices": [ ... ],
|
||||
"power_supplies": [ ... ],
|
||||
"sensors": { ... },
|
||||
"event_logs": [ ... ]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Поля верхнего уровня
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `collected_at` | string RFC3339 | **да** | Время сбора данных |
|
||||
| `hardware` | object | **да** | Аппаратный снапшот |
|
||||
| `hardware.board.serial_number` | string | **да** | Серийный номер платы/сервера |
|
||||
| `target_host` | string | нет | IP или hostname |
|
||||
| `source_type` | string | нет | Тип источника: `api`, `logfile`, `manual` |
|
||||
| `protocol` | string | нет | Протокол: `redfish`, `ipmi`, `snmp`, `ssh` |
|
||||
| `filename` | string | нет | Идентификатор источника |
|
||||
|
||||
---
|
||||
|
||||
## Общие поля статуса компонентов
|
||||
|
||||
Применяются ко всем компонентным секциям (`cpus`, `memory`, `storage`, `pcie_devices`, `power_supplies`).
|
||||
|
||||
| Поле | Тип | Описание |
|
||||
|------|-----|----------|
|
||||
| `status` | string | Текущий статус: `OK`, `Warning`, `Critical`, `Unknown`, `Empty` |
|
||||
| `status_checked_at` | string RFC3339 | Время последней проверки статуса |
|
||||
| `status_changed_at` | string RFC3339 | Время последнего изменения статуса |
|
||||
| `status_history` | array | История переходов статусов (см. ниже) |
|
||||
| `error_description` | string | Текст ошибки/диагностики |
|
||||
| `manufactured_year_week` | string | Дата производства в формате `YYYY-Www`, например `2024-W07` |
|
||||
|
||||
**Объект `status_history[]`:**
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `status` | string | **да** | Статус в этот момент |
|
||||
| `changed_at` | string RFC3339 | **да** | Время перехода (без этого поля запись игнорируется) |
|
||||
| `details` | string | нет | Пояснение к переходу |
|
||||
|
||||
**Правила приоритета времени события:**
|
||||
|
||||
1. `status_changed_at`
|
||||
2. Последняя запись `status_history` с совпадающим статусом
|
||||
3. Последняя парсируемая запись `status_history`
|
||||
4. `status_checked_at`
|
||||
|
||||
**Правила передачи статусов:**
|
||||
- Передавайте `status` как текущее состояние компонента в snapshot.
|
||||
- Если источник хранит историю — передавайте `status_history` отсортированным по `changed_at` по возрастанию.
|
||||
- Не включайте записи `status_history` без `changed_at`.
|
||||
- Все даты — RFC3339, рекомендуется UTC (`Z`).
|
||||
- `manufactured_year_week` используйте, когда источник знает только год и неделю производства, без точной календарной даты.
|
||||
|
||||
---
|
||||
|
||||
## Секции hardware
|
||||
|
||||
### board
|
||||
|
||||
Основная информация о сервере. Обязательная секция.
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `serial_number` | string | **да** | Серийный номер (ключ идентификации Asset) |
|
||||
| `manufacturer` | string | нет | Производитель |
|
||||
| `product_name` | string | нет | Модель |
|
||||
| `part_number` | string | нет | Партномер |
|
||||
| `uuid` | string | нет | UUID системы |
|
||||
|
||||
Значения `"NULL"` в строковых полях трактуются как отсутствие данных.
|
||||
|
||||
```json
|
||||
"board": {
|
||||
"manufacturer": "Supermicro",
|
||||
"product_name": "X12DPG-QT6",
|
||||
"serial_number": "21D634101",
|
||||
"part_number": "X12DPG-QT6-REV1.01",
|
||||
"uuid": "d7ef2fe5-2fd0-11f0-910a-346f11040868"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### firmware
|
||||
|
||||
Версии прошивок системных компонентов (BIOS, BMC, CPLD и др.).
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `device_name` | string | **да** | Название устройства (`BIOS`, `BMC`, `CPLD`, …) |
|
||||
| `version` | string | **да** | Версия прошивки |
|
||||
|
||||
Записи с пустым `device_name` или `version` игнорируются.
|
||||
Изменение версии создаёт событие `FIRMWARE_CHANGED` для Asset.
|
||||
|
||||
```json
|
||||
"firmware": [
|
||||
{ "device_name": "BIOS", "version": "06.08.05" },
|
||||
{ "device_name": "BMC", "version": "5.17.00" },
|
||||
{ "device_name": "CPLD", "version": "01.02.03" }
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### cpus
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `socket` | int | **да** | Номер сокета (используется для генерации serial) |
|
||||
| `model` | string | нет | Модель процессора |
|
||||
| `manufacturer` | string | нет | Производитель |
|
||||
| `cores` | int | нет | Количество ядер |
|
||||
| `threads` | int | нет | Количество потоков |
|
||||
| `frequency_mhz` | int | нет | Текущая частота |
|
||||
| `max_frequency_mhz` | int | нет | Максимальная частота |
|
||||
| `temperature_c` | float | нет | Температура CPU, °C (telemetry) |
|
||||
| `power_w` | float | нет | Текущая мощность CPU, Вт (telemetry) |
|
||||
| `throttled` | bool | нет | Зафиксирован thermal/power throttling |
|
||||
| `correctable_error_count` | int | нет | Количество корректируемых ошибок CPU |
|
||||
| `uncorrectable_error_count` | int | нет | Количество некорректируемых ошибок CPU |
|
||||
| `life_remaining_pct` | float | нет | Остаточный ресурс / health, % |
|
||||
| `life_used_pct` | float | нет | Использованный ресурс / wear, % |
|
||||
| `serial_number` | string | нет | Серийный номер (если доступен) |
|
||||
| `firmware` | string | нет | Версия микрокода; если логгер отдает `Microcode level`, передавайте его сюда как есть |
|
||||
| `present` | bool | нет | Наличие (по умолчанию `true`) |
|
||||
| + общие поля статуса | | | см. раздел выше |
|
||||
|
||||
**Генерация serial_number при отсутствии:** `{board_serial}-CPU-{socket}`
|
||||
|
||||
Если источник использует поле/лейбл `Microcode level`, его значение передавайте в `cpus[].firmware` без дополнительного преобразования.
|
||||
|
||||
```json
|
||||
"cpus": [
|
||||
{
|
||||
"socket": 0,
|
||||
"model": "INTEL(R) XEON(R) GOLD 6530",
|
||||
"cores": 32,
|
||||
"threads": 64,
|
||||
"frequency_mhz": 2100,
|
||||
"max_frequency_mhz": 4000,
|
||||
"temperature_c": 61.5,
|
||||
"power_w": 182.0,
|
||||
"throttled": false,
|
||||
"manufacturer": "Intel",
|
||||
"status": "OK",
|
||||
"status_checked_at": "2026-02-10T15:28:00Z"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### memory
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `slot` | string | нет | Идентификатор слота |
|
||||
| `present` | bool | нет | Наличие модуля (по умолчанию `true`) |
|
||||
| `serial_number` | string | нет | Серийный номер |
|
||||
| `part_number` | string | нет | Партномер (используется как модель) |
|
||||
| `manufacturer` | string | нет | Производитель |
|
||||
| `size_mb` | int | нет | Объём в МБ |
|
||||
| `type` | string | нет | Тип: `DDR3`, `DDR4`, `DDR5`, … |
|
||||
| `max_speed_mhz` | int | нет | Максимальная частота |
|
||||
| `current_speed_mhz` | int | нет | Текущая частота |
|
||||
| `temperature_c` | float | нет | Температура DIMM/модуля, °C (telemetry) |
|
||||
| `correctable_ecc_error_count` | int | нет | Количество корректируемых ECC-ошибок |
|
||||
| `uncorrectable_ecc_error_count` | int | нет | Количество некорректируемых ECC-ошибок |
|
||||
| `life_remaining_pct` | float | нет | Остаточный ресурс / health, % |
|
||||
| `life_used_pct` | float | нет | Использованный ресурс / wear, % |
|
||||
| `spare_blocks_remaining_pct` | float | нет | Остаток spare blocks, % |
|
||||
| `performance_degraded` | bool | нет | Зафиксирована деградация производительности |
|
||||
| `data_loss_detected` | bool | нет | Источник сигнализирует риск/факт потери данных |
|
||||
| + общие поля статуса | | | см. раздел выше |
|
||||
|
||||
Модуль без `serial_number` игнорируется. Модуль с `present=false` или `status=Empty` игнорируется.
|
||||
|
||||
```json
|
||||
"memory": [
|
||||
{
|
||||
"slot": "CPU0_C0D0",
|
||||
"present": true,
|
||||
"size_mb": 32768,
|
||||
"type": "DDR5",
|
||||
"max_speed_mhz": 4800,
|
||||
"current_speed_mhz": 4800,
|
||||
"temperature_c": 43.0,
|
||||
"correctable_ecc_error_count": 0,
|
||||
"manufacturer": "Hynix",
|
||||
"serial_number": "80AD032419E17CEEC1",
|
||||
"part_number": "HMCG88AGBRA191N",
|
||||
"status": "OK"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### storage
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `slot` | string | нет | Канонический адрес установки PCIe-устройства; передавайте BDF (`0000:18:00.0`) |
|
||||
| `serial_number` | string | нет | Серийный номер |
|
||||
| `model` | string | нет | Модель |
|
||||
| `manufacturer` | string | нет | Производитель |
|
||||
| `type` | string | нет | Тип: `NVMe`, `SSD`, `HDD` |
|
||||
| `interface` | string | нет | Интерфейс: `NVMe`, `SATA`, `SAS` |
|
||||
| `size_gb` | int | нет | Размер в ГБ |
|
||||
| `temperature_c` | float | нет | Температура накопителя, °C (telemetry) |
|
||||
| `power_on_hours` | int64 | нет | Время работы, часы |
|
||||
| `power_cycles` | int64 | нет | Количество циклов питания |
|
||||
| `unsafe_shutdowns` | int64 | нет | Нештатные выключения |
|
||||
| `media_errors` | int64 | нет | Ошибки носителя / media errors |
|
||||
| `error_log_entries` | int64 | нет | Количество записей в error log |
|
||||
| `written_bytes` | int64 | нет | Всего записано байт |
|
||||
| `read_bytes` | int64 | нет | Всего прочитано байт |
|
||||
| `life_used_pct` | float | нет | Использованный ресурс / wear, % |
|
||||
| `life_remaining_pct` | float | нет | Остаточный ресурс / health, % |
|
||||
| `available_spare_pct` | float | нет | Доступный spare, % |
|
||||
| `reallocated_sectors` | int64 | нет | Переназначенные сектора |
|
||||
| `current_pending_sectors` | int64 | нет | Сектора в ожидании ремапа |
|
||||
| `offline_uncorrectable` | int64 | нет | Некорректируемые ошибки offline scan |
|
||||
| `firmware` | string | нет | Версия прошивки |
|
||||
| `present` | bool | нет | Наличие (по умолчанию `true`) |
|
||||
| + общие поля статуса | | | см. раздел выше |
|
||||
|
||||
Диск без `serial_number` игнорируется. Изменение `firmware` создаёт событие `FIRMWARE_CHANGED`.
|
||||
|
||||
```json
|
||||
"storage": [
|
||||
{
|
||||
"slot": "OB01",
|
||||
"type": "NVMe",
|
||||
"model": "INTEL SSDPF2KX076T1",
|
||||
"size_gb": 7680,
|
||||
"temperature_c": 38.5,
|
||||
"power_on_hours": 12450,
|
||||
"unsafe_shutdowns": 3,
|
||||
"written_bytes": 9876543210,
|
||||
"life_remaining_pct": 91.0,
|
||||
"serial_number": "BTAX41900GF87P6DGN",
|
||||
"manufacturer": "Intel",
|
||||
"firmware": "9CV10510",
|
||||
"interface": "NVMe",
|
||||
"present": true,
|
||||
"status": "OK"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### pcie_devices
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `slot` | string | нет | Идентификатор слота |
|
||||
| `vendor_id` | int | нет | PCI Vendor ID (decimal) |
|
||||
| `device_id` | int | нет | PCI Device ID (decimal) |
|
||||
| `numa_node` | int | нет | NUMA node / CPU affinity устройства |
|
||||
| `temperature_c` | float | нет | Температура устройства, °C (telemetry) |
|
||||
| `power_w` | float | нет | Текущее энергопотребление устройства, Вт (telemetry) |
|
||||
| `life_remaining_pct` | float | нет | Остаточный ресурс / health, % |
|
||||
| `life_used_pct` | float | нет | Использованный ресурс / wear, % |
|
||||
| `ecc_corrected_total` | int64 | нет | Всего корректируемых ECC-ошибок |
|
||||
| `ecc_uncorrected_total` | int64 | нет | Всего некорректируемых ECC-ошибок |
|
||||
| `hw_slowdown` | bool | нет | Устройство вошло в hardware slowdown / protective mode |
|
||||
| `battery_charge_pct` | float | нет | Заряд батареи / supercap, % |
|
||||
| `battery_health_pct` | float | нет | Состояние батареи / supercap, % |
|
||||
| `battery_temperature_c` | float | нет | Температура батареи / supercap, °C |
|
||||
| `battery_voltage_v` | float | нет | Напряжение батареи / supercap, В |
|
||||
| `battery_replace_required` | bool | нет | Требуется замена батареи / supercap |
|
||||
| `sfp_temperature_c` | float | нет | Температура SFP/optic, °C |
|
||||
| `sfp_tx_power_dbm` | float | нет | TX optical power, dBm |
|
||||
| `sfp_rx_power_dbm` | float | нет | RX optical power, dBm |
|
||||
| `sfp_voltage_v` | float | нет | Напряжение SFP, В |
|
||||
| `sfp_bias_ma` | float | нет | Bias current SFP, мА |
|
||||
| `bdf` | string | нет | Deprecated alias для `slot`; при наличии ingest нормализует его в `slot` |
|
||||
| `device_class` | string | нет | Класс устройства (см. список ниже) |
|
||||
| `manufacturer` | string | нет | Производитель |
|
||||
| `model` | string | нет | Модель |
|
||||
| `serial_number` | string | нет | Серийный номер |
|
||||
| `firmware` | string | нет | Версия прошивки |
|
||||
| `link_width` | int | нет | Текущая ширина линка |
|
||||
| `link_speed` | string | нет | Текущая скорость: `Gen3`, `Gen4`, `Gen5` |
|
||||
| `max_link_width` | int | нет | Максимальная ширина линка |
|
||||
| `max_link_speed` | string | нет | Максимальная скорость |
|
||||
| `mac_addresses` | string[] | нет | MAC-адреса портов (для сетевых устройств) |
|
||||
| `present` | bool | нет | Наличие (по умолчанию `true`) |
|
||||
| + общие поля статуса | | | см. раздел выше |
|
||||
|
||||
`numa_node` передавайте для NIC / InfiniBand / RAID / GPU, когда источник знает CPU/NUMA affinity. Поле сохраняется в snapshot-атрибутах PCIe-компонента и дублируется в telemetry для topology use cases.
|
||||
Поля `temperature_c` и `power_w` используйте для device-level telemetry GPU / accelerator / smart PCIe devices. Они не влияют на идентификацию компонента.
|
||||
|
||||
**Генерация serial_number при отсутствии или `"N/A"`:** `{board_serial}-PCIE-{slot}`, где `slot` для PCIe равен BDF.
|
||||
|
||||
`slot` — единственный канонический адрес компонента. Для PCIe в `slot` передавайте BDF. Поле `bdf` сохраняется только как переходный alias на входе и не должно использоваться как отдельная координата рядом со `slot`.
|
||||
|
||||
**Значения `device_class`:**
|
||||
|
||||
| Значение | Назначение |
|
||||
|----------|------------|
|
||||
| `MassStorageController` | RAID-контроллеры |
|
||||
| `StorageController` | HBA, SAS-контроллеры |
|
||||
| `NetworkController` | Сетевые адаптеры (InfiniBand, общий) |
|
||||
| `EthernetController` | Ethernet NIC |
|
||||
| `FibreChannelController` | Fibre Channel HBA |
|
||||
| `VideoController` | GPU, видеокарты |
|
||||
| `ProcessingAccelerator` | Вычислительные ускорители (AI/ML) |
|
||||
| `DisplayController` | Контроллеры дисплея (BMC VGA) |
|
||||
|
||||
Список открытый: допускаются произвольные строки для нестандартных классов.
|
||||
|
||||
```json
|
||||
"pcie_devices": [
|
||||
{
|
||||
"slot": "0000:3b:00.0",
|
||||
"vendor_id": 5555,
|
||||
"device_id": 4401,
|
||||
"numa_node": 0,
|
||||
"temperature_c": 48.5,
|
||||
"power_w": 18.2,
|
||||
"sfp_temperature_c": 36.2,
|
||||
"sfp_tx_power_dbm": -1.8,
|
||||
"sfp_rx_power_dbm": -2.1,
|
||||
"device_class": "EthernetController",
|
||||
"manufacturer": "Intel",
|
||||
"model": "X710 10GbE",
|
||||
"serial_number": "K65472-003",
|
||||
"firmware": "9.20 0x8000d4ae",
|
||||
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
||||
"status": "OK"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### power_supplies
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `slot` | string | нет | Идентификатор слота |
|
||||
| `present` | bool | нет | Наличие (по умолчанию `true`) |
|
||||
| `serial_number` | string | нет | Серийный номер |
|
||||
| `part_number` | string | нет | Партномер |
|
||||
| `model` | string | нет | Модель |
|
||||
| `vendor` | string | нет | Производитель |
|
||||
| `wattage_w` | int | нет | Мощность в ваттах |
|
||||
| `firmware` | string | нет | Версия прошивки |
|
||||
| `input_type` | string | нет | Тип входа (например `ACWideRange`) |
|
||||
| `input_voltage` | float | нет | Входное напряжение, В (telemetry) |
|
||||
| `input_power_w` | float | нет | Входная мощность, Вт (telemetry) |
|
||||
| `output_power_w` | float | нет | Выходная мощность, Вт (telemetry) |
|
||||
| `temperature_c` | float | нет | Температура PSU, °C (telemetry) |
|
||||
| `life_remaining_pct` | float | нет | Остаточный ресурс / health, % |
|
||||
| `life_used_pct` | float | нет | Использованный ресурс / wear, % |
|
||||
| + общие поля статуса | | | см. раздел выше |
|
||||
|
||||
Поля telemetry (`input_voltage`, `input_power_w`, `output_power_w`, `temperature_c`, `life_remaining_pct`, `life_used_pct`) сохраняются в атрибутах компонента и не влияют на его идентификацию.
|
||||
|
||||
PSU без `serial_number` игнорируется.
|
||||
|
||||
```json
|
||||
"power_supplies": [
|
||||
{
|
||||
"slot": "0",
|
||||
"present": true,
|
||||
"model": "GW-CRPS3000LW",
|
||||
"vendor": "Great Wall",
|
||||
"wattage_w": 3000,
|
||||
"serial_number": "2P06C102610",
|
||||
"firmware": "00.03.05",
|
||||
"status": "OK",
|
||||
"input_type": "ACWideRange",
|
||||
"input_power_w": 137,
|
||||
"output_power_w": 104,
|
||||
"input_voltage": 215.25,
|
||||
"temperature_c": 39.5,
|
||||
"life_remaining_pct": 97.0
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### sensors
|
||||
|
||||
Показания сенсоров сервера. Секция опциональная, не привязана к компонентам.
|
||||
Данные хранятся как последнее известное значение (last-known-value) на уровне Asset.
|
||||
|
||||
```json
|
||||
"sensors": {
|
||||
"fans": [ ... ],
|
||||
"power": [ ... ],
|
||||
"temperatures": [ ... ],
|
||||
"other": [ ... ]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### event_logs
|
||||
|
||||
Нормализованные операционные логи сервера из `host`, `bmc` или `redfish`.
|
||||
|
||||
Эти записи не попадают в history timeline и не создают history events. Они сохраняются в отдельной deduplicated log store и отображаются в отдельном UI-блоке asset logs / host logs.
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `source` | string | **да** | Источник лога: `host`, `bmc`, `redfish` |
|
||||
| `event_time` | string RFC3339 | нет | Время события из источника; если отсутствует, используется время ingest/collection |
|
||||
| `severity` | string | нет | Уровень: `OK`, `Info`, `Warning`, `Critical`, `Unknown` |
|
||||
| `message_id` | string | нет | Идентификатор/код события источника |
|
||||
| `message` | string | **да** | Нормализованный текст события |
|
||||
| `component_ref` | string | нет | Ссылка на компонент/устройство/слот, если извлекается |
|
||||
| `fingerprint` | string | нет | Внешний готовый dedup-key; если не передан, система вычисляет свой |
|
||||
| `is_active` | bool | нет | Признак, что событие всё ещё активно/не погашено, если источник умеет lifecycle |
|
||||
| `raw_payload` | object | нет | Сырой vendor-specific payload для диагностики |
|
||||
|
||||
**Правила event_logs:**
|
||||
- Логи дедуплицируются в рамках asset + source + fingerprint.
|
||||
- Если `fingerprint` не передан, система строит его из нормализованных полей (`source`, `message_id`, `message`, `component_ref`, временная нормализация).
|
||||
- Интегратор/сборщик логов не должен синтезировать содержимое событий: не придумывайте `message`, `message_id`, `component_ref`, serial/device identifiers или иные поля, если они отсутствуют в исходном логе или не были надёжно извлечены.
|
||||
- Повторное получение того же события обновляет `last_seen_at`/счётчик повторов и не должно создавать новый timeline/history event.
|
||||
- `event_logs` используются для отдельного UI-представления логов и не изменяют canonical state компонентов/asset по умолчанию.
|
||||
|
||||
```json
|
||||
"event_logs": [
|
||||
{
|
||||
"source": "bmc",
|
||||
"event_time": "2026-03-15T14:03:11Z",
|
||||
"severity": "Warning",
|
||||
"message_id": "0x000F",
|
||||
"message": "Correctable ECC error threshold exceeded",
|
||||
"component_ref": "CPU0_C0D0",
|
||||
"raw_payload": {
|
||||
"sensor": "DIMM_A1",
|
||||
"sel_record_id": "0042"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "redfish",
|
||||
"event_time": "2026-03-15T14:03:20Z",
|
||||
"severity": "Info",
|
||||
"message_id": "OpenBMC.0.1.SystemReboot",
|
||||
"message": "System reboot requested by administrator",
|
||||
"component_ref": "Mainboard"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
#### sensors.fans
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `name` | string | **да** | Уникальное имя сенсора в рамках секции |
|
||||
| `location` | string | нет | Физическое расположение |
|
||||
| `rpm` | int | нет | Обороты, RPM |
|
||||
| `status` | string | нет | Статус: `OK`, `Warning`, `Critical`, `Unknown` |
|
||||
|
||||
#### sensors.power
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `name` | string | **да** | Уникальное имя сенсора |
|
||||
| `location` | string | нет | Физическое расположение |
|
||||
| `voltage_v` | float | нет | Напряжение, В |
|
||||
| `current_a` | float | нет | Ток, А |
|
||||
| `power_w` | float | нет | Мощность, Вт |
|
||||
| `status` | string | нет | Статус |
|
||||
|
||||
#### sensors.temperatures
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `name` | string | **да** | Уникальное имя сенсора |
|
||||
| `location` | string | нет | Физическое расположение |
|
||||
| `celsius` | float | нет | Температура, °C |
|
||||
| `threshold_warning_celsius` | float | нет | Порог Warning, °C |
|
||||
| `threshold_critical_celsius` | float | нет | Порог Critical, °C |
|
||||
| `status` | string | нет | Статус |
|
||||
|
||||
#### sensors.other
|
||||
|
||||
| Поле | Тип | Обязательно | Описание |
|
||||
|------|-----|-------------|----------|
|
||||
| `name` | string | **да** | Уникальное имя сенсора |
|
||||
| `location` | string | нет | Физическое расположение |
|
||||
| `value` | float | нет | Значение |
|
||||
| `unit` | string | нет | Единица измерения |
|
||||
| `status` | string | нет | Статус |
|
||||
|
||||
**Правила sensors:**
|
||||
- Идентификатор сенсора: пара `(sensor_type, name)`. Дубли в одном payload — берётся первое вхождение.
|
||||
- Сенсоры без `name` игнорируются.
|
||||
- При каждом импорте значения перезаписываются (upsert по ключу).
|
||||
|
||||
```json
|
||||
"sensors": {
|
||||
"fans": [
|
||||
{ "name": "FAN1", "location": "Front", "rpm": 4200, "status": "OK" },
|
||||
{ "name": "FAN_CPU0", "location": "CPU0", "rpm": 5600, "status": "OK" }
|
||||
],
|
||||
"power": [
|
||||
{ "name": "12V Rail", "location": "Mainboard", "voltage_v": 12.06, "status": "OK" },
|
||||
{ "name": "PSU0 Input", "location": "PSU0", "voltage_v": 215.25, "current_a": 0.64, "power_w": 137.0, "status": "OK" }
|
||||
],
|
||||
"temperatures": [
|
||||
{ "name": "CPU0 Temp", "location": "CPU0", "celsius": 46.0, "threshold_warning_celsius": 80.0, "threshold_critical_celsius": 95.0, "status": "OK" },
|
||||
{ "name": "Inlet Temp", "location": "Front", "celsius": 22.0, "threshold_warning_celsius": 40.0, "threshold_critical_celsius": 50.0, "status": "OK" }
|
||||
],
|
||||
"other": [
|
||||
      { "name": "System Humidity", "value": 38.5, "unit": "%", "status": "OK" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Обработка статусов компонентов
|
||||
|
||||
| Статус | Поведение |
|
||||
|--------|-----------|
|
||||
| `OK` | Нормальная обработка |
|
||||
| `Warning` | Создаётся событие `COMPONENT_WARNING` |
|
||||
| `Critical` | Создаётся событие `COMPONENT_FAILED` + запись в `failure_events` |
|
||||
| `Unknown` | Компонент считается рабочим, создаётся событие `COMPONENT_UNKNOWN` |
|
||||
| `Empty` | Компонент не создаётся/не обновляется |
|
||||
|
||||
---
|
||||
|
||||
## Обработка отсутствующих serial_number
|
||||
|
||||
Общее правило для всех секций: если источник не вернул серийный номер и сборщик не смог его надёжно извлечь, интегратор не должен подставлять вымышленные значения, хеши, локальные placeholder-идентификаторы или серийные номера "по догадке". Разрешены только явно оговорённые ниже server-side fallback-правила ingest.
|
||||
|
||||
| Тип | Поведение |
|
||||
|-----|-----------|
|
||||
| CPU | Генерируется: `{board_serial}-CPU-{socket}` |
|
||||
| PCIe | Генерируется: `{board_serial}-PCIE-{slot}` (если serial = `"N/A"` или пустой; `slot` для PCIe = BDF) |
|
||||
| Memory | Компонент игнорируется |
|
||||
| Storage | Компонент игнорируется |
|
||||
| PSU | Компонент игнорируется |
|
||||
|
||||
Если `serial_number` не уникален внутри одного payload для того же `model`:
|
||||
- Первое вхождение сохраняет оригинальный серийный номер.
|
||||
- Каждое следующее дублирующее вхождение получает placeholder: `NO_SN-XXXXXXXX`.
|
||||
|
||||
---
|
||||
|
||||
## Минимальный валидный пример
|
||||
|
||||
```json
|
||||
{
|
||||
"collected_at": "2026-02-10T15:30:00Z",
|
||||
"target_host": "192.168.1.100",
|
||||
"hardware": {
|
||||
"board": {
|
||||
"serial_number": "SRV-001"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Полный пример с историей статусов
|
||||
|
||||
```json
|
||||
{
|
||||
"filename": "redfish://10.10.10.103",
|
||||
"source_type": "api",
|
||||
"protocol": "redfish",
|
||||
"target_host": "10.10.10.103",
|
||||
"collected_at": "2026-02-10T15:30:00Z",
|
||||
"hardware": {
|
||||
"board": {
|
||||
"manufacturer": "Supermicro",
|
||||
"product_name": "X12DPG-QT6",
|
||||
"serial_number": "21D634101"
|
||||
},
|
||||
"firmware": [
|
||||
{ "device_name": "BIOS", "version": "06.08.05" },
|
||||
{ "device_name": "BMC", "version": "5.17.00" }
|
||||
],
|
||||
"cpus": [
|
||||
{
|
||||
"socket": 0,
|
||||
"model": "INTEL(R) XEON(R) GOLD 6530",
|
||||
"manufacturer": "Intel",
|
||||
"cores": 32,
|
||||
"threads": 64,
|
||||
"status": "OK"
|
||||
}
|
||||
],
|
||||
"storage": [
|
||||
{
|
||||
"slot": "OB01",
|
||||
"type": "NVMe",
|
||||
"model": "INTEL SSDPF2KX076T1",
|
||||
"size_gb": 7680,
|
||||
"serial_number": "BTAX41900GF87P6DGN",
|
||||
"manufacturer": "Intel",
|
||||
"firmware": "9CV10510",
|
||||
"present": true,
|
||||
"status": "OK",
|
||||
"status_changed_at": "2026-02-10T15:22:00Z",
|
||||
"status_history": [
|
||||
{ "status": "Critical", "changed_at": "2026-02-10T15:10:00Z", "details": "I/O timeout on NVMe queue 3" },
|
||||
{ "status": "OK", "changed_at": "2026-02-10T15:22:00Z", "details": "Recovered after controller reset" }
|
||||
]
|
||||
}
|
||||
],
|
||||
"pcie_devices": [
|
||||
{
|
||||
"slot": "0000:18:00.0",
|
||||
"device_class": "EthernetController",
|
||||
"manufacturer": "Intel",
|
||||
"model": "X710 10GbE",
|
||||
"serial_number": "K65472-003",
|
||||
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
||||
"status": "OK"
|
||||
}
|
||||
],
|
||||
"power_supplies": [
|
||||
{
|
||||
"slot": "0",
|
||||
"present": true,
|
||||
"model": "GW-CRPS3000LW",
|
||||
"vendor": "Great Wall",
|
||||
"wattage_w": 3000,
|
||||
"serial_number": "2P06C102610",
|
||||
"firmware": "00.03.05",
|
||||
"status": "OK",
|
||||
"input_power_w": 137,
|
||||
"output_power_w": 104,
|
||||
"input_voltage": 215.25
|
||||
}
|
||||
],
|
||||
"sensors": {
|
||||
"fans": [
|
||||
{ "name": "FAN1", "location": "Front", "rpm": 4200, "status": "OK" }
|
||||
],
|
||||
"power": [
|
||||
{ "name": "12V Rail", "voltage_v": 12.06, "status": "OK" }
|
||||
],
|
||||
"temperatures": [
|
||||
{ "name": "CPU0 Temp", "celsius": 46.0, "threshold_warning_celsius": 80.0, "threshold_critical_celsius": 95.0, "status": "OK" }
|
||||
],
|
||||
"other": [
|
||||
{ "name": "System Humidity", "value": 38.5, "unit": "%" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
---

**Новый файл:** `bible-local/docs/msi-redfish-api.md` (343 строки)
|
||||
# MSI BMC Redfish API Reference
|
||||
|
||||
Source: MSI Enterprise Platform Solutions — Redfish BMC User Guide v1.0 (AMI/MegaRAC stack).
|
||||
Spec compliance: DSP0266 1.15.1, DSP8010 2019.2.
|
||||
|
||||
> This document is trimmed to sections relevant to LOGPile collection and inventory analysis.
|
||||
> Auth, LDAP/AD, SMTP, VirtualMedia, Certificates, RADIUS, Composability, and BMC config
|
||||
> sections are omitted.
|
||||
|
||||
---
|
||||
|
||||
## Supported HTTP methods
|
||||
|
||||
`GET`, `POST`, `PATCH`, `DELETE`. Unsupported methods return `405`.
|
||||
|
||||
PATCH requires an `If-Match` / `ETag` precondition header; missing header → `428`, mismatch → `412`.
|
||||
|
||||
---
|
||||
|
||||
## 1. Core Redfish API endpoints
|
||||
|
||||
| Resource | URI | Schema |
|
||||
|---|---|---|
|
||||
| Service Root | `/redfish/v1/` | ServiceRoot.v1_7_0 |
|
||||
| ComputerSystem Collection | `/redfish/v1/Systems` | ComputerSystemCollection |
|
||||
| ComputerSystem | `/redfish/v1/Systems/{sys}` | ComputerSystem.v1_16_2 |
|
||||
| Memory Collection | `/redfish/v1/Systems/{sys}/Memory` | MemoryCollection |
|
||||
| Memory | `/redfish/v1/Systems/{sys}/Memory/{mem}` | Memory.v1_19_0 |
|
||||
| MemoryMetrics | `/redfish/v1/Systems/{sys}/Memory/{mem}/MemoryMetrics` | MemoryMetrics.v1_7_0 |
|
||||
| MemoryDomain Collection | `/redfish/v1/Systems/{sys}/MemoryDomain` | MemoryDomainCollection |
|
||||
| MemoryDomain | `/redfish/v1/Systems/{sys}/MemoryDomain/{dom}` | MemoryDomain.v1_2_3 |
|
||||
| MemoryChunks Collection | `/redfish/v1/Systems/{sys}/MemoryDomain/{dom}/MemoryChunks` | MemoryChunksCollection |
|
||||
| MemoryChunks | `/redfish/v1/Systems/{sys}/MemoryDomain/{dom}/MemoryChunks/{chunk}` | MemoryChunks.v1_4_0 |
|
||||
| Processor Collection | `/redfish/v1/Systems/{sys}/Processors` | ProcessorCollection |
|
||||
| Processor | `/redfish/v1/Systems/{sys}/Processors/{proc}` | Processor.v1_15_0 |
|
||||
| SubProcessors Collection | `/redfish/v1/Systems/{sys}/Processors/{proc}/SubProcessors` | ProcessorCollection |
|
||||
| SubProcessor | `/redfish/v1/Systems/{sys}/Processors/{proc}/SubProcessors/{sub}` | Processor.v1_15_0 |
|
||||
| ProcessorMetrics | `/redfish/v1/Systems/{sys}/Processors/{proc}/ProcessorMetrics` | ProcessorMetrics.v1_4_0 |
|
||||
| Bios | `/redfish/v1/Systems/{sys}/Bios` | Bios.v1_2_0 |
|
||||
| SimpleStorage Collection | `/redfish/v1/Systems/{sys}/SimpleStorage` | SimpleStorageCollection |
|
||||
| SimpleStorage | `/redfish/v1/Systems/{sys}/SimpleStorage/{ss}` | SimpleStorage.v1_3_0 |
|
||||
| Storage Collection | `/redfish/v1/Systems/{sys}/Storage` | StorageCollection |
|
||||
| Storage | `/redfish/v1/Systems/{sys}/Storage/{stor}` | Storage.v1_9_0 |
|
||||
| StorageController Collection | `/redfish/v1/Systems/{sys}/Storage/{stor}/Controllers` | StorageControllerCollection |
|
||||
| StorageController | `/redfish/v1/Systems/{sys}/Storage/{stor}/Controllers/{ctrl}` | StorageController.v1_0_0 |
|
||||
| Drive | `/redfish/v1/Systems/{sys}/Storage/{stor}/Drives/{drv}` | Drive.v1_13_0 |
|
||||
| Volume Collection | `/redfish/v1/Systems/{sys}/Storage/{stor}/Volumes` | VolumeCollection |
|
||||
| Volume | `/redfish/v1/Systems/{sys}/Storage/{stor}/Volumes/{vol}` | Volume.v1_5_0 |
|
||||
| NetworkInterface Collection | `/redfish/v1/Systems/{sys}/NetworkInterfaces` | NetworkInterfaceCollection |
|
||||
| NetworkInterface | `/redfish/v1/Systems/{sys}/NetworkInterfaces/{nic}` | NetworkInterface.v1_2_0 |
|
||||
| EthernetInterface (System) | `/redfish/v1/Systems/{sys}/EthernetInterfaces/{eth}` | EthernetInterface.v1_6_2 |
|
||||
| GraphicsController Collection | `/redfish/v1/Systems/{sys}/GraphicsControllers` | GraphicsControllerCollection |
|
||||
| GraphicsController | `/redfish/v1/Systems/{sys}/GraphicsControllers/{gpu}` | GraphicsController.v1_0_0 |
|
||||
| USBController Collection | `/redfish/v1/Systems/{sys}/USBControllers` | USBControllerCollection |
|
||||
| USBController | `/redfish/v1/Systems/{sys}/USBControllers/{usb}` | USBController.v1_0_0 |
|
||||
| SecureBoot | `/redfish/v1/Systems/{sys}/SecureBoot` | SecureBoot.v1_1_0 |
|
||||
| LogService Collection (System) | `/redfish/v1/Systems/{sys}/LogServices` | LogServiceCollection |
|
||||
| LogService (System) | `/redfish/v1/Systems/{sys}/LogServices/{log}` | LogService.v1_1_3 |
|
||||
| LogEntry Collection | `/redfish/v1/Systems/{sys}/LogServices/{log}/Entries` | LogEntryCollection |
|
||||
| LogEntry | `/redfish/v1/Systems/{sys}/LogServices/{log}/Entries/{entry}` | LogEntry.v1_12_0 |
|
||||
| Chassis Collection | `/redfish/v1/Chassis` | ChassisCollection |
|
||||
| Chassis | `/redfish/v1/Chassis/{ch}` | Chassis.v1_15_0 |
|
||||
| Power | `/redfish/v1/Chassis/{ch}/Power` | Power.v1_5_4 |
|
||||
| PowerSubSystem | `/redfish/v1/Chassis/{ch}/PowerSubSystem` | PowerSubsystem.v1_1_0 |
|
||||
| PowerSupplies Collection | `/redfish/v1/Chassis/{ch}/PowerSubSystem/PowerSupplies` | PowerSupplyCollection |
|
||||
| PowerSupply | `/redfish/v1/Chassis/{ch}/PowerSubSystem/PowerSupplies/{psu}` | PowerSupply.v1_3_0 |
|
||||
| PowerSupplyMetrics | `/redfish/v1/Chassis/{ch}/PowerSubSystem/PowerSupplies/{psu}/Metrics` | PowerSupplyMetrics.v1_0_1 |
|
||||
| Thermal | `/redfish/v1/Chassis/{ch}/Thermal` | Thermal.v1_5_3 |
|
||||
| ThermalSubSystem | `/redfish/v1/Chassis/{ch}/ThermalSubSystem` | ThermalSubsystem.v1_0_0 |
|
||||
| ThermalMetrics | `/redfish/v1/Chassis/{ch}/ThermalSubSystem/ThermalMetrics` | ThermalMetrics.v1_0_1 |
|
||||
| Fans Collection | `/redfish/v1/Chassis/{ch}/ThermalSubSystem/Fans` | FanCollection |
|
||||
| Fan | `/redfish/v1/Chassis/{ch}/ThermalSubSystem/Fans/{fan}` | Fan.v1_1_1 |
|
||||
| Sensor Collection | `/redfish/v1/Chassis/{ch}/Sensors` | SensorCollection |
|
||||
| Sensor | `/redfish/v1/Chassis/{ch}/Sensors/{sen}` | Sensor.v1_0_2 |
|
||||
| PCIeDevice Collection | `/redfish/v1/Chassis/{ch}/PCIeDevices` | PCIeDeviceCollection |
|
||||
| PCIeDevice | `/redfish/v1/Chassis/{ch}/PCIeDevices/{dev}` | PCIeDevice.v1_9_0 |
|
||||
| PCIeFunction Collection | `/redfish/v1/Chassis/{ch}/PCIeDevices/{dev}/PCIeFunctions` | PCIeFunctionCollection |
|
||||
| PCIeFunction | `/redfish/v1/Chassis/{ch}/PCIeDevices/{dev}/PCIeFunctions/{fn}` | PCIeFunction.v1_2_3 |
|
||||
| PCIeSlots | `/redfish/v1/Chassis/{ch}/PCIeSlots` | PCIeSlots.v1_5_0 |
|
||||
| NetworkAdapter Collection | `/redfish/v1/Chassis/{ch}/NetworkAdapters` | NetworkAdapterCollection |
|
||||
| NetworkAdapter | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}` | NetworkAdapter.v1_8_0 |
|
||||
| NetworkDeviceFunction Collection | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}/NetworkDeviceFunctions` | NetworkDeviceFunctionCollection |
|
||||
| NetworkDeviceFunction | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}/NetworkDeviceFunctions/{fn}` | NetworkDeviceFunction.v1_5_0 |
|
||||
| Assembly | `/redfish/v1/Chassis/{ch}/Assembly` | Assembly.v1_2_2 |
|
||||
| Assembly (Drive) | `/redfish/v1/Systems/{sys}/Storage/{stor}/Drives/{drv}/Assembly` | Assembly.v1_2_2 |
|
||||
| Assembly (Processor) | `/redfish/v1/Systems/{sys}/Processors/{proc}/Assembly` | Assembly.v1_2_2 |
|
||||
| Assembly (Memory) | `/redfish/v1/Systems/{sys}/Memory/{mem}/Assembly` | Assembly.v1_2_2 |
|
||||
| Assembly (NetworkAdapter) | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}/Assembly` | Assembly.v1_2_2 |
|
||||
| Assembly (PCIeDevice) | `/redfish/v1/Chassis/{ch}/PCIeDevices/{dev}/Assembly` | Assembly.v1_2_2 |
|
||||
| MediaController Collection | `/redfish/v1/Chassis/{ch}/MediaControllers` | MediaControllerCollection |
|
||||
| MediaController | `/redfish/v1/Chassis/{ch}/MediaControllers/{mc}` | MediaController.v1_1_0 |
|
||||
| LogService Collection (Chassis) | `/redfish/v1/Chassis/{ch}/LogServices` | LogServiceCollection |
|
||||
| LogService (Chassis) | `/redfish/v1/Chassis/{ch}/LogServices/{log}` | LogService.v1_1_3 |
|
||||
| Manager Collection | `/redfish/v1/Managers` | ManagerCollection |
|
||||
| Manager | `/redfish/v1/Managers/{mgr}` | Manager.v1_13_0 |
|
||||
| EthernetInterface (Manager) | `/redfish/v1/Managers/{mgr}/EthernetInterfaces/{eth}` | EthernetInterface.v1_6_2 |
|
||||
| LogService Collection (Manager) | `/redfish/v1/Managers/{mgr}/LogServices` | LogServiceCollection |
|
||||
| LogService (Manager) | `/redfish/v1/Managers/{mgr}/LogServices/{log}` | LogService.v1_1_3 |
|
||||
| UpdateService | `/redfish/v1/UpdateService` | UpdateService.v1_6_0 |
|
||||
| TaskService | `/redfish/v1/TaskService` | TaskService.v1_1_4 |
|
||||
| Task Collection | `/redfish/v1/TaskService/Tasks` | TaskCollection |
|
||||
| Task | `/redfish/v1/TaskService/Tasks/{task}` | Task.v1_4_2 |
|
||||
|
||||
---
|
||||
|
||||
## 2. Telemetry API endpoints
|
||||
|
||||
| Resource | URI | Schema |
|
||||
|---|---|---|
|
||||
| TelemetryService | `/redfish/v1/TelemetryService` | TelemetryService.v1_2_1 |
|
||||
| MetricDefinition Collection | `/redfish/v1/TelemetryService/MetricDefinitions` | MetricDefinitionCollection |
|
||||
| MetricDefinition | `/redfish/v1/TelemetryService/MetricDefinitions/{md}` | MetricDefinition.v1_0_3 |
|
||||
| MetricReportDefinition Collection | `/redfish/v1/TelemetryService/MetricReportDefinitions` | MetricReportDefinitionCollection |
|
||||
| MetricReportDefinition | `/redfish/v1/TelemetryService/MetricReportDefinitions/{mrd}` | MetricReportDefinition.v1_3_0 |
|
||||
| MetricReport Collection | `/redfish/v1/TelemetryService/MetricReports` | MetricReportCollection |
|
||||
| MetricReport | `/redfish/v1/TelemetryService/MetricReports/{mr}` | MetricReport.v1_2_0 |
|
||||
| Telemetry LogService | `/redfish/v1/TelemetryService/LogService` | LogService.v1_1_3 |
|
||||
| Telemetry LogEntry Collection | `/redfish/v1/TelemetryService/LogService/Entries` | LogEntryCollection |
|
||||
|
||||
---
|
||||
|
||||
## 3. Processor / NIC sub-resources (GPU-relevant)
|
||||
|
||||
| Resource | URI |
|
||||
|---|---|
|
||||
| Processor (NetworkAdapter) | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}/Processors/{proc}` |
|
||||
| AccelerationFunction Collection | `/redfish/v1/Systems/{sys}/Processors/{proc}/AccelerationFunctions` |
|
||||
| AccelerationFunction | `/redfish/v1/Systems/{sys}/Processors/{proc}/AccelerationFunctions/{fn}` |
|
||||
| Port Collection (NetworkAdapter) | `/redfish/v1/Chassis/{ch}/NetworkAdapters/{na}/Ports` |
|
||||
| Port (GraphicsController) | `/redfish/v1/Systems/{sys}/GraphicsControllers/{gpu}/Ports/{port}` |
|
||||
| OperatingConfig Collection | `/redfish/v1/Systems/{sys}/Processors/{proc}/OperatingConfigs` |
|
||||
| OperatingConfig | `/redfish/v1/Systems/{sys}/Processors/{proc}/OperatingConfigs/{cfg}` |
|
||||
|
||||
---
|
||||
|
||||
## 4. Error response format
|
||||
|
||||
On error, the service returns an HTTP status code and a JSON body with a single `error` property:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": "Base.1.12.0.ActionParameterMissing",
|
||||
"message": "...",
|
||||
"@Message.ExtendedInfo": [
|
||||
{
|
||||
"@odata.type": "#Message.v1_0_8.Message",
|
||||
"MessageId": "Base.1.12.0.ActionParameterMissing",
|
||||
"Message": "...",
|
||||
"MessageArgs": [],
|
||||
"Severity": "Warning",
|
||||
"Resolution": "..."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Common status codes:**
|
||||
|
||||
| Code | Meaning |
|
||||
|------|---------|
|
||||
| 200 | OK with body |
|
||||
| 201 | Created |
|
||||
| 204 | Success, no body |
|
||||
| 400 | Bad request / validation error |
|
||||
| 401 | Unauthorized |
|
||||
| 403 | Forbidden / firmware update in progress |
|
||||
| 404 | Resource not found |
|
||||
| 405 | Method not allowed |
|
||||
| 412 | ETag precondition failed (PATCH) |
|
||||
| 415 | Unsupported media type |
|
||||
| 428 | Missing precondition header (PATCH) |
|
||||
| 501 | Not implemented |
|
||||
|
||||
**Request validation sequence:**
|
||||
1. Authorization check → 401
|
||||
2. Entity privilege check → 403
|
||||
3. URI existence → 404
|
||||
4. Firmware update lock → 403
|
||||
5. Method allowed → 405
|
||||
6. Media type → 415
|
||||
7. Body format → 400
|
||||
8. PATCH: ETag header → 428/412
|
||||
9. Property validation → 400
|
||||
|
||||
---
|
||||
|
||||
## 5. OEM: Inventory refresh (AMI/MSI-specific)
|
||||
|
||||
### 5.1 InventoryCrc — force component re-inventory
|
||||
|
||||
`GET/POST/DELETE /redfish/v1/Systems/{sys}/Oem/Ami/Inventory/Crc`
|
||||
|
||||
The `GroupCrcList` field lists current CRC checksums per component group. When a group's CRC
|
||||
changes (host sends new inventory) or is explicitly zeroed out via POST, the BMC discards its
|
||||
cached inventory and re-reads that group from the host.
|
||||
|
||||
**CRC groups:**
|
||||
|
||||
| Group | Covers |
|
||||
|-------|--------|
|
||||
| `CPU` | Processors, ProcessorMetrics |
|
||||
| `DIMM` | Memory, MemoryDomains, MemoryChunks, MemoryMetrics |
|
||||
| `PCIE` | Storage, PCIeDevices, NetworkInterfaces, NetworkAdapters |
|
||||
| `CERTIFICATES` | Boot Certificates |
|
||||
| `SECURBOOT` | SecureBoot data |
|
||||
|
||||
**POST — invalidate selected groups (force re-inventory):**
|
||||
|
||||
```
|
||||
POST /redfish/v1/Systems/{sys}/Oem/Ami/Inventory/Crc
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"GroupCrcList": [
|
||||
{ "CPU": 0 },
|
||||
{ "DIMM": 0 },
|
||||
{ "PCIE": 0 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Setting a group's value to `0` signals the BMC to invalidate and repopulate that group on next
|
||||
host inventory push (typically at next boot or host-interface inventory cycle).
|
||||
|
||||
**DELETE** — remove all CRC records entirely.
|
||||
|
||||
**Note:** Inventory data is populated by the host via the Redfish Host Interface (in-band),
|
||||
not by the BMC itself. Zeroing a CRC group does not immediately re-read hardware — it marks
|
||||
the group as stale so the next host-side inventory push will be accepted. A cold reboot is the
|
||||
most reliable trigger.
|
||||
|
||||
### 5.2 InventoryData Status — monitor inventory processing
|
||||
|
||||
`GET /redfish/v1/Oem/Ami/InventoryData/Status`
|
||||
|
||||
Available only after the host has posted an inventory file. Shows current processing state.
|
||||
|
||||
**Status enum:**
|
||||
|
||||
| Value | Meaning |
|
||||
|-------|---------|
|
||||
| `BootInProgress` | Host is booting |
|
||||
| `Queued` | Processing task queued |
|
||||
| `In-Progress` | Processing running in background |
|
||||
| `Ready` / `Completed` | Processing finished successfully |
|
||||
| `Failed` | Processing failed |
|
||||
|
||||
Response also includes:
|
||||
- `InventoryData.DeletedModules` — array of groups updated in this population cycle
|
||||
- `InventoryData.Messages` — warnings/errors encountered during processing
|
||||
- `ProcessingTime` — milliseconds taken
|
||||
- `LastModifiedTime` — ISO 8601 timestamp of last successful update
|
||||
|
||||
### 5.3 Systems OEM properties — Inventory reference
|
||||
|
||||
`GET /redfish/v1/Systems/{sys}` → `Oem.Ami` contains:
|
||||
|
||||
| Property | Notes |
|
||||
|----------|-------|
|
||||
| `Inventory` | Reference to InventoryCrc URI + current GroupCrc data |
|
||||
| `RedfishVersion` | BIOS Redfish version (populated via Host Interface) |
|
||||
| `RtpVersion` | BIOS RTP version (populated via Host Interface) |
|
||||
| `ManagerBootConfiguration.ManagerBootMode` | PATCH to trigger soft reset: `SoftReset` / `ResetTimeout` / `None` |
|
||||
|
||||
---
|
||||
|
||||
## 6. OEM: Component state actions
|
||||
|
||||
### 6.1 Memory enable/disable
|
||||
|
||||
```
|
||||
POST /redfish/v1/Systems/{sys}/Memory/{mem}/Actions/AmiBios.ChangeState
|
||||
Content-Type: application/json
|
||||
|
||||
{ "State": "Disabled" }
|
||||
```
|
||||
|
||||
Response: 204.
|
||||
|
||||
### 6.2 PCIeFunction enable/disable
|
||||
|
||||
```
|
||||
POST /redfish/v1/Chassis/{ch}/PCIeDevices/{dev}/PCIeFunctions/{fn}/Actions/AmiBios.ChangeState
|
||||
Content-Type: application/json
|
||||
|
||||
{ "State": "Disabled" }
|
||||
```
|
||||
|
||||
Response: 204.
|
||||
|
||||
---
|
||||
|
||||
## 7. OEM: Storage sensor readings
|
||||
|
||||
`GET /redfish/v1/Systems/{sys}/Storage/{stor}` → `Oem.Ami.StorageControllerSensors`
|
||||
|
||||
Array of sensor objects per storage controller instance. Each entry exposes:
|
||||
- `Reading` (Number) — current sensor value
|
||||
- `ReadingType` (String) — type of reading
|
||||
- `ReadingUnit` (String) — unit
|
||||
|
||||
---
|
||||
|
||||
## 8. OEM: Power and Thermal OwnerLUN
|
||||
|
||||
Both `GET /redfish/v1/Chassis/{ch}/Power` and `GET /redfish/v1/Chassis/{ch}/Thermal` expose
|
||||
`Oem.Ami.OwnerLUN` (Number, read-only) — the IPMI LUN associated with each
|
||||
temperature/fan/voltage sensor entry. Useful for correlating Redfish sensor readings with IPMI
|
||||
SDR records.
|
||||
|
||||
---
|
||||
|
||||
## 9. UpdateService
|
||||
|
||||
`GET /redfish/v1/UpdateService` → `Oem.Ami.BMC.DualImageConfiguration`:
|
||||
|
||||
| Property | Description |
|
||||
|----------|-------------|
|
||||
| `ActiveImage` | Currently active BMC image slot |
|
||||
| `BootImage` | Image slot BMC boots from |
|
||||
| `FirmwareImage1Name` / `FirmwareImage1Version` | First image slot name + version |
|
||||
| `FirmwareImage2Name` / `FirmwareImage2Version` | Second image slot name + version |
|
||||
|
||||
Standard `SimpleUpdate` action available at `/redfish/v1/UpdateService/Actions/UpdateService.SimpleUpdate`.
|
||||
|
||||
---
|
||||
|
||||
## 10. Inventory refresh summary
|
||||
|
||||
| Approach | Trigger | Latency | Scope |
|
||||
|----------|---------|---------|-------|
|
||||
| Host reboot | Physical/soft reset | Minutes | All groups |
|
||||
| `POST InventoryCrc` (groups = 0) | Explicit API call | Next host inventory push | Selected groups |
|
||||
| Firmware update (`SimpleUpdate`) | Explicit API call | Minutes + reboot | Full platform |
|
||||
| Sensor/telemetry reads | Always live on GET | Immediate | Sensors only |
|
||||
|
||||
**Key constraint:** `InventoryCrc POST` marks groups stale but does not re-read hardware
|
||||
directly. Actual inventory data flows from the host to BMC via the Redfish Host Interface
|
||||
in-band channel, typically during POST/boot. For immediate inventory refresh without a full
|
||||
reboot, a soft reset via `ManagerBootMode: SoftReset` PATCH may be sufficient on some
|
||||
configurations.
|
||||
@@ -1,10 +1,14 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
_ "git.mchus.pro/mchus/logpile/internal/parser/vendors" // Register all vendor parsers
|
||||
@@ -18,9 +22,11 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
port := flag.Int("port", 8080, "HTTP server port")
|
||||
holdOnCrash := flag.Bool("hold-on-crash", runtime.GOOS == "windows", "Wait for Enter on crash to keep console open")
|
||||
port := flag.Int("port", 8082, "HTTP server port")
|
||||
file := flag.String("file", "", "Pre-load archive file")
|
||||
showVersion := flag.Bool("version", false, "Show version")
|
||||
noBrowser := flag.Bool("no-browser", false, "Don't open browser automatically")
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
@@ -34,13 +40,74 @@ func main() {
|
||||
cfg := server.Config{
|
||||
Port: *port,
|
||||
PreloadFile: *file,
|
||||
AppVersion: version,
|
||||
AppCommit: commit,
|
||||
}
|
||||
|
||||
srv := server.New(cfg)
|
||||
|
||||
log.Printf("LOGPile starting on http://localhost:%d", *port)
|
||||
url := fmt.Sprintf("http://localhost:%d", *port)
|
||||
log.Printf("LOGPile starting on %s", url)
|
||||
log.Printf("Registered parsers: %v", parser.ListParsers())
|
||||
if err := srv.Run(); err != nil {
|
||||
log.Fatalf("Server error: %v", err)
|
||||
|
||||
// Open browser automatically
|
||||
if !*noBrowser {
|
||||
go func() {
|
||||
time.Sleep(500 * time.Millisecond) // Wait for server to start
|
||||
openBrowser(url)
|
||||
}()
|
||||
}
|
||||
|
||||
if err := runServer(srv); err != nil {
|
||||
log.Printf("FATAL: %v", err)
|
||||
maybeWaitForCrashInput(*holdOnCrash)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func runServer(srv *server.Server) (runErr error) {
|
||||
defer func() {
|
||||
if recovered := recover(); recovered != nil {
|
||||
runErr = fmt.Errorf("panic: %v", recovered)
|
||||
}
|
||||
}()
|
||||
return srv.Run()
|
||||
}
|
||||
|
||||
// openBrowser launches the platform's default browser pointed at url.
// A failure to start the command is logged and otherwise ignored.
func openBrowser(url string) {
	var name string
	var args []string

	switch runtime.GOOS {
	case "darwin":
		name = "open"
	case "windows":
		name = "rundll32"
		args = []string{"url.dll,FileProtocolHandler"}
	default: // linux and others
		name = "xdg-open"
	}
	args = append(args, url)

	if err := exec.Command(name, args...).Start(); err != nil {
		log.Printf("Failed to open browser: %v", err)
	}
}
|
||||
|
||||
func maybeWaitForCrashInput(enabled bool) {
|
||||
if !enabled || !isInteractiveConsole() {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "\nApplication crashed. Press Enter to close...")
|
||||
_, _ = bufio.NewReader(os.Stdin).ReadString('\n')
|
||||
}
|
||||
|
||||
// isInteractiveConsole reports whether both stdin and stderr are attached to
// a character device (a real terminal) rather than a pipe or regular file.
func isInteractiveConsole() bool {
	for _, f := range []*os.File{os.Stdin, os.Stderr} {
		info, err := f.Stat()
		if err != nil || info.Mode()&os.ModeCharDevice == 0 {
			return false
		}
	}
	return true
}
|
||||
|
||||
6
go.mod
6
go.mod
@@ -1,3 +1,7 @@
|
||||
module git.mchus.pro/mchus/logpile
|
||||
|
||||
go 1.22
|
||||
go 1.24.0
|
||||
|
||||
require reanimator/chart v0.0.0
|
||||
|
||||
replace reanimator/chart => ./internal/chart
|
||||
|
||||
1
internal/chart
Submodule
1
internal/chart
Submodule
Submodule internal/chart added at c025ae0477
18
internal/collector/helpers.go
Normal file
18
internal/collector/helpers.go
Normal file
@@ -0,0 +1,18 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
)
|
||||
|
||||
func sleepWithContext(ctx context.Context, d time.Duration) bool {
|
||||
timer := time.NewTimer(d)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case <-timer.C:
|
||||
return true
|
||||
}
|
||||
}
|
||||
42
internal/collector/ipmi_mock.go
Normal file
42
internal/collector/ipmi_mock.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
type IPMIMockConnector struct{}
|
||||
|
||||
func NewIPMIMockConnector() *IPMIMockConnector {
|
||||
return &IPMIMockConnector{}
|
||||
}
|
||||
|
||||
func (c *IPMIMockConnector) Protocol() string {
|
||||
return "ipmi"
|
||||
}
|
||||
|
||||
func (c *IPMIMockConnector) Collect(ctx context.Context, req Request, emit ProgressFn) (*models.AnalysisResult, error) {
|
||||
steps := []Progress{
|
||||
{Status: "running", Progress: 20, Message: "IPMI: подключение к BMC..."},
|
||||
{Status: "running", Progress: 55, Message: "IPMI: чтение инвентаря..."},
|
||||
{Status: "running", Progress: 85, Message: "IPMI: нормализация данных..."},
|
||||
}
|
||||
|
||||
for _, step := range steps {
|
||||
if !sleepWithContext(ctx, 150*time.Millisecond) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
if emit != nil {
|
||||
emit(step)
|
||||
}
|
||||
}
|
||||
|
||||
return &models.AnalysisResult{
|
||||
Events: make([]models.Event, 0),
|
||||
FRU: make([]models.FRUInfo, 0),
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
Hardware: &models.HardwareConfig{},
|
||||
}, nil
|
||||
}
|
||||
6451
internal/collector/redfish.go
Normal file
6451
internal/collector/redfish.go
Normal file
File diff suppressed because it is too large
Load Diff
392
internal/collector/redfish_logentries.go
Normal file
392
internal/collector/redfish_logentries.go
Normal file
@@ -0,0 +1,392 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// Limits applied when collecting hardware log entries over Redfish.
const (
	// redfishLogEntriesWindow is how far back entries are collected (7 days).
	redfishLogEntriesWindow = 7 * 24 * time.Hour
	// redfishLogEntriesMaxTotal caps entries collected across all log services.
	redfishLogEntriesMaxTotal = 500
	// redfishLogEntriesMaxPerSvc caps entries taken from a single log service.
	redfishLogEntriesMaxPerSvc = 200
)
|
||||
|
||||
// collectRedfishLogEntries fetches hardware event log entries from Systems and Managers LogServices.
// Only hardware-relevant entries from the last 7 days are returned, capped at
// redfishLogEntriesMaxTotal overall and redfishLogEntriesMaxPerSvc per service.
// For Systems: all log services except audit/journal/security/debug.
// For Managers: only the IPMI SEL service (Id="SEL") — audit and event logs are excluded.
func (c *RedfishConnector) collectRedfishLogEntries(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths, managerPaths []string) []map[string]interface{} {
	cutoff := time.Now().UTC().Add(-redfishLogEntriesWindow)
	// seen is shared across every service so the same entry reached via two
	// paths (e.g. System and Manager SEL views) is only collected once.
	seen := make(map[string]struct{})
	var out []map[string]interface{}

	// collectFrom appends matching entries from one LogServices collection.
	// It mutates out and seen via closure, enforcing the global total cap.
	collectFrom := func(logServicesPath string, filter func(map[string]interface{}) bool) {
		if len(out) >= redfishLogEntriesMaxTotal {
			return
		}
		services, err := c.getCollectionMembers(ctx, client, req, baseURL, logServicesPath)
		if err != nil || len(services) == 0 {
			// Best effort: a BMC without this LogServices collection is not an error.
			return
		}
		for _, svc := range services {
			if len(out) >= redfishLogEntriesMaxTotal {
				break
			}
			if !filter(svc) {
				continue
			}
			entriesPath := redfishLogServiceEntriesPath(svc)
			if entriesPath == "" {
				continue
			}
			entries := c.fetchRedfishLogEntriesWithPaging(ctx, client, req, baseURL, entriesPath, cutoff, seen, redfishLogEntriesMaxPerSvc)
			out = append(out, entries...)
		}
	}

	for _, systemPath := range systemPaths {
		collectFrom(joinPath(systemPath, "/LogServices"), isHardwareLogService)
	}
	// Managers hold the IPMI SEL on AMI/MSI BMCs — include only the "SEL" service.
	for _, managerPath := range managerPaths {
		collectFrom(joinPath(managerPath, "/LogServices"), isManagerSELService)
	}

	if len(out) > 0 {
		log.Printf("redfish: collected %d hardware log entries (Systems+Managers SEL, window=7d)", len(out))
	}
	return out
}
|
||||
|
||||
// fetchRedfishLogEntriesWithPaging fetches entries from a LogEntry collection,
// following nextLink pages. Stops early when entries older than cutoff are encountered
// (assumes BMC returns entries newest-first, which is typical). The seen map is
// caller-owned and used to deduplicate entries across multiple log services.
// At most limit entries are returned.
func (c *RedfishConnector) fetchRedfishLogEntriesWithPaging(ctx context.Context, client *http.Client, req Request, baseURL, entriesPath string, cutoff time.Time, seen map[string]struct{}, limit int) []map[string]interface{} {
	var out []map[string]interface{}
	nextPath := entriesPath

	for nextPath != "" && len(out) < limit {
		collection, err := c.getJSON(ctx, client, req, baseURL, nextPath)
		if err != nil {
			break
		}

		// Handle both linked members (@odata.id only) and inline members (full objects).
		rawMembers, _ := collection["Members"].([]interface{})
		hitOldEntry := false

		for _, rawMember := range rawMembers {
			if len(out) >= limit {
				break
			}
			memberMap, ok := rawMember.(map[string]interface{})
			if !ok {
				continue
			}

			var entry map[string]interface{}
			if _, hasCreated := memberMap["Created"]; hasCreated {
				// Inline entry — use directly.
				entry = memberMap
			} else {
				// Linked entry — fetch by path.
				memberPath := normalizeRedfishPath(asString(memberMap["@odata.id"]))
				if memberPath == "" {
					continue
				}
				entry, err = c.getJSON(ctx, client, req, baseURL, memberPath)
				if err != nil || len(entry) == 0 {
					continue
				}
			}

			// Dedup by entry Id or path. NOTE(review): Ids may not be unique
			// across services on all BMCs — verify against target firmware.
			entryKey := asString(entry["Id"])
			if entryKey == "" {
				entryKey = asString(entry["@odata.id"])
			}
			if entryKey != "" {
				if _, dup := seen[entryKey]; dup {
					continue
				}
				seen[entryKey] = struct{}{}
			}

			// Time filter. An unparseable/absent Created is kept (zero time).
			created := parseRedfishEntryTime(asString(entry["Created"]))
			if !created.IsZero() && created.Before(cutoff) {
				// Remember we crossed the cutoff, but finish this page:
				// ordering within a page is not guaranteed to be strict.
				hitOldEntry = true
				continue
			}

			// Hardware relevance filter.
			if !isHardwareLogEntry(entry) {
				continue
			}

			out = append(out, entry)
		}

		// Stop paging once we've seen entries older than the window.
		if hitOldEntry {
			break
		}
		nextPath = firstNonEmpty(
			normalizeRedfishPath(asString(collection["Members@odata.nextLink"])),
			normalizeRedfishPath(asString(collection["@odata.nextLink"])),
		)
	}
	return out
}
|
||||
|
||||
// isManagerSELService returns true only for the IPMI SEL exposed under Managers.
|
||||
// On AMI/MSI BMCs the hardware SEL lives at Managers/{mgr}/LogServices/SEL.
|
||||
// All other Manager log services (AuditLog, EventLog, Journal) are excluded.
|
||||
func isManagerSELService(svc map[string]interface{}) bool {
|
||||
id := strings.ToLower(strings.TrimSpace(asString(svc["Id"])))
|
||||
return id == "sel"
|
||||
}
|
||||
|
||||
// isHardwareLogService returns true if the log service looks like a hardware event log
|
||||
// (SEL, System Event Log) rather than a BMC audit/journal log.
|
||||
func isHardwareLogService(svc map[string]interface{}) bool {
|
||||
id := strings.ToLower(strings.TrimSpace(asString(svc["Id"])))
|
||||
name := strings.ToLower(strings.TrimSpace(asString(svc["Name"])))
|
||||
for _, skip := range []string{"audit", "journal", "bmc", "security", "manager", "debug"} {
|
||||
if strings.Contains(id, skip) || strings.Contains(name, skip) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// redfishLogServiceEntriesPath returns the Entries collection path for a LogService document.
|
||||
func redfishLogServiceEntriesPath(svc map[string]interface{}) string {
|
||||
if entriesLink, ok := svc["Entries"].(map[string]interface{}); ok {
|
||||
if p := normalizeRedfishPath(asString(entriesLink["@odata.id"])); p != "" {
|
||||
return p
|
||||
}
|
||||
}
|
||||
if id := normalizeRedfishPath(asString(svc["@odata.id"])); id != "" {
|
||||
return joinPath(id, "/Entries")
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// isHardwareLogEntry returns true if the log entry is hardware-related.
|
||||
// Audit, authentication, and session events are excluded.
|
||||
func isHardwareLogEntry(entry map[string]interface{}) bool {
|
||||
entryType := strings.TrimSpace(asString(entry["EntryType"]))
|
||||
if strings.EqualFold(entryType, "Oem") {
|
||||
return false
|
||||
}
|
||||
|
||||
msgID := strings.ToLower(strings.TrimSpace(asString(entry["MessageId"])))
|
||||
for _, skip := range []string{
|
||||
"user", "account", "password", "login", "logon", "session",
|
||||
"auth", "certificate", "security", "credential", "privilege",
|
||||
} {
|
||||
if strings.Contains(msgID, skip) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Also check the human-readable message for obvious audit patterns.
|
||||
msg := strings.ToLower(strings.TrimSpace(asString(entry["Message"])))
|
||||
for _, skip := range []string{"logged in", "logged out", "log in", "log out", "sign in", "signed in"} {
|
||||
if strings.Contains(msg, skip) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// parseRedfishEntryTime parses a Redfish LogEntry Created timestamp (ISO 8601 / RFC 3339).
// The result is normalized to UTC. Returns the zero time for empty or
// unparseable input; callers treat that as "timestamp unknown".
func parseRedfishEntryTime(raw string) time.Time {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return time.Time{}
	}
	// Try the plain RFC 3339 layout first, then the nanosecond variant.
	// (The previous version also listed the literal "2006-01-02T15:04:05Z07:00",
	// which is byte-identical to time.RFC3339 — a dead duplicate, removed.)
	for _, layout := range []string{time.RFC3339, time.RFC3339Nano} {
		if t, err := time.Parse(layout, raw); err == nil {
			return t.UTC()
		}
	}
	return time.Time{}
}
|
||||
|
||||
// parseRedfishLogEntries converts raw log entries stored in RawPayloads into models.Event slice.
|
||||
// Called during Redfish replay for both live and offline (archive) collections.
|
||||
func parseRedfishLogEntries(rawPayloads map[string]any, collectedAt time.Time) []models.Event {
|
||||
raw, ok := rawPayloads["redfish_log_entries"]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
var entries []map[string]interface{}
|
||||
switch v := raw.(type) {
|
||||
case []map[string]interface{}:
|
||||
entries = v
|
||||
case []interface{}:
|
||||
for _, item := range v {
|
||||
if m, ok := item.(map[string]interface{}); ok {
|
||||
entries = append(entries, m)
|
||||
}
|
||||
}
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(entries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]models.Event, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
ev := redfishLogEntryToEvent(entry, collectedAt)
|
||||
if ev == nil {
|
||||
continue
|
||||
}
|
||||
out = append(out, *ev)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// redfishLogEntryToEvent converts a single Redfish LogEntry document to models.Event.
|
||||
func redfishLogEntryToEvent(entry map[string]interface{}, collectedAt time.Time) *models.Event {
|
||||
// Prefer EventTimestamp (actual hardware event time) over Created (Redfish record creation time).
|
||||
ts := parseRedfishEntryTime(asString(entry["EventTimestamp"]))
|
||||
if ts.IsZero() {
|
||||
ts = parseRedfishEntryTime(asString(entry["Created"]))
|
||||
}
|
||||
if ts.IsZero() {
|
||||
ts = collectedAt
|
||||
}
|
||||
|
||||
severity := redfishLogEntrySeverity(entry)
|
||||
sensorType := strings.TrimSpace(asString(entry["SensorType"]))
|
||||
messageID := strings.TrimSpace(asString(entry["MessageId"]))
|
||||
entryType := strings.TrimSpace(asString(entry["EntryType"]))
|
||||
entryCode := strings.TrimSpace(asString(entry["EntryCode"]))
|
||||
|
||||
// SensorName: prefer "Name", fall back to "SensorNumber" + SensorType.
|
||||
sensorName := strings.TrimSpace(asString(entry["Name"]))
|
||||
if sensorName == "" {
|
||||
num := strings.TrimSpace(asString(entry["SensorNumber"]))
|
||||
if num != "" && sensorType != "" {
|
||||
sensorName = sensorType + " " + num
|
||||
}
|
||||
}
|
||||
|
||||
rawMessage := strings.TrimSpace(asString(entry["Message"]))
|
||||
|
||||
// AMI/MSI BMCs dump raw IPMI record fields into Message instead of human-readable text.
|
||||
// Detect this and build a readable description from structured fields instead.
|
||||
description, rawData := redfishDecodeMessage(rawMessage, sensorType, entryCode, entry)
|
||||
if description == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &models.Event{
|
||||
ID: messageID,
|
||||
Timestamp: ts,
|
||||
Source: "redfish",
|
||||
SensorType: sensorType,
|
||||
SensorName: sensorName,
|
||||
EventType: entryType,
|
||||
Severity: severity,
|
||||
Description: description,
|
||||
RawData: rawData,
|
||||
}
|
||||
}
|
||||
|
||||
// redfishDecodeMessage returns a human-readable description and optional raw data.
|
||||
// AMI/MSI BMCs dump raw IPMI record fields into Message as "Key : Value, Key : Value, ..."
|
||||
// instead of a plain human-readable string. We extract the useful decoded fields from it.
|
||||
func redfishDecodeMessage(message, sensorType, entryCode string, entry map[string]interface{}) (description, rawData string) {
|
||||
if !isRawIPMIDump(message) {
|
||||
description = message
|
||||
return
|
||||
}
|
||||
|
||||
rawData = message
|
||||
kv := parseIPMIDumpKV(message)
|
||||
|
||||
// Sensor_Type inside the dump is more specific than the top-level SensorType field.
|
||||
if v := kv["Sensor_Type"]; v != "" {
|
||||
sensorType = v
|
||||
}
|
||||
eventType := kv["Event_Type"] // human-readable IPMI event type, e.g. "Legacy OFF State"
|
||||
|
||||
var parts []string
|
||||
if sensorType != "" {
|
||||
parts = append(parts, sensorType)
|
||||
}
|
||||
if eventType != "" {
|
||||
parts = append(parts, eventType)
|
||||
} else if entryCode != "" {
|
||||
parts = append(parts, entryCode)
|
||||
}
|
||||
description = strings.Join(parts, ": ")
|
||||
return
|
||||
}
|
||||
|
||||
// isRawIPMIDump reports whether the message is an AMI raw IPMI record dump
// rather than human-readable text. Such dumps always carry both the
// "Record_Type :" and "Event_Data_1 :" key markers.
func isRawIPMIDump(message string) bool {
	for _, marker := range []string{"Event_Data_1 :", "Record_Type :"} {
		if !strings.Contains(message, marker) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// parseIPMIDumpKV parses the AMI "Key : Value, Key : Value, " format into a map.
// Pieces without a " : " separator, or whose key or value is empty after
// trimming, are skipped.
func parseIPMIDumpKV(message string) map[string]string {
	fields := make(map[string]string)
	for _, piece := range strings.Split(message, ",") {
		// Cut at the first " : " occurrence, matching the AMI separator exactly.
		key, value, found := strings.Cut(strings.TrimSpace(piece), " : ")
		if !found {
			continue
		}
		key = strings.TrimSpace(key)
		value = strings.TrimSpace(value)
		if key != "" && value != "" {
			fields[key] = value
		}
	}
	return fields
}
|
||||
|
||||
// redfishLogEntrySeverity maps a Redfish LogEntry to models.Severity.
|
||||
// AMI/MSI BMCs often set Severity="OK" on all SEL records regardless of content,
|
||||
// so we fall back to inferring severity from SensorType when the explicit field is unhelpful.
|
||||
func redfishLogEntrySeverity(entry map[string]interface{}) models.Severity {
|
||||
// Newer Redfish uses MessageSeverity; older uses Severity.
|
||||
raw := strings.ToLower(firstNonEmpty(
|
||||
strings.TrimSpace(asString(entry["MessageSeverity"])),
|
||||
strings.TrimSpace(asString(entry["Severity"])),
|
||||
))
|
||||
switch raw {
|
||||
case "critical":
|
||||
return models.SeverityCritical
|
||||
case "warning":
|
||||
return models.SeverityWarning
|
||||
case "ok", "informational", "":
|
||||
// BMC didn't set a meaningful severity — infer from SensorType.
|
||||
return redfishSeverityFromSensorType(strings.TrimSpace(asString(entry["SensorType"])))
|
||||
default:
|
||||
return models.SeverityInfo
|
||||
}
|
||||
}
|
||||
|
||||
// redfishSeverityFromSensorType infers event severity from the IPMI/Redfish SensorType string.
|
||||
func redfishSeverityFromSensorType(sensorType string) models.Severity {
|
||||
switch strings.ToLower(sensorType) {
|
||||
case "critical interrupt", "processor", "memory", "power unit",
|
||||
"power supply", "drive slot", "system firmware progress":
|
||||
return models.SeverityWarning
|
||||
default:
|
||||
return models.SeverityInfo
|
||||
}
|
||||
}
|
||||
40
internal/collector/redfish_pciids_test.go
Normal file
40
internal/collector/redfish_pciids_test.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseNIC_ResolvesModelFromPCIIDs(t *testing.T) {
|
||||
doc := map[string]interface{}{
|
||||
"Id": "NIC1",
|
||||
"VendorId": "0x8086",
|
||||
"DeviceId": "0x1521",
|
||||
"Model": "0x1521",
|
||||
}
|
||||
|
||||
nic := parseNIC(doc)
|
||||
if nic.Model == "" {
|
||||
t.Fatalf("expected model resolved from pci.ids")
|
||||
}
|
||||
if !strings.Contains(strings.ToUpper(nic.Model), "I350") {
|
||||
t.Fatalf("expected I350 in model, got %q", nic.Model)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePCIeFunction_ResolvesDeviceClassFromPCIIDs(t *testing.T) {
|
||||
doc := map[string]interface{}{
|
||||
"Id": "PCIE1",
|
||||
"VendorId": "0x9005",
|
||||
"DeviceId": "0x028f",
|
||||
"ClassCode": "0x010700",
|
||||
}
|
||||
|
||||
dev := parsePCIeFunction(doc, 0)
|
||||
if dev.DeviceClass == "" || strings.EqualFold(dev.DeviceClass, "PCIe device") {
|
||||
t.Fatalf("expected device class resolved from pci.ids, got %q", dev.DeviceClass)
|
||||
}
|
||||
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(dev.DeviceClass)), "0x") {
|
||||
t.Fatalf("expected resolved name instead of raw hex, got %q", dev.DeviceClass)
|
||||
}
|
||||
}
|
||||
1043
internal/collector/redfish_replay.go
Normal file
1043
internal/collector/redfish_replay.go
Normal file
File diff suppressed because it is too large
Load Diff
159
internal/collector/redfish_replay_fru.go
Normal file
159
internal/collector/redfish_replay_fru.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// collectBoardFallbackDocs gathers auxiliary Redfish documents that may carry
// board identity data when the standard System/Chassis fields are empty.
// It probes "/Boards" and "/Backplanes" under each chassis, then OEM/thermal
// subtrees under both systems and chassis. All probes are best-effort:
// failures and empty documents are silently skipped.
func (r redfishSnapshotReader) collectBoardFallbackDocs(systemPaths, chassisPaths []string) []map[string]interface{} {
	out := make([]map[string]interface{}, 0)
	for _, chassisPath := range chassisPaths {
		for _, suffix := range []string{"/Boards", "/Backplanes"} {
			path := joinPath(chassisPath, suffix)
			// Prefer the collection form; only when it yields nothing is the
			// same path retried as a single document (vendors differ here).
			if docs, err := r.getCollectionMembers(path); err == nil && len(docs) > 0 {
				out = append(out, docs...)
				continue
			}
			if doc, err := r.getJSON(path); err == nil && len(doc) > 0 {
				out = append(out, doc)
			}
		}
	}
	// OEM config documents sometimes embed board identity as well —
	// presumably vendor-specific FRU mirrors; confirmed useful for at least
	// the paths listed here.
	for _, path := range append(append([]string{}, systemPaths...), chassisPaths...) {
		for _, suffix := range []string{"/Oem/Public", "/Oem/Public/ThermalConfig", "/ThermalConfig"} {
			docPath := joinPath(path, suffix)
			if doc, err := r.getJSON(docPath); err == nil && len(doc) > 0 {
				out = append(out, doc)
			}
		}
	}
	return out
}
|
||||
|
||||
func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]interface{}) {
|
||||
if board == nil || len(docs) == 0 {
|
||||
return
|
||||
}
|
||||
for _, doc := range docs {
|
||||
candidate := parseBoardInfoFromFRUDoc(doc)
|
||||
if !isLikelyServerProductName(candidate.ProductName) {
|
||||
continue
|
||||
}
|
||||
if board.Manufacturer == "" {
|
||||
board.Manufacturer = candidate.Manufacturer
|
||||
}
|
||||
if board.ProductName == "" {
|
||||
board.ProductName = candidate.ProductName
|
||||
}
|
||||
if board.SerialNumber == "" {
|
||||
board.SerialNumber = candidate.SerialNumber
|
||||
}
|
||||
if board.PartNumber == "" {
|
||||
board.PartNumber = candidate.PartNumber
|
||||
}
|
||||
if board.Manufacturer != "" && board.ProductName != "" && board.SerialNumber != "" && board.PartNumber != "" {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// isLikelyServerProductName reports whether v looks like a server/board
// product name rather than a component (DIMM, PSU, drive, ...) or a
// placeholder. Matching is case-insensitive and substring-based, so
// e.g. "X13 FAN BOARD" is rejected as well.
func isLikelyServerProductName(v string) bool {
	name := strings.ToUpper(strings.TrimSpace(v))
	if name == "" || strings.Contains(name, "NULL") {
		// Empty values and the common "NULL" BMC placeholder never qualify.
		return false
	}
	for _, token := range []string{
		"DIMM", "DDR", "NVME", "SSD", "HDD", "GPU", "NIC", "RAID",
		"PSU", "FAN", "BACKPLANE", "FRU",
	} {
		if strings.Contains(name, token) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// collectAssemblyFRU reads Chassis/*/Assembly documents and returns FRU entries
// for subcomponents (backplanes, PSUs, DIMMs, etc.) that carry meaningful
// serial or part numbers. Entries already present in dedicated collections
// (PSUs, DIMMs) are included here as well so that all FRU data is available
// in one place; deduplication by serial is performed.
func (r redfishSnapshotReader) collectAssemblyFRU(chassisPaths []string) []models.FRUInfo {
	// seen holds the dedup keys of FRUs already appended to out.
	seen := make(map[string]struct{})
	var out []models.FRUInfo

	// add appends fru unless its dedup key is empty or already seen.
	// Key preference: serial number; falling back to description|part number.
	add := func(fru models.FRUInfo) {
		key := strings.ToUpper(strings.TrimSpace(fru.SerialNumber))
		if key == "" {
			key = strings.ToUpper(strings.TrimSpace(fru.Description + "|" + fru.PartNumber))
		}
		// "|" is the fallback key of an entry with both fields empty — reject it.
		if key == "" || key == "|" {
			return
		}
		if _, ok := seen[key]; ok {
			return
		}
		seen[key] = struct{}{}
		out = append(out, fru)
	}

	for _, chassisPath := range chassisPaths {
		doc, err := r.getJSON(joinPath(chassisPath, "/Assembly"))
		if err != nil || len(doc) == 0 {
			continue
		}
		assemblies, _ := doc["Assemblies"].([]interface{})
		for _, aAny := range assemblies {
			a, ok := aAny.(map[string]interface{})
			if !ok {
				continue
			}
			name := strings.TrimSpace(firstNonEmpty(asString(a["Name"]), asString(a["Description"])))
			model := strings.TrimSpace(asString(a["Model"]))
			partNumber := strings.TrimSpace(asString(a["PartNumber"]))
			serial := extractAssemblySerial(a)

			// An assembly with neither serial nor part number carries no
			// useful FRU identity — skip it.
			if serial == "" && partNumber == "" {
				continue
			}
			add(models.FRUInfo{
				Description:  name,
				ProductName:  model,
				SerialNumber: serial,
				PartNumber:   partNumber,
			})
		}
	}
	return out
}
|
||||
|
||||
// extractAssemblySerial tries to find a serial number in an Assembly entry.
|
||||
// Standard Redfish Assembly has no top-level SerialNumber; vendors put it in Oem.
|
||||
func extractAssemblySerial(a map[string]interface{}) string {
|
||||
if s := strings.TrimSpace(asString(a["SerialNumber"])); s != "" {
|
||||
return s
|
||||
}
|
||||
oem, _ := a["Oem"].(map[string]interface{})
|
||||
for _, v := range oem {
|
||||
subtree, ok := v.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, v2 := range subtree {
|
||||
node, ok := v2.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if s := strings.TrimSpace(asString(node["SerialNumber"])); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
198
internal/collector/redfish_replay_gpu.go
Normal file
198
internal/collector/redfish_replay_gpu.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/collector/redfishprofile"
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// collectGPUs discovers GPUs across the PCIe, accelerator, and graphics
// controller collections under all systems and chassis. Non-GPU PCIe members
// are filtered out, metrics documents linked from the device and its PCIe
// functions are folded in, and duplicates (the same physical GPU visible in
// several collections) are removed by a per-document dedup key. Profile
// directives optionally enable extra dedup of generic "graphics controller"
// placeholder entries.
func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string, plan redfishprofile.ResolvedAnalysisPlan) []models.GPU {
	collections := make([]string, 0, len(systemPaths)*3+len(chassisPaths)*2)
	for _, systemPath := range systemPaths {
		collections = append(collections, joinPath(systemPath, "/PCIeDevices"))
		collections = append(collections, joinPath(systemPath, "/Accelerators"))
		collections = append(collections, joinPath(systemPath, "/GraphicsControllers"))
	}
	for _, chassisPath := range chassisPaths {
		collections = append(collections, joinPath(chassisPath, "/PCIeDevices"))
		collections = append(collections, joinPath(chassisPath, "/Accelerators"))
	}
	var out []models.GPU
	seen := make(map[string]struct{})
	// idx numbers every GPU candidate in discovery order; note it advances
	// even for candidates later dropped by dedup.
	idx := 1
	for _, collectionPath := range collections {
		memberDocs, err := r.getCollectionMembers(collectionPath)
		if err != nil || len(memberDocs) == 0 {
			continue
		}
		for _, doc := range memberDocs {
			functionDocs := r.getLinkedPCIeFunctions(doc)
			if !looksLikeGPU(doc, functionDocs) {
				continue
			}
			// Gather metrics linked from the device itself and from each of
			// its PCIe functions.
			supplementalDocs := r.getLinkedSupplementalDocs(doc, "EnvironmentMetrics", "Metrics")
			for _, fn := range functionDocs {
				supplementalDocs = append(supplementalDocs, r.getLinkedSupplementalDocs(fn, "EnvironmentMetrics", "Metrics")...)
			}
			gpu := parseGPUWithSupplementalDocs(doc, functionDocs, supplementalDocs, idx)
			idx++
			// Optional profile-driven dedup of generic controller entries
			// that duplicate an already-collected concrete GPU.
			if plan.Directives.EnableGenericGraphicsControllerDedup && shouldSkipGenericGPUDuplicate(out, gpu) {
				continue
			}
			key := gpuDocDedupKey(doc, gpu)
			if key == "" {
				continue
			}
			if _, ok := seen[key]; ok {
				continue
			}
			seen[key] = struct{}{}
			out = append(out, gpu)
		}
	}
	if plan.Directives.EnableGenericGraphicsControllerDedup {
		return dropModelOnlyGPUPlaceholders(out)
	}
	return out
}
|
||||
|
||||
// msiGhostGPUFilter returns true when the GPU chassis for gpuID shows a temperature
|
||||
// of 0 on a powered-on host, which is the reliable MSI/AMI signal that the GPU is
|
||||
// no longer physically installed (stale BMC inventory cache).
|
||||
// It only filters when the system PowerState is "On" — when the host is off, all
|
||||
// temperature readings are 0 and we cannot distinguish absent from idle.
|
||||
func (r redfishSnapshotReader) msiGhostGPUFilter(systemPaths []string, gpuID, chassisPath string) bool {
|
||||
// Require host powered on.
|
||||
for _, sp := range systemPaths {
|
||||
doc, err := r.getJSON(sp)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !strings.EqualFold(strings.TrimSpace(asString(doc["PowerState"])), "on") {
|
||||
return false
|
||||
}
|
||||
break
|
||||
}
|
||||
// Read the temperature sensor for this GPU chassis.
|
||||
sensorPath := joinPath(chassisPath, "/Sensors/"+gpuID+"_Temperature")
|
||||
sensorDoc, err := r.getJSON(sensorPath)
|
||||
if err != nil || len(sensorDoc) == 0 {
|
||||
return false
|
||||
}
|
||||
reading, ok := sensorDoc["Reading"]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
switch v := reading.(type) {
|
||||
case float64:
|
||||
return v == 0
|
||||
case int:
|
||||
return v == 0
|
||||
case int64:
|
||||
return v == 0
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// collectGPUsFromProcessors finds GPUs that some BMCs (e.g. MSI) expose as
// Processor entries with ProcessorType=GPU rather than as PCIe devices.
// It supplements the existing gpus slice (already found via PCIe path),
// skipping entries already present by UUID or SerialNumber.
// Serial numbers are looked up from Chassis members named after each GPU Id.
// The input slice is not mutated; a new slice is returned.
func (r redfishSnapshotReader) collectGPUsFromProcessors(systemPaths, chassisPaths []string, existing []models.GPU, plan redfishprofile.ResolvedAnalysisPlan) []models.GPU {
	if !plan.Directives.EnableProcessorGPUFallback {
		// Fallback disabled: return a copy of the input unchanged.
		return append([]models.GPU{}, existing...)
	}
	// Index all chassis documents and their paths by uppercased chassis Id so
	// a GPU's serial/path can be resolved from its candidate chassis IDs.
	chassisByID := make(map[string]map[string]interface{})
	chassisPathByID := make(map[string]string)
	for _, cp := range chassisPaths {
		doc, err := r.getJSON(cp)
		if err != nil || len(doc) == 0 {
			continue
		}
		id := strings.TrimSpace(asString(doc["Id"]))
		if id != "" {
			chassisByID[strings.ToUpper(id)] = doc
			chassisPathByID[strings.ToUpper(id)] = cp
		}
	}

	// Seed dedup sets from GPUs already discovered via the PCIe path.
	seenUUID := make(map[string]struct{})
	seenSerial := make(map[string]struct{})
	for _, g := range existing {
		if u := strings.ToUpper(strings.TrimSpace(g.UUID)); u != "" {
			seenUUID[u] = struct{}{}
		}
		if s := strings.ToUpper(strings.TrimSpace(g.SerialNumber)); s != "" {
			seenSerial[s] = struct{}{}
		}
	}

	out := append([]models.GPU{}, existing...)
	// idx continues the numbering used by the existing GPUs; only consumed
	// for the synthetic "GPU<n>" slot label fallback below.
	idx := len(existing) + 1
	for _, systemPath := range systemPaths {
		procDocs, err := r.getCollectionMembers(joinPath(systemPath, "/Processors"))
		if err != nil {
			continue
		}
		for _, doc := range procDocs {
			if !strings.EqualFold(strings.TrimSpace(asString(doc["ProcessorType"])), "GPU") {
				continue
			}

			gpuID := strings.TrimSpace(asString(doc["Id"]))
			// Serial: prefer any SerialNumber-keyed value in the processor
			// doc itself, then the associated chassis document.
			serial := findFirstNormalizedStringByKeys(doc, "SerialNumber")
			if serial == "" {
				serial = resolveProcessorGPUChassisSerial(chassisByID, gpuID, plan)
			}

			// Optional profile-driven filter for stale ("ghost") GPU entries
			// on MSI BMCs; see msiGhostGPUFilter.
			if plan.Directives.EnableMSIGhostGPUFilter {
				chassisPath := resolveProcessorGPUChassisPath(chassisPathByID, gpuID, plan)
				if chassisPath != "" && r.msiGhostGPUFilter(systemPaths, gpuID, chassisPath) {
					continue
				}
			}

			uuid := strings.TrimSpace(asString(doc["UUID"]))
			uuidKey := strings.ToUpper(uuid)
			serialKey := strings.ToUpper(serial)

			// Skip processors that duplicate an already-known GPU by UUID or
			// serial; otherwise record their keys for later duplicates.
			if uuidKey != "" {
				if _, dup := seenUUID[uuidKey]; dup {
					continue
				}
				seenUUID[uuidKey] = struct{}{}
			}
			if serialKey != "" {
				if _, dup := seenSerial[serialKey]; dup {
					continue
				}
				seenSerial[serialKey] = struct{}{}
			}

			// Slot label preference: physical location, then the GPU Id,
			// then a synthetic sequential name.
			slotLabel := firstNonEmpty(
				redfishLocationLabel(doc["Location"]),
				redfishLocationLabel(doc["PhysicalLocation"]),
			)
			if slotLabel == "" && gpuID != "" {
				slotLabel = gpuID
			}
			if slotLabel == "" {
				slotLabel = fmt.Sprintf("GPU%d", idx)
			}
			out = append(out, models.GPU{
				Slot:         slotLabel,
				Model:        firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
				Manufacturer: asString(doc["Manufacturer"]),
				PartNumber:   asString(doc["PartNumber"]),
				SerialNumber: serial,
				UUID:         uuid,
				Status:       mapStatus(doc["Status"]),
			})
			idx++
		}
	}
	return out
}
|
||||
215
internal/collector/redfish_replay_inventory.go
Normal file
215
internal/collector/redfish_replay_inventory.go
Normal file
@@ -0,0 +1,215 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// enrichNICsFromNetworkInterfaces merges data from Systems/*/NetworkInterfaces
// into the NIC list collected elsewhere: interfaces whose Id/Name matches an
// existing NIC slot (case-insensitive) enrich that NIC; unmatched interfaces
// are appended as new adapters. MAC addresses and a port count are then
// populated from each interface's NetworkPorts collection. nics is modified
// in place (hence the pointer-to-slice parameter).
func (r redfishSnapshotReader) enrichNICsFromNetworkInterfaces(nics *[]models.NetworkAdapter, systemPaths []string) {
	if nics == nil {
		return
	}
	// Index existing NICs by lowercased slot for matching below.
	bySlot := make(map[string]int, len(*nics))
	for i, nic := range *nics {
		bySlot[strings.ToLower(strings.TrimSpace(nic.Slot))] = i
	}

	for _, systemPath := range systemPaths {
		ifaces, err := r.getCollectionMembers(joinPath(systemPath, "/NetworkInterfaces"))
		if err != nil || len(ifaces) == 0 {
			continue
		}
		for _, iface := range ifaces {
			slot := firstNonEmpty(asString(iface["Id"]), asString(iface["Name"]))
			if strings.TrimSpace(slot) == "" {
				continue
			}
			idx, ok := bySlot[strings.ToLower(strings.TrimSpace(slot))]
			if !ok {
				// No matching NIC yet: append a new adapter and register it
				// in the index so later interfaces can match it.
				*nics = append(*nics, models.NetworkAdapter{
					Slot:    slot,
					Present: true,
					Model:   firstNonEmpty(asString(iface["Model"]), asString(iface["Name"])),
					Status:  mapStatus(iface["Status"]),
				})
				idx = len(*nics) - 1
				bySlot[strings.ToLower(strings.TrimSpace(slot))] = idx
			}

			portsPath := redfishLinkedPath(iface, "NetworkPorts")
			if portsPath == "" {
				continue
			}
			portDocs, err := r.getCollectionMembers(portsPath)
			if err != nil || len(portDocs) == 0 {
				continue
			}
			// Merge port MACs with any MACs the NIC already had, deduplicated.
			macs := append([]string{}, (*nics)[idx].MACAddresses...)
			for _, p := range portDocs {
				macs = append(macs, collectNetworkPortMACs(p)...)
			}
			(*nics)[idx].MACAddresses = dedupeStrings(macs)
			// Only fill PortCount when the existing value sanitizes to zero
			// (unset or invalid).
			if sanitizeNetworkPortCount((*nics)[idx].PortCount) == 0 {
				(*nics)[idx].PortCount = len(portDocs)
			}
		}
	}
}
|
||||
|
||||
// collectNICs gathers network adapters from Chassis/*/NetworkAdapters,
// enriching each adapter from its linked PCIe device/function documents and,
// when the PCIe path yields no MAC addresses, from its
// NetworkDeviceFunctions collection. The result is deduplicated.
func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.NetworkAdapter {
	var nics []models.NetworkAdapter
	for _, chassisPath := range chassisPaths {
		adapterDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/NetworkAdapters"))
		if err != nil {
			continue
		}
		for _, doc := range adapterDocs {
			nic := parseNIC(doc)
			// Fold in details from every PCIe device the adapter links to,
			// including each device's functions and metrics documents.
			for _, pciePath := range networkAdapterPCIeDevicePaths(doc) {
				pcieDoc, err := r.getJSON(pciePath)
				if err != nil {
					continue
				}
				functionDocs := r.getLinkedPCIeFunctions(pcieDoc)
				supplementalDocs := r.getLinkedSupplementalDocs(pcieDoc, "EnvironmentMetrics", "Metrics")
				for _, fn := range functionDocs {
					supplementalDocs = append(supplementalDocs, r.getLinkedSupplementalDocs(fn, "EnvironmentMetrics", "Metrics")...)
				}
				enrichNICFromPCIe(&nic, pcieDoc, functionDocs, supplementalDocs)
			}
			// MAC fallback: only consulted when PCIe enrichment produced none.
			if len(nic.MACAddresses) == 0 {
				r.enrichNICMACsFromNetworkDeviceFunctions(&nic, doc)
			}
			nics = append(nics, nic)
		}
	}
	return dedupeNetworkAdapters(nics)
}
|
||||
|
||||
// collectPCIeDevices gathers non-GPU PCIe devices from the PCIeDevices
// collections of all systems and chassis (GPUs are handled by collectGPUs),
// folding in metrics documents linked from each device and its functions.
// It also includes standalone Systems/*/PCIeFunctions members for BMCs that
// expose functions without parent device documents. The result is
// deduplicated and unidentifiable entries are dropped.
func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []string) []models.PCIeDevice {
	collections := make([]string, 0, len(systemPaths)+len(chassisPaths))
	for _, systemPath := range systemPaths {
		collections = append(collections, joinPath(systemPath, "/PCIeDevices"))
	}
	for _, chassisPath := range chassisPaths {
		collections = append(collections, joinPath(chassisPath, "/PCIeDevices"))
	}
	var out []models.PCIeDevice
	for _, collectionPath := range collections {
		memberDocs, err := r.getCollectionMembers(collectionPath)
		if err != nil || len(memberDocs) == 0 {
			continue
		}
		for _, doc := range memberDocs {
			functionDocs := r.getLinkedPCIeFunctions(doc)
			// GPUs are excluded here; collectGPUs owns them.
			if looksLikeGPU(doc, functionDocs) {
				continue
			}
			supplementalDocs := r.getLinkedSupplementalDocs(doc, "EnvironmentMetrics", "Metrics")
			// NVSwitch-style devices keep their metrics on the owning chassis.
			supplementalDocs = append(supplementalDocs, r.getChassisScopedPCIeSupplementalDocs(doc)...)
			for _, fn := range functionDocs {
				supplementalDocs = append(supplementalDocs, r.getLinkedSupplementalDocs(fn, "EnvironmentMetrics", "Metrics")...)
			}
			dev := parsePCIeDeviceWithSupplementalDocs(doc, functionDocs, supplementalDocs)
			if isUnidentifiablePCIeDevice(dev) {
				continue
			}
			out = append(out, dev)
		}
	}
	// Standalone PCIeFunctions collections (no parent PCIeDevice documents).
	for _, systemPath := range systemPaths {
		functionDocs, err := r.getCollectionMembers(joinPath(systemPath, "/PCIeFunctions"))
		if err != nil || len(functionDocs) == 0 {
			continue
		}
		for idx, fn := range functionDocs {
			supplementalDocs := r.getLinkedSupplementalDocs(fn, "EnvironmentMetrics", "Metrics")
			dev := parsePCIeFunctionWithSupplementalDocs(fn, supplementalDocs, idx+1)
			out = append(out, dev)
		}
	}
	return dedupePCIeDevices(out)
}
|
||||
|
||||
func (r redfishSnapshotReader) getChassisScopedPCIeSupplementalDocs(doc map[string]interface{}) []map[string]interface{} {
|
||||
if !looksLikeNVSwitchPCIeDoc(doc) {
|
||||
return nil
|
||||
}
|
||||
docPath := normalizeRedfishPath(asString(doc["@odata.id"]))
|
||||
chassisPath := chassisPathForPCIeDoc(docPath)
|
||||
if chassisPath == "" {
|
||||
return nil
|
||||
}
|
||||
out := make([]map[string]interface{}, 0, 4)
|
||||
for _, path := range []string{
|
||||
joinPath(chassisPath, "/EnvironmentMetrics"),
|
||||
joinPath(chassisPath, "/ThermalSubsystem/ThermalMetrics"),
|
||||
} {
|
||||
supplementalDoc, err := r.getJSON(path)
|
||||
if err != nil || len(supplementalDoc) == 0 {
|
||||
continue
|
||||
}
|
||||
out = append(out, supplementalDoc)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// collectBMCMAC returns the MAC address of the first active BMC management
|
||||
// interface found in Managers/*/EthernetInterfaces. Returns empty string if
|
||||
// no MAC is available.
|
||||
func (r redfishSnapshotReader) collectBMCMAC(managerPaths []string) string {
|
||||
for _, managerPath := range managerPaths {
|
||||
members, err := r.getCollectionMembers(joinPath(managerPath, "/EthernetInterfaces"))
|
||||
if err != nil || len(members) == 0 {
|
||||
continue
|
||||
}
|
||||
for _, doc := range members {
|
||||
mac := strings.TrimSpace(firstNonEmpty(
|
||||
asString(doc["PermanentMACAddress"]),
|
||||
asString(doc["MACAddress"]),
|
||||
))
|
||||
if mac == "" || strings.EqualFold(mac, "00:00:00:00:00:00") {
|
||||
continue
|
||||
}
|
||||
return strings.ToUpper(mac)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// enrichNICMACsFromNetworkDeviceFunctions reads the NetworkDeviceFunctions
|
||||
// collection linked from a NetworkAdapter document and populates the NIC's
|
||||
// MACAddresses from each function's Ethernet.PermanentMACAddress / MACAddress.
|
||||
// Called when PCIe-path enrichment does not produce any MACs.
|
||||
func (r redfishSnapshotReader) enrichNICMACsFromNetworkDeviceFunctions(nic *models.NetworkAdapter, adapterDoc map[string]interface{}) {
|
||||
ndfCol, ok := adapterDoc["NetworkDeviceFunctions"].(map[string]interface{})
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
colPath := asString(ndfCol["@odata.id"])
|
||||
if colPath == "" {
|
||||
return
|
||||
}
|
||||
funcDocs, err := r.getCollectionMembers(colPath)
|
||||
if err != nil || len(funcDocs) == 0 {
|
||||
return
|
||||
}
|
||||
for _, fn := range funcDocs {
|
||||
eth, _ := fn["Ethernet"].(map[string]interface{})
|
||||
if eth == nil {
|
||||
continue
|
||||
}
|
||||
mac := strings.TrimSpace(firstNonEmpty(
|
||||
asString(eth["PermanentMACAddress"]),
|
||||
asString(eth["MACAddress"]),
|
||||
))
|
||||
if mac == "" {
|
||||
continue
|
||||
}
|
||||
nic.MACAddresses = dedupeStrings(append(nic.MACAddresses, strings.ToUpper(mac)))
|
||||
}
|
||||
if len(funcDocs) > 0 && nic.PortCount == 0 {
|
||||
nic.PortCount = sanitizeNetworkPortCount(len(funcDocs))
|
||||
}
|
||||
}
|
||||
100
internal/collector/redfish_replay_profiles.go
Normal file
100
internal/collector/redfish_replay_profiles.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/collector/redfishprofile"
|
||||
)
|
||||
|
||||
func (r redfishSnapshotReader) collectKnownStorageMembers(systemPath string, relativeCollections []string) []map[string]interface{} {
|
||||
var out []map[string]interface{}
|
||||
for _, rel := range relativeCollections {
|
||||
docs, err := r.getCollectionMembers(joinPath(systemPath, rel))
|
||||
if err != nil || len(docs) == 0 {
|
||||
continue
|
||||
}
|
||||
out = append(out, docs...)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// probeSupermicroNVMeDiskBays probes direct disk-bay child documents under a
// Supermicro backplane's Drives collection; see probeDirectDiskBayChildren.
func (r redfishSnapshotReader) probeSupermicroNVMeDiskBays(backplanePath string) []map[string]interface{} {
	return r.probeDirectDiskBayChildren(joinPath(backplanePath, "/Drives"))
}
|
||||
|
||||
func (r redfishSnapshotReader) probeDirectDiskBayChildren(drivesCollectionPath string) []map[string]interface{} {
|
||||
var out []map[string]interface{}
|
||||
for _, path := range directDiskBayCandidates(drivesCollectionPath) {
|
||||
doc, err := r.getJSON(path)
|
||||
if err != nil || !looksLikeDrive(doc) {
|
||||
continue
|
||||
}
|
||||
out = append(out, doc)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func resolveProcessorGPUChassisSerial(chassisByID map[string]map[string]interface{}, gpuID string, plan redfishprofile.ResolvedAnalysisPlan) string {
|
||||
for _, candidateID := range processorGPUChassisCandidateIDs(gpuID, plan) {
|
||||
if chassisDoc, ok := chassisByID[strings.ToUpper(candidateID)]; ok {
|
||||
if serial := strings.TrimSpace(asString(chassisDoc["SerialNumber"])); serial != "" {
|
||||
return serial
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func resolveProcessorGPUChassisPath(chassisPathByID map[string]string, gpuID string, plan redfishprofile.ResolvedAnalysisPlan) string {
|
||||
for _, candidateID := range processorGPUChassisCandidateIDs(gpuID, plan) {
|
||||
if p, ok := chassisPathByID[strings.ToUpper(candidateID)]; ok {
|
||||
return p
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func processorGPUChassisCandidateIDs(gpuID string, plan redfishprofile.ResolvedAnalysisPlan) []string {
|
||||
gpuID = strings.TrimSpace(gpuID)
|
||||
if gpuID == "" {
|
||||
return nil
|
||||
}
|
||||
candidates := []string{gpuID}
|
||||
for _, mode := range plan.ProcessorGPUChassisLookupModes {
|
||||
switch strings.ToLower(strings.TrimSpace(mode)) {
|
||||
case "msi-index":
|
||||
candidates = append(candidates, msiProcessorGPUChassisCandidateIDs(gpuID)...)
|
||||
case "hgx-alias":
|
||||
if strings.HasPrefix(strings.ToUpper(gpuID), "GPU_") {
|
||||
candidates = append(candidates, "HGX_"+gpuID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return dedupeStrings(candidates)
|
||||
}
|
||||
|
||||
// msiProcessorGPUChassisCandidateIDs maps between the MSI GPU naming schemes
// ("GPU1", "GPU_1", "GPU_SXM_1"): given one spelling it returns the other
// spellings for the same index. The input is trimmed and upper-cased first;
// an unrecognized prefix or a missing index yields nil.
func msiProcessorGPUChassisCandidateIDs(gpuID string) []string {
	id := strings.TrimSpace(strings.ToUpper(gpuID))
	if id == "" {
		return nil
	}
	// Try the longest prefix first so "GPU_SXM_" is not mistaken for "GPU_",
	// and "GPU_" not mistaken for "GPU".
	aliasTable := []struct {
		prefix string
		forms  []string
	}{
		{"GPU_SXM_", []string{"GPU", "GPU_"}},
		{"GPU_", []string{"GPU", "GPU_SXM_"}},
		{"GPU", []string{"GPU_", "GPU_SXM_"}},
	}
	for _, entry := range aliasTable {
		if !strings.HasPrefix(id, entry.prefix) {
			continue
		}
		index := strings.TrimPrefix(id, entry.prefix)
		if index == "" {
			return nil
		}
		aliases := make([]string, 0, len(entry.forms))
		for _, form := range entry.forms {
			aliases = append(aliases, form+index)
		}
		return aliases
	}
	return nil
}
|
||||
164
internal/collector/redfish_replay_storage.go
Normal file
164
internal/collector/redfish_replay_storage.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"git.mchus.pro/mchus/logpile/internal/collector/redfishprofile"
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// collectStorage enumerates physical drives for a system from every Redfish
// location this reader knows about, in order:
//  1. /Storage members — via a "Drives" collection object, an embedded
//     "Drives" reference array, or the member itself looking like a drive;
//     plus (when enabled) enclosure-linked drive recovery.
//  2. Profile-supplied well-known drive collections.
//  3. /SimpleStorage device lists.
//  4. Chassis-level /Drives collections.
//  5. (when enabled) Supermicro NVMe backplane disk-bay probing.
// Virtual drives are excluded throughout and the result is de-duplicated.
func (r redfishSnapshotReader) collectStorage(systemPath string, plan redfishprofile.ResolvedAnalysisPlan) []models.Storage {
	var out []models.Storage
	// Collection fetch errors are deliberately ignored: absence of /Storage
	// just means the later sources are tried.
	storageMembers, _ := r.getCollectionMembers(joinPath(systemPath, "/Storage"))
	for _, member := range storageMembers {
		// Shape 1: "Drives" is a collection reference ({"@odata.id": ...}).
		if driveCollection, ok := member["Drives"].(map[string]interface{}); ok {
			if driveCollectionPath := asString(driveCollection["@odata.id"]); driveCollectionPath != "" {
				driveDocs, err := r.getCollectionMembers(driveCollectionPath)
				if err == nil {
					for _, driveDoc := range driveDocs {
						if !isVirtualStorageDrive(driveDoc) {
							supplementalDocs := r.getLinkedSupplementalDocs(driveDoc, "DriveMetrics", "EnvironmentMetrics", "Metrics")
							out = append(out, parseDriveWithSupplementalDocs(driveDoc, supplementalDocs...))
						}
					}
					// Empty listing: fall back to probing numbered disk-bay
					// children directly.
					// NOTE(review): unlike the other branches, this probe path
					// does not filter isVirtualStorageDrive — confirm whether
					// that is intentional.
					if len(driveDocs) == 0 {
						for _, driveDoc := range r.probeDirectDiskBayChildren(driveCollectionPath) {
							supplementalDocs := r.getLinkedSupplementalDocs(driveDoc, "DriveMetrics", "EnvironmentMetrics", "Metrics")
							out = append(out, parseDriveWithSupplementalDocs(driveDoc, supplementalDocs...))
						}
					}
				}
				// A collection-shaped member is fully handled; skip the other
				// shapes and the enclosure recovery below.
				continue
			}
		}
		// Shape 2: "Drives" is an inline array of drive references.
		if drives, ok := member["Drives"].([]interface{}); ok {
			for _, driveAny := range drives {
				driveRef, ok := driveAny.(map[string]interface{})
				if !ok {
					continue
				}
				odata := asString(driveRef["@odata.id"])
				if odata == "" {
					continue
				}
				driveDoc, err := r.getJSON(odata)
				if err != nil {
					continue
				}
				if !isVirtualStorageDrive(driveDoc) {
					supplementalDocs := r.getLinkedSupplementalDocs(driveDoc, "DriveMetrics", "EnvironmentMetrics", "Metrics")
					out = append(out, parseDriveWithSupplementalDocs(driveDoc, supplementalDocs...))
				}
			}
			continue
		}
		// Shape 3: the /Storage member itself is a drive document.
		if looksLikeDrive(member) {
			if isVirtualStorageDrive(member) {
				continue
			}
			supplementalDocs := r.getLinkedSupplementalDocs(member, "DriveMetrics", "EnvironmentMetrics", "Metrics")
			out = append(out, parseDriveWithSupplementalDocs(member, supplementalDocs...))
		}

		// Enclosure recovery: follow Links/Enclosures references and collect
		// drives hanging off each enclosure.
		if plan.Directives.EnableStorageEnclosureRecovery {
			for _, enclosurePath := range redfishLinkRefs(member, "Links", "Enclosures") {
				driveDocs, err := r.getCollectionMembers(joinPath(enclosurePath, "/Drives"))
				if err == nil {
					for _, driveDoc := range driveDocs {
						if looksLikeDrive(driveDoc) && !isVirtualStorageDrive(driveDoc) {
							supplementalDocs := r.getLinkedSupplementalDocs(driveDoc, "DriveMetrics", "EnvironmentMetrics", "Metrics")
							out = append(out, parseDriveWithSupplementalDocs(driveDoc, supplementalDocs...))
						}
					}
					// Empty enclosure listing: probe disk bays directly.
					// NOTE(review): this probe uses parseDrive (no supplemental
					// metrics), unlike the branches above — verify intent.
					if len(driveDocs) == 0 {
						for _, driveDoc := range r.probeDirectDiskBayChildren(joinPath(enclosurePath, "/Drives")) {
							if isVirtualStorageDrive(driveDoc) {
								continue
							}
							out = append(out, parseDrive(driveDoc))
						}
					}
				}
			}
		}
	}

	// Source 2: drive collections the analysis plan knows about (e.g. vendor
	// controller trees that are not listed under /Storage).
	if len(plan.KnownStorageDriveCollections) > 0 {
		for _, driveDoc := range r.collectKnownStorageMembers(systemPath, plan.KnownStorageDriveCollections) {
			if looksLikeDrive(driveDoc) && !isVirtualStorageDrive(driveDoc) {
				supplementalDocs := r.getLinkedSupplementalDocs(driveDoc, "DriveMetrics", "EnvironmentMetrics", "Metrics")
				out = append(out, parseDriveWithSupplementalDocs(driveDoc, supplementalDocs...))
			}
		}
	}

	// Source 3: legacy /SimpleStorage members, whose drives are inline
	// "Devices" entries rather than separate resources.
	simpleStorageMembers, _ := r.getCollectionMembers(joinPath(systemPath, "/SimpleStorage"))
	for _, member := range simpleStorageMembers {
		devices, ok := member["Devices"].([]interface{})
		if !ok {
			continue
		}
		for _, devAny := range devices {
			devDoc, ok := devAny.(map[string]interface{})
			if !ok || !looksLikeDrive(devDoc) || isVirtualStorageDrive(devDoc) {
				continue
			}
			out = append(out, parseDrive(devDoc))
		}
	}

	// Source 4: chassis-level /Drives collections.
	chassisPaths := r.discoverMemberPaths("/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
	for _, chassisPath := range chassisPaths {
		driveDocs, err := r.getCollectionMembers(joinPath(chassisPath, "/Drives"))
		if err != nil {
			continue
		}
		for _, driveDoc := range driveDocs {
			if !looksLikeDrive(driveDoc) || isVirtualStorageDrive(driveDoc) {
				continue
			}
			out = append(out, parseDrive(driveDoc))
		}
	}
	// Source 5: Supermicro NVMe backplane chassis whose drives only appear
	// via direct disk-bay probing.
	if plan.Directives.EnableSupermicroNVMeBackplane {
		for _, chassisPath := range chassisPaths {
			if !isSupermicroNVMeBackplanePath(chassisPath) {
				continue
			}
			for _, driveDoc := range r.probeSupermicroNVMeDiskBays(chassisPath) {
				if !looksLikeDrive(driveDoc) || isVirtualStorageDrive(driveDoc) {
					continue
				}
				out = append(out, parseDrive(driveDoc))
			}
		}
	}
	return dedupeStorage(out)
}
|
||||
|
||||
func (r redfishSnapshotReader) collectStorageVolumes(systemPath string, plan redfishprofile.ResolvedAnalysisPlan) []models.StorageVolume {
|
||||
var out []models.StorageVolume
|
||||
storageMembers, _ := r.getCollectionMembers(joinPath(systemPath, "/Storage"))
|
||||
for _, member := range storageMembers {
|
||||
controller := firstNonEmpty(asString(member["Id"]), asString(member["Name"]))
|
||||
volumeCollectionPath := redfishLinkedPath(member, "Volumes")
|
||||
if volumeCollectionPath == "" {
|
||||
continue
|
||||
}
|
||||
volumeDocs, err := r.getCollectionMembers(volumeCollectionPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, volDoc := range volumeDocs {
|
||||
if looksLikeVolume(volDoc) {
|
||||
out = append(out, parseStorageVolume(volDoc, controller))
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(plan.KnownStorageVolumeCollections) > 0 {
|
||||
for _, volDoc := range r.collectKnownStorageMembers(systemPath, plan.KnownStorageVolumeCollections) {
|
||||
if looksLikeVolume(volDoc) {
|
||||
out = append(out, parseStorageVolume(volDoc, storageControllerFromPath(asString(volDoc["@odata.id"]))))
|
||||
}
|
||||
}
|
||||
}
|
||||
return dedupeStorageVolumes(out)
|
||||
}
|
||||
3263
internal/collector/redfish_test.go
Normal file
3263
internal/collector/redfish_test.go
Normal file
File diff suppressed because it is too large
Load Diff
162
internal/collector/redfishprofile/acquisition.go
Normal file
162
internal/collector/redfishprofile/acquisition.go
Normal file
@@ -0,0 +1,162 @@
|
||||
package redfishprofile
|
||||
|
||||
import "strings"
|
||||
|
||||
// ResolveAcquisitionPlan expands a vendor AcquisitionPlan plus the discovered
// service-root resources into the final ordered lists of seed and critical
// paths. Merge order (preserved by mergeResolvedPaths' first-seen
// de-duplication): baseline paths, then per-system/chassis/manager scoped
// suffix expansions, then the plan's own paths. Matched profiles may refine
// the result before everything is re-merged.
func ResolveAcquisitionPlan(match MatchResult, plan AcquisitionPlan, discovered DiscoveredResources, signals MatchSignals) ResolvedAcquisitionPlan {
	seedGroups := [][]string{
		baselineSeedPaths(discovered),
		expandScopedSuffixes(discovered.SystemPaths, plan.ScopedPaths.SystemSeedSuffixes),
		expandScopedSuffixes(discovered.ChassisPaths, plan.ScopedPaths.ChassisSeedSuffixes),
		expandScopedSuffixes(discovered.ManagerPaths, plan.ScopedPaths.ManagerSeedSuffixes),
		plan.SeedPaths,
	}
	// In fallback mode the plan-b paths are not held in reserve: they are
	// seeded up front.
	if plan.Mode == ModeFallback {
		seedGroups = append(seedGroups, plan.PlanBPaths)
	}

	criticalGroups := [][]string{
		baselineCriticalPaths(discovered),
		expandScopedSuffixes(discovered.SystemPaths, plan.ScopedPaths.SystemCriticalSuffixes),
		expandScopedSuffixes(discovered.ChassisPaths, plan.ScopedPaths.ChassisCriticalSuffixes),
		expandScopedSuffixes(discovered.ManagerPaths, plan.ScopedPaths.ManagerCriticalSuffixes),
		plan.CriticalPaths,
	}

	resolved := ResolvedAcquisitionPlan{
		Plan:          plan,
		SeedPaths:     mergeResolvedPaths(seedGroups...),
		CriticalPaths: mergeResolvedPaths(criticalGroups...),
	}
	// Profiles refine in match order and may append raw or duplicate entries.
	for _, profile := range match.Profiles {
		profile.RefineAcquisitionPlan(&resolved, discovered, signals)
	}
	// Re-merge after refinement to re-normalize and de-duplicate. The
	// critical-path merge re-folds the plan's own critical paths so a
	// refinement cannot drop them.
	resolved.SeedPaths = mergeResolvedPaths(resolved.SeedPaths)
	resolved.CriticalPaths = mergeResolvedPaths(resolved.CriticalPaths, resolved.Plan.CriticalPaths)
	resolved.Plan.SeedPaths = mergeResolvedPaths(resolved.Plan.SeedPaths)
	resolved.Plan.CriticalPaths = mergeResolvedPaths(resolved.Plan.CriticalPaths)
	resolved.Plan.PlanBPaths = mergeResolvedPaths(resolved.Plan.PlanBPaths)
	return resolved
}
|
||||
|
||||
func baselineSeedPaths(discovered DiscoveredResources) []string {
|
||||
var out []string
|
||||
add := func(p string) {
|
||||
if p = normalizePath(p); p != "" {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
|
||||
add("/redfish/v1/UpdateService")
|
||||
add("/redfish/v1/UpdateService/FirmwareInventory")
|
||||
|
||||
for _, p := range discovered.SystemPaths {
|
||||
add(p)
|
||||
add(joinPath(p, "/Bios"))
|
||||
add(joinPath(p, "/Oem/Public"))
|
||||
add(joinPath(p, "/Oem/Public/FRU"))
|
||||
add(joinPath(p, "/Processors"))
|
||||
add(joinPath(p, "/Memory"))
|
||||
add(joinPath(p, "/EthernetInterfaces"))
|
||||
add(joinPath(p, "/NetworkInterfaces"))
|
||||
add(joinPath(p, "/PCIeDevices"))
|
||||
add(joinPath(p, "/PCIeFunctions"))
|
||||
add(joinPath(p, "/Accelerators"))
|
||||
add(joinPath(p, "/GraphicsControllers"))
|
||||
add(joinPath(p, "/Storage"))
|
||||
}
|
||||
for _, p := range discovered.ChassisPaths {
|
||||
add(p)
|
||||
add(joinPath(p, "/Oem/Public"))
|
||||
add(joinPath(p, "/Oem/Public/FRU"))
|
||||
add(joinPath(p, "/PCIeDevices"))
|
||||
add(joinPath(p, "/PCIeSlots"))
|
||||
add(joinPath(p, "/NetworkAdapters"))
|
||||
add(joinPath(p, "/Drives"))
|
||||
add(joinPath(p, "/Power"))
|
||||
}
|
||||
for _, p := range discovered.ManagerPaths {
|
||||
add(p)
|
||||
add(joinPath(p, "/EthernetInterfaces"))
|
||||
add(joinPath(p, "/NetworkProtocol"))
|
||||
}
|
||||
return mergeResolvedPaths(out)
|
||||
}
|
||||
|
||||
func baselineCriticalPaths(discovered DiscoveredResources) []string {
|
||||
var out []string
|
||||
for _, group := range [][]string{
|
||||
{"/redfish/v1"},
|
||||
discovered.SystemPaths,
|
||||
discovered.ChassisPaths,
|
||||
discovered.ManagerPaths,
|
||||
} {
|
||||
out = append(out, group...)
|
||||
}
|
||||
return mergeResolvedPaths(out)
|
||||
}
|
||||
|
||||
func expandScopedSuffixes(basePaths, suffixes []string) []string {
|
||||
if len(basePaths) == 0 || len(suffixes) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(basePaths)*len(suffixes))
|
||||
for _, basePath := range basePaths {
|
||||
basePath = normalizePath(basePath)
|
||||
if basePath == "" {
|
||||
continue
|
||||
}
|
||||
for _, suffix := range suffixes {
|
||||
suffix = strings.TrimSpace(suffix)
|
||||
if suffix == "" {
|
||||
continue
|
||||
}
|
||||
out = append(out, joinPath(basePath, suffix))
|
||||
}
|
||||
}
|
||||
return mergeResolvedPaths(out)
|
||||
}
|
||||
|
||||
func mergeResolvedPaths(groups ...[]string) []string {
|
||||
seen := make(map[string]struct{})
|
||||
out := make([]string, 0)
|
||||
for _, group := range groups {
|
||||
for _, path := range group {
|
||||
path = normalizePath(path)
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[path]; ok {
|
||||
continue
|
||||
}
|
||||
seen[path] = struct{}{}
|
||||
out = append(out, path)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// normalizePath canonicalizes a Redfish resource path: surrounding whitespace
// is trimmed, a missing leading slash is added, and trailing slashes are
// stripped. A blank input normalizes to "" — as does "/" itself, because the
// trailing-slash strip runs last.
func normalizePath(path string) string {
	trimmed := strings.TrimSpace(path)
	if trimmed == "" {
		return ""
	}
	if !strings.HasPrefix(trimmed, "/") {
		trimmed = "/" + trimmed
	}
	return strings.TrimRight(trimmed, "/")
}
|
||||
|
||||
func joinPath(base, rel string) string {
|
||||
base = normalizePath(base)
|
||||
rel = strings.TrimSpace(rel)
|
||||
if base == "" {
|
||||
return normalizePath(rel)
|
||||
}
|
||||
if rel == "" {
|
||||
return base
|
||||
}
|
||||
if !strings.HasPrefix(rel, "/") {
|
||||
rel = "/" + rel
|
||||
}
|
||||
return normalizePath(base + rel)
|
||||
}
|
||||
100
internal/collector/redfishprofile/analysis.go
Normal file
100
internal/collector/redfishprofile/analysis.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package redfishprofile
|
||||
|
||||
import "strings"
|
||||
|
||||
// ResolveAnalysisPlan decides which analysis-time recovery directives apply
// to a snapshot. Fallback mode (no confident vendor match) enables every
// broad recovery directive plus well-known vendor drive/volume collection
// probes; matched profiles then apply their directives and finally refine the
// plan. ApplyAnalysisDirectives runs for every profile before any
// RefineAnalysisPlan hook runs.
func ResolveAnalysisPlan(match MatchResult, snapshot map[string]interface{}, discovered DiscoveredResources, signals MatchSignals) ResolvedAnalysisPlan {
	plan := ResolvedAnalysisPlan{
		Match:      match,
		Directives: AnalysisDirectives{},
	}
	if match.Mode == ModeFallback {
		plan.Directives.EnableProcessorGPUFallback = true
		plan.Directives.EnableSupermicroNVMeBackplane = true
		plan.Directives.EnableProcessorGPUChassisAlias = true
		plan.Directives.EnableGenericGraphicsControllerDedup = true
		plan.Directives.EnableStorageEnclosureRecovery = true
		plan.Directives.EnableKnownStorageControllerRecovery = true
		addAnalysisLookupMode(&plan, "msi-index")
		addAnalysisLookupMode(&plan, "hgx-alias")
		// Probe the storage layouts of vendors we know about: an unmatched
		// BMC may still expose one of these controller trees.
		addAnalysisStorageDriveCollections(&plan,
			"/Storage/IntelVROC/Drives",
			"/Storage/IntelVROC/Controllers/1/Drives",
		)
		addAnalysisStorageVolumeCollections(&plan,
			"/Storage/IntelVROC/Volumes",
			"/Storage/HA-RAID/Volumes",
			"/Storage/MRVL.HA-RAID/Volumes",
		)
		addAnalysisNote(&plan, "fallback analysis enables broad recovery directives")
	}
	for _, profile := range match.Profiles {
		profile.ApplyAnalysisDirectives(&plan.Directives, signals)
	}
	for _, profile := range match.Profiles {
		profile.RefineAnalysisPlan(&plan, snapshot, discovered, signals)
	}
	return plan
}
|
||||
|
||||
func snapshotHasPathPrefix(snapshot map[string]interface{}, prefix string) bool {
|
||||
prefix = normalizePath(prefix)
|
||||
if prefix == "" {
|
||||
return false
|
||||
}
|
||||
for path := range snapshot {
|
||||
if strings.HasPrefix(normalizePath(path), prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// snapshotHasPathContaining reports whether any document path in the snapshot
// contains the given substring, compared case-insensitively. A blank
// substring never matches.
func snapshotHasPathContaining(snapshot map[string]interface{}, sub string) bool {
	needle := strings.ToLower(strings.TrimSpace(sub))
	if needle == "" {
		return false
	}
	for path := range snapshot {
		if strings.Contains(strings.ToLower(path), needle) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func snapshotHasGPUProcessor(snapshot map[string]interface{}, systemPaths []string) bool {
|
||||
for _, systemPath := range systemPaths {
|
||||
prefix := normalizePath(joinPath(systemPath, "/Processors")) + "/"
|
||||
for path, docAny := range snapshot {
|
||||
if !strings.HasPrefix(normalizePath(path), prefix) {
|
||||
continue
|
||||
}
|
||||
doc, ok := docAny.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(asString(doc["ProcessorType"])), "GPU") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func snapshotHasStorageControllerHint(snapshot map[string]interface{}, needles ...string) bool {
|
||||
for _, needle := range needles {
|
||||
if snapshotHasPathContaining(snapshot, needle) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// asString returns v when it holds a string and "" for anything else
// (including nil). It deliberately does not stringify non-string values.
func asString(v interface{}) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
|
||||
450
internal/collector/redfishprofile/fixture_test.go
Normal file
450
internal/collector/redfishprofile/fixture_test.go
Normal file
@@ -0,0 +1,450 @@
|
||||
package redfishprofile
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_MSI_CG480 checks the recorded MSI CG480
// fixture: the msi and ami-family profiles match (hgx-topology does not),
// MSI-specific tuning and GPU chassis seed/critical/plan-b paths are present,
// and generic inherited policies (post-probe, scoped suffixes, prefetch,
// critical paths, plan-b recovery) survive the MSI overlay.
func TestBuildAcquisitionPlan_Fixture_MSI_CG480(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "msi-cg480.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	resolved := ResolveAcquisitionPlan(match, plan, discoveredResourcesFromSignals(signals), signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "msi")
	assertProfileSelected(t, match, "ami-family")
	assertProfileNotSelected(t, match, "hgx-topology")

	if plan.Tuning.PrefetchWorkers < 6 {
		t.Fatalf("expected msi prefetch worker tuning, got %d", plan.Tuning.PrefetchWorkers)
	}
	if !containsString(resolved.SeedPaths, "/redfish/v1/Chassis/GPU1") {
		t.Fatalf("expected MSI chassis GPU seed path")
	}
	if !containsString(resolved.CriticalPaths, "/redfish/v1/Chassis/GPU1/Sensors") {
		t.Fatal("expected MSI GPU sensor critical path")
	}
	if !containsString(resolved.Plan.PlanBPaths, "/redfish/v1/Chassis/GPU1/Sensors") {
		t.Fatal("expected MSI GPU sensor plan-b path")
	}
	if plan.Tuning.ETABaseline.SnapshotSeconds <= 0 {
		t.Fatal("expected MSI snapshot eta baseline")
	}
	if !plan.Tuning.PostProbePolicy.EnableNumericCollectionProbe {
		t.Fatal("expected MSI fixture to inherit generic numeric post-probe policy")
	}
	if !containsString(plan.ScopedPaths.SystemSeedSuffixes, "/SimpleStorage") {
		t.Fatal("expected MSI fixture to inherit generic SimpleStorage scoped seed suffix")
	}
	if !containsString(plan.ScopedPaths.SystemCriticalSuffixes, "/Memory") {
		t.Fatal("expected MSI fixture to inherit generic system critical suffixes")
	}
	if !containsString(plan.Tuning.PrefetchPolicy.IncludeSuffixes, "/Storage") {
		t.Fatal("expected MSI fixture to inherit generic storage prefetch policy")
	}
	if !containsString(plan.CriticalPaths, "/redfish/v1/UpdateService") {
		t.Fatal("expected MSI fixture to inherit generic top-level critical path")
	}
	if !plan.Tuning.RecoveryPolicy.EnableProfilePlanB {
		t.Fatal("expected MSI fixture to enable profile plan-b")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_MSI_CG480_CopyMatchesSameProfiles checks
// that an equivalent copy of the MSI CG480 fixture selects the same profiles
// and yields the same prefetch tuning and GPU chassis seed decision as the
// original fixture — i.e. matching is stable across fixture duplicates.
func TestBuildAcquisitionPlan_Fixture_MSI_CG480_CopyMatchesSameProfiles(t *testing.T) {
	originalSignals := loadProfileFixtureSignals(t, "msi-cg480.json")
	copySignals := loadProfileFixtureSignals(t, "msi-cg480-copy.json")
	originalMatch := MatchProfiles(originalSignals)
	copyMatch := MatchProfiles(copySignals)
	originalPlan := BuildAcquisitionPlan(originalSignals)
	copyPlan := BuildAcquisitionPlan(copySignals)
	originalResolved := ResolveAcquisitionPlan(originalMatch, originalPlan, discoveredResourcesFromSignals(originalSignals), originalSignals)
	copyResolved := ResolveAcquisitionPlan(copyMatch, copyPlan, discoveredResourcesFromSignals(copySignals), copySignals)

	assertSameProfileNames(t, originalMatch, copyMatch)
	if originalPlan.Tuning.PrefetchWorkers != copyPlan.Tuning.PrefetchWorkers {
		t.Fatalf("expected same MSI prefetch worker tuning, got %d vs %d", originalPlan.Tuning.PrefetchWorkers, copyPlan.Tuning.PrefetchWorkers)
	}
	// Presence (not exact ordering) of the GPU chassis seed must agree.
	if containsString(originalResolved.SeedPaths, "/redfish/v1/Chassis/GPU1") != containsString(copyResolved.SeedPaths, "/redfish/v1/Chassis/GPU1") {
		t.Fatal("expected same MSI GPU chassis seed presence in both fixtures")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_MSI_CG290 checks the MSI CG290 fixture:
// the msi and ami-family profiles match (hgx-topology does not), and the MSI
// prefetch tuning plus GPU chassis seed path are applied.
func TestBuildAcquisitionPlan_Fixture_MSI_CG290(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "msi-cg290.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	resolved := ResolveAcquisitionPlan(match, plan, discoveredResourcesFromSignals(signals), signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "msi")
	assertProfileSelected(t, match, "ami-family")
	assertProfileNotSelected(t, match, "hgx-topology")

	if plan.Tuning.PrefetchWorkers < 6 {
		t.Fatalf("expected MSI prefetch worker tuning, got %d", plan.Tuning.PrefetchWorkers)
	}
	if !containsString(resolved.SeedPaths, "/redfish/v1/Chassis/GPU1") {
		t.Fatalf("expected MSI chassis GPU seed path")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_Supermicro_HGX checks the Supermicro HGX
// fixture: supermicro + hgx-topology profiles match (msi does not), the
// snapshot document cap is widened, NVMe post-probe is explicitly disabled,
// the HGX baseboard processor paths land in seed/critical/plan-b lists, and
// generic inherited policies survive the Supermicro/HGX overlays.
func TestBuildAcquisitionPlan_Fixture_Supermicro_HGX(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "supermicro-hgx.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	discovered := discoveredResourcesFromSignals(signals)
	// Simulate discovery of the HGX baseboard system so scoped paths resolve
	// against it.
	discovered.SystemPaths = dedupeSorted(append(discovered.SystemPaths, "/redfish/v1/Systems/HGX_Baseboard_0"))
	resolved := ResolveAcquisitionPlan(match, plan, discovered, signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "supermicro")
	assertProfileSelected(t, match, "hgx-topology")
	assertProfileNotSelected(t, match, "msi")

	if plan.Tuning.SnapshotMaxDocuments < 180000 {
		t.Fatalf("expected widened HGX snapshot cap, got %d", plan.Tuning.SnapshotMaxDocuments)
	}
	// The pointer must be set AND false: HGX explicitly disables the probe.
	if plan.Tuning.NVMePostProbeEnabled == nil || *plan.Tuning.NVMePostProbeEnabled {
		t.Fatal("expected HGX fixture to disable NVMe post-probe")
	}
	if !containsString(resolved.SeedPaths, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
		t.Fatal("expected HGX baseboard processors seed path")
	}
	if !containsString(resolved.CriticalPaths, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
		t.Fatal("expected HGX baseboard processors critical path")
	}
	if !containsString(resolved.Plan.PlanBPaths, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
		t.Fatal("expected HGX baseboard processors plan-b path")
	}
	if plan.Tuning.ETABaseline.SnapshotSeconds < 300 {
		t.Fatalf("expected HGX snapshot eta baseline, got %d", plan.Tuning.ETABaseline.SnapshotSeconds)
	}
	if !plan.Tuning.PostProbePolicy.EnableDirectNVMEDiskBayProbe {
		t.Fatal("expected HGX fixture to retain Supermicro direct NVMe disk bay probe policy")
	}
	if !containsString(plan.ScopedPaths.SystemCriticalSuffixes, "/Storage/IntelVROC/Drives") {
		t.Fatal("expected HGX fixture to inherit generic IntelVROC scoped critical suffix")
	}
	if !containsString(plan.ScopedPaths.ChassisCriticalSuffixes, "/Assembly") {
		t.Fatal("expected HGX fixture to inherit generic chassis critical suffixes")
	}
	if !containsString(plan.Tuning.PrefetchPolicy.ExcludeContains, "/Assembly") {
		t.Fatal("expected HGX fixture to inherit generic assembly prefetch exclusion")
	}
	if !plan.Tuning.RecoveryPolicy.EnableProfilePlanB {
		t.Fatal("expected HGX fixture to enable profile plan-b")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_Supermicro_OAM_NoHGX checks a Supermicro
// OAM (AMD) fixture that must NOT trigger the HGX topology profile: no HGX
// baseboard paths appear, the generic Supermicro snapshot cap and firmware
// paths apply, and HGX-only NVMe tuning is absent.
func TestBuildAcquisitionPlan_Fixture_Supermicro_OAM_NoHGX(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "supermicro-oam-amd.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	resolved := ResolveAcquisitionPlan(match, plan, discoveredResourcesFromSignals(signals), signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "supermicro")
	assertProfileNotSelected(t, match, "hgx-topology")
	assertProfileNotSelected(t, match, "msi")

	if containsString(resolved.SeedPaths, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
		t.Fatal("did not expect HGX baseboard processors seed path for OAM fixture")
	}
	if containsString(resolved.CriticalPaths, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
		t.Fatal("did not expect HGX baseboard processors critical path for OAM fixture")
	}
	if !containsString(resolved.CriticalPaths, "/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory") {
		t.Fatal("expected Supermicro firmware critical path")
	}
	if !containsString(resolved.Plan.PlanBPaths, "/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory") {
		t.Fatal("expected Supermicro firmware plan-b path")
	}
	if plan.Tuning.SnapshotMaxDocuments != 150000 {
		t.Fatalf("expected generic supermicro snapshot cap, got %d", plan.Tuning.SnapshotMaxDocuments)
	}
	// nil pointer means "unset": HGX-only NVMe tuning must not leak in.
	if plan.Tuning.NVMePostProbeEnabled != nil {
		t.Fatal("did not expect HGX NVMe tuning for OAM fixture")
	}
	if plan.Tuning.ETABaseline.SnapshotSeconds < 180 {
		t.Fatalf("expected Supermicro snapshot eta baseline, got %d", plan.Tuning.ETABaseline.SnapshotSeconds)
	}
	if !plan.Tuning.PostProbePolicy.EnableDirectNVMEDiskBayProbe {
		t.Fatal("expected Supermicro OAM fixture to use direct NVMe disk bay probe policy")
	}
	if !plan.Tuning.PostProbePolicy.EnableNumericCollectionProbe {
		t.Fatal("expected Supermicro OAM fixture to inherit generic numeric post-probe policy")
	}
	if !containsString(plan.ScopedPaths.SystemSeedSuffixes, "/Storage/IntelVROC") {
		t.Fatal("expected Supermicro OAM fixture to inherit generic IntelVROC scoped seed suffix")
	}
	if !plan.Tuning.RecoveryPolicy.EnableProfilePlanB {
		t.Fatal("expected Supermicro OAM fixture to enable profile plan-b")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_Dell_R750 checks the Dell R750 fixture:
// only the dell profile matches, plan-b recovery is enabled, the Dell
// refinement promotes the iDRAC manager into seed/critical paths, and the
// analysis plan enables graphics-controller de-duplication.
func TestBuildAcquisitionPlan_Fixture_Dell_R750(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "dell-r750.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
		SystemPaths:  []string{"/redfish/v1/Systems/System.Embedded.1"},
		ChassisPaths: []string{"/redfish/v1/Chassis/System.Embedded.1"},
		ManagerPaths: []string{"/redfish/v1/Managers/1", "/redfish/v1/Managers/iDRAC.Embedded.1"},
	}, signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "dell")
	assertProfileNotSelected(t, match, "supermicro")
	assertProfileNotSelected(t, match, "hgx-topology")
	assertProfileNotSelected(t, match, "msi")

	if !plan.Tuning.RecoveryPolicy.EnableProfilePlanB {
		t.Fatal("expected dell fixture to enable profile plan-b")
	}
	if !containsString(resolved.SeedPaths, "/redfish/v1/Managers/iDRAC.Embedded.1") {
		t.Fatal("expected Dell refinement to add iDRAC manager seed path")
	}
	if !containsString(resolved.CriticalPaths, "/redfish/v1/Managers/iDRAC.Embedded.1") {
		t.Fatal("expected Dell refinement to add iDRAC manager critical path")
	}
	directives := ResolveAnalysisPlan(match, nil, DiscoveredResources{}, signals).Directives
	if !directives.EnableGenericGraphicsControllerDedup {
		t.Fatal("expected dell fixture to enable graphics controller dedup")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_AMI_Generic checks the generic AMI fixture:
// only the ami-family profile matches, prefetch is forced on, the AMI OEM
// inventory seed paths are present, generic critical paths are inherited, and
// the analysis plan enables graphics-controller de-duplication.
func TestBuildAcquisitionPlan_Fixture_AMI_Generic(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "ami-generic.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)

	if match.Mode != ModeMatched {
		t.Fatalf("expected matched mode, got %q", match.Mode)
	}
	assertProfileSelected(t, match, "ami-family")
	assertProfileNotSelected(t, match, "msi")
	assertProfileNotSelected(t, match, "supermicro")
	assertProfileNotSelected(t, match, "dell")
	assertProfileNotSelected(t, match, "hgx-topology")

	// Pointer must be set AND true: the profile forces prefetch on.
	if plan.Tuning.PrefetchEnabled == nil || !*plan.Tuning.PrefetchEnabled {
		t.Fatal("expected ami-family fixture to force prefetch enabled")
	}
	if !containsString(plan.SeedPaths, "/redfish/v1/Oem/Ami") {
		t.Fatal("expected ami-family fixture seed path /redfish/v1/Oem/Ami")
	}
	if !containsString(plan.SeedPaths, "/redfish/v1/Oem/Ami/InventoryData/Status") {
		t.Fatal("expected ami-family fixture seed path /redfish/v1/Oem/Ami/InventoryData/Status")
	}
	if !containsString(plan.CriticalPaths, "/redfish/v1/UpdateService") {
		t.Fatal("expected ami-family fixture to inherit generic critical path")
	}

	directives := ResolveAnalysisPlan(match, nil, DiscoveredResources{}, signals).Directives
	if !directives.EnableGenericGraphicsControllerDedup {
		t.Fatal("expected ami-family fixture to enable graphics controller dedup")
	}
}
|
||||
|
||||
// TestBuildAcquisitionPlan_Fixture_UnknownVendor checks fallback behavior for
// an unrecognized BMC: fallback mode aggregates only fallback-safe profiles,
// widens the snapshot cap, forces prefetch, includes discovered resources in
// the critical paths, and enables the broad analysis recovery directives.
func TestBuildAcquisitionPlan_Fixture_UnknownVendor(t *testing.T) {
	signals := loadProfileFixtureSignals(t, "unknown-vendor.json")
	match := MatchProfiles(signals)
	plan := BuildAcquisitionPlan(signals)
	resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
		SystemPaths:  []string{"/redfish/v1/Systems/1"},
		ChassisPaths: []string{"/redfish/v1/Chassis/1"},
		ManagerPaths: []string{"/redfish/v1/Managers/1"},
	}, signals)

	if match.Mode != ModeFallback {
		t.Fatalf("expected fallback mode for unknown vendor, got %q", match.Mode)
	}
	if len(match.Profiles) == 0 {
		t.Fatal("expected fallback to aggregate profiles")
	}
	// Fallback must only aggregate profiles that declare themselves safe.
	for _, profile := range match.Profiles {
		if !profile.SafeForFallback() {
			t.Fatalf("fallback mode included non-safe profile %q", profile.Name())
		}
	}

	if plan.Tuning.SnapshotMaxDocuments < 180000 {
		t.Fatalf("expected fallback to widen snapshot cap, got %d", plan.Tuning.SnapshotMaxDocuments)
	}
	if plan.Tuning.PrefetchEnabled == nil || !*plan.Tuning.PrefetchEnabled {
		t.Fatal("expected fallback fixture to force prefetch enabled")
	}
	if !containsString(resolved.CriticalPaths, "/redfish/v1/Systems/1") {
		t.Fatal("expected fallback resolved critical paths to include discovered system")
	}

	analysisPlan := ResolveAnalysisPlan(match, nil, DiscoveredResources{}, signals)
	if !analysisPlan.Directives.EnableProcessorGPUFallback {
		t.Fatal("expected fallback fixture to enable processor GPU fallback")
	}
	if !analysisPlan.Directives.EnableStorageEnclosureRecovery {
		t.Fatal("expected fallback fixture to enable storage enclosure recovery")
	}
	if !analysisPlan.Directives.EnableGenericGraphicsControllerDedup {
		t.Fatal("expected fallback fixture to enable graphics controller dedup")
	}
}
|
||||
|
||||
func TestBuildAcquisitionPlan_Fixture_xFusion_G5500V7(t *testing.T) {
|
||||
signals := loadProfileFixtureSignals(t, "xfusion-g5500v7.json")
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1"},
|
||||
ChassisPaths: []string{"/redfish/v1/Chassis/1"},
|
||||
ManagerPaths: []string{"/redfish/v1/Managers/1"},
|
||||
}, signals)
|
||||
|
||||
if match.Mode != ModeMatched {
|
||||
t.Fatalf("expected matched mode for xFusion, got %q", match.Mode)
|
||||
}
|
||||
assertProfileSelected(t, match, "xfusion")
|
||||
assertProfileNotSelected(t, match, "supermicro")
|
||||
assertProfileNotSelected(t, match, "hgx-topology")
|
||||
assertProfileNotSelected(t, match, "msi")
|
||||
assertProfileNotSelected(t, match, "dell")
|
||||
|
||||
if plan.Tuning.SnapshotMaxDocuments > 150000 {
|
||||
t.Fatalf("expected xfusion snapshot cap <= 150000, got %d", plan.Tuning.SnapshotMaxDocuments)
|
||||
}
|
||||
if plan.Tuning.PrefetchEnabled == nil || !*plan.Tuning.PrefetchEnabled {
|
||||
t.Fatal("expected xfusion fixture to enable prefetch")
|
||||
}
|
||||
if plan.Tuning.ETABaseline.SnapshotSeconds <= 0 {
|
||||
t.Fatal("expected xfusion snapshot eta baseline")
|
||||
}
|
||||
if !containsString(resolved.CriticalPaths, "/redfish/v1/Systems/1") {
|
||||
t.Fatal("expected system path in critical paths")
|
||||
}
|
||||
|
||||
analysisPlan := ResolveAnalysisPlan(match, map[string]interface{}{
|
||||
"/redfish/v1/Systems/1/Processors/Gpu1": map[string]interface{}{"ProcessorType": "GPU"},
|
||||
}, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1"},
|
||||
}, signals)
|
||||
if !analysisPlan.Directives.EnableProcessorGPUFallback {
|
||||
t.Fatal("expected xfusion analysis to enable processor GPU fallback when GPU processors present")
|
||||
}
|
||||
if !analysisPlan.Directives.EnableGenericGraphicsControllerDedup {
|
||||
t.Fatal("expected xfusion analysis to enable graphics controller dedup")
|
||||
}
|
||||
}
|
||||
|
||||
func loadProfileFixtureSignals(t *testing.T, fixtureName string) MatchSignals {
|
||||
t.Helper()
|
||||
path := filepath.Join("testdata", fixtureName)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read fixture %s: %v", path, err)
|
||||
}
|
||||
var signals MatchSignals
|
||||
if err := json.Unmarshal(data, &signals); err != nil {
|
||||
t.Fatalf("decode fixture %s: %v", path, err)
|
||||
}
|
||||
return normalizeSignals(signals)
|
||||
}
|
||||
|
||||
func assertProfileSelected(t *testing.T, match MatchResult, want string) {
|
||||
t.Helper()
|
||||
for _, profile := range match.Profiles {
|
||||
if profile.Name() == want {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("expected profile %q in %v", want, profileNames(match))
|
||||
}
|
||||
|
||||
func assertProfileNotSelected(t *testing.T, match MatchResult, want string) {
|
||||
t.Helper()
|
||||
for _, profile := range match.Profiles {
|
||||
if profile.Name() == want {
|
||||
t.Fatalf("did not expect profile %q in %v", want, profileNames(match))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func profileNames(match MatchResult) []string {
|
||||
out := make([]string, 0, len(match.Profiles))
|
||||
for _, profile := range match.Profiles {
|
||||
out = append(out, profile.Name())
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func assertSameProfileNames(t *testing.T, left, right MatchResult) {
|
||||
t.Helper()
|
||||
leftNames := profileNames(left)
|
||||
rightNames := profileNames(right)
|
||||
if len(leftNames) != len(rightNames) {
|
||||
t.Fatalf("profile stack size differs: %v vs %v", leftNames, rightNames)
|
||||
}
|
||||
for i := range leftNames {
|
||||
if leftNames[i] != rightNames[i] {
|
||||
t.Fatalf("profile stack differs: %v vs %v", leftNames, rightNames)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// containsString reports whether want appears in items.
func containsString(items []string, want string) bool {
	for _, candidate := range items {
		if candidate == want {
			return true
		}
	}
	return false
}
|
||||
|
||||
func discoveredResourcesFromSignals(signals MatchSignals) DiscoveredResources {
|
||||
var discovered DiscoveredResources
|
||||
for _, hint := range signals.ResourceHints {
|
||||
memberPath := discoveredMemberPath(hint)
|
||||
switch {
|
||||
case strings.HasPrefix(memberPath, "/redfish/v1/Systems/"):
|
||||
discovered.SystemPaths = append(discovered.SystemPaths, memberPath)
|
||||
case strings.HasPrefix(memberPath, "/redfish/v1/Chassis/"):
|
||||
discovered.ChassisPaths = append(discovered.ChassisPaths, memberPath)
|
||||
case strings.HasPrefix(memberPath, "/redfish/v1/Managers/"):
|
||||
discovered.ManagerPaths = append(discovered.ManagerPaths, memberPath)
|
||||
}
|
||||
}
|
||||
discovered.SystemPaths = dedupeSorted(discovered.SystemPaths)
|
||||
discovered.ChassisPaths = dedupeSorted(discovered.ChassisPaths)
|
||||
discovered.ManagerPaths = dedupeSorted(discovered.ManagerPaths)
|
||||
return discovered
|
||||
}
|
||||
|
||||
// discoveredMemberPath reduces an arbitrary Redfish resource path to its
// collection-member prefix (e.g. "/redfish/v1/Systems/1"). It returns
// "" for blank input, for paths outside /redfish/v1, and for
// collections other than Systems, Chassis, or Managers.
func discoveredMemberPath(path string) string {
	trimmed := strings.TrimSpace(path)
	if trimmed == "" {
		return ""
	}
	segments := strings.Split(strings.Trim(trimmed, "/"), "/")
	// Need at least redfish/v1/<collection>/<member>.
	if len(segments) < 4 || segments[0] != "redfish" || segments[1] != "v1" {
		return ""
	}
	switch segments[2] {
	case "Systems", "Chassis", "Managers":
		return "/" + strings.Join(segments[:4], "/")
	}
	return ""
}
|
||||
122
internal/collector/redfishprofile/matcher.go
Normal file
122
internal/collector/redfishprofile/matcher.go
Normal file
@@ -0,0 +1,122 @@
|
||||
package redfishprofile
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// Match modes reported by MatchProfiles.
const (
	// ModeMatched indicates at least one profile scored above the
	// confidence threshold, so the ranked candidate stack is in effect.
	ModeMatched = "matched"
	// ModeFallback indicates no profile matched confidently, so the
	// aggregate of fallback-safe profiles is used instead.
	ModeFallback = "fallback"
)
|
||||
|
||||
// MatchProfiles scores every builtin profile against the supplied
// signals and returns either a matched stack (top candidate scored at
// least 60) or a fallback stack (all profiles marked safe for
// fallback). The returned Scores slice always covers every builtin
// profile, sorted by score desc / priority asc / name asc, with Active
// set on the entries that made it into the selected stack.
func MatchProfiles(signals MatchSignals) MatchResult {
	type scored struct {
		profile Profile
		score   int
	}
	builtins := BuiltinProfiles()
	candidates := make([]scored, 0, len(builtins))
	allScores := make([]ProfileScore, 0, len(builtins))
	for _, profile := range builtins {
		score := profile.Match(signals)
		// Record every profile's score for diagnostics, even the ones
		// that do not become candidates.
		allScores = append(allScores, ProfileScore{
			Name:     profile.Name(),
			Score:    score,
			Priority: profile.Priority(),
		})
		if score <= 0 {
			continue
		}
		candidates = append(candidates, scored{profile: profile, score: score})
	}
	// Deterministic diagnostic ordering: score desc, then priority asc,
	// then name asc as the final tie-break.
	sort.Slice(allScores, func(i, j int) bool {
		if allScores[i].Score == allScores[j].Score {
			if allScores[i].Priority == allScores[j].Priority {
				return allScores[i].Name < allScores[j].Name
			}
			return allScores[i].Priority < allScores[j].Priority
		}
		return allScores[i].Score > allScores[j].Score
	})
	// Candidate ordering mirrors the diagnostic ordering (no name
	// tie-break needed here since profile names are used to dedupe below).
	sort.Slice(candidates, func(i, j int) bool {
		if candidates[i].score == candidates[j].score {
			return candidates[i].profile.Priority() < candidates[j].profile.Priority()
		}
		return candidates[i].score > candidates[j].score
	})
	// Confidence threshold: anything under 60 for the best candidate
	// means we cannot trust a vendor-specific stack — aggregate the
	// fallback-safe profiles instead.
	if len(candidates) == 0 || candidates[0].score < 60 {
		profiles := make([]Profile, 0, len(builtins))
		active := make(map[string]struct{}, len(builtins))
		for _, profile := range builtins {
			if profile.SafeForFallback() {
				profiles = append(profiles, profile)
				active[profile.Name()] = struct{}{}
			}
		}
		sortProfiles(profiles)
		for i := range allScores {
			_, ok := active[allScores[i].Name]
			allScores[i].Active = ok
		}
		return MatchResult{Mode: ModeFallback, Profiles: profiles, Scores: allScores}
	}
	// Matched mode: keep every positive-scoring candidate, deduplicated
	// by name, then re-sorted into the canonical priority/name order.
	profiles := make([]Profile, 0, len(candidates))
	seen := make(map[string]struct{}, len(candidates))
	for _, candidate := range candidates {
		name := candidate.profile.Name()
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		profiles = append(profiles, candidate.profile)
	}
	sortProfiles(profiles)
	for i := range allScores {
		_, ok := seen[allScores[i].Name]
		allScores[i].Active = ok
	}
	return MatchResult{Mode: ModeMatched, Profiles: profiles, Scores: allScores}
}
|
||||
|
||||
func BuildAcquisitionPlan(signals MatchSignals) AcquisitionPlan {
|
||||
match := MatchProfiles(signals)
|
||||
plan := AcquisitionPlan{Mode: match.Mode}
|
||||
for _, profile := range match.Profiles {
|
||||
plan.Profiles = append(plan.Profiles, profile.Name())
|
||||
profile.ExtendAcquisitionPlan(&plan, signals)
|
||||
}
|
||||
plan.Profiles = dedupeSorted(plan.Profiles)
|
||||
plan.SeedPaths = dedupeSorted(plan.SeedPaths)
|
||||
plan.CriticalPaths = dedupeSorted(plan.CriticalPaths)
|
||||
plan.PlanBPaths = dedupeSorted(plan.PlanBPaths)
|
||||
plan.Notes = dedupeSorted(plan.Notes)
|
||||
if plan.Mode == ModeFallback {
|
||||
ensureSnapshotMaxDocuments(&plan, 180000)
|
||||
ensurePrefetchEnabled(&plan, true)
|
||||
addPlanNote(&plan, "fallback acquisition expands safe profile probes")
|
||||
}
|
||||
return plan
|
||||
}
|
||||
|
||||
func ApplyAnalysisProfiles(result *models.AnalysisResult, snapshot map[string]interface{}, signals MatchSignals) MatchResult {
|
||||
match := MatchProfiles(signals)
|
||||
for _, profile := range match.Profiles {
|
||||
profile.PostAnalyze(result, snapshot, signals)
|
||||
}
|
||||
return match
|
||||
}
|
||||
|
||||
func BuildAnalysisDirectives(match MatchResult) AnalysisDirectives {
|
||||
return ResolveAnalysisPlan(match, nil, DiscoveredResources{}, MatchSignals{}).Directives
|
||||
}
|
||||
|
||||
func sortProfiles(profiles []Profile) {
|
||||
sort.Slice(profiles, func(i, j int) bool {
|
||||
if profiles[i].Priority() == profiles[j].Priority() {
|
||||
return profiles[i].Name() < profiles[j].Name()
|
||||
}
|
||||
return profiles[i].Priority() < profiles[j].Priority()
|
||||
})
|
||||
}
|
||||
390
internal/collector/redfishprofile/matcher_test.go
Normal file
390
internal/collector/redfishprofile/matcher_test.go
Normal file
@@ -0,0 +1,390 @@
|
||||
package redfishprofile
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMatchProfiles_UnknownVendorFallsBackToAggregateProfiles(t *testing.T) {
|
||||
match := MatchProfiles(MatchSignals{
|
||||
ServiceRootProduct: "Redfish Server",
|
||||
})
|
||||
if match.Mode != ModeFallback {
|
||||
t.Fatalf("expected fallback mode, got %q", match.Mode)
|
||||
}
|
||||
if len(match.Profiles) < 2 {
|
||||
t.Fatalf("expected aggregated fallback profiles, got %d", len(match.Profiles))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchProfiles_MSISelectsMatchedMode(t *testing.T) {
|
||||
match := MatchProfiles(MatchSignals{
|
||||
SystemManufacturer: "Micro-Star International Co., Ltd.",
|
||||
ResourceHints: []string{"/redfish/v1/Chassis/GPU1"},
|
||||
})
|
||||
if match.Mode != ModeMatched {
|
||||
t.Fatalf("expected matched mode, got %q", match.Mode)
|
||||
}
|
||||
found := false
|
||||
for _, profile := range match.Profiles {
|
||||
if profile.Name() == "msi" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatal("expected msi profile to be selected")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAcquisitionPlan_FallbackIncludesProfileNotes(t *testing.T) {
|
||||
plan := BuildAcquisitionPlan(MatchSignals{
|
||||
ServiceRootVendor: "AMI",
|
||||
})
|
||||
if len(plan.Profiles) == 0 {
|
||||
t.Fatal("expected acquisition plan profiles")
|
||||
}
|
||||
if len(plan.Notes) == 0 {
|
||||
t.Fatal("expected acquisition plan notes")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAcquisitionPlan_FallbackAddsBroadCrawlTuning(t *testing.T) {
|
||||
plan := BuildAcquisitionPlan(MatchSignals{
|
||||
ServiceRootProduct: "Unknown Redfish",
|
||||
})
|
||||
if plan.Mode != ModeFallback {
|
||||
t.Fatalf("expected fallback mode, got %q", plan.Mode)
|
||||
}
|
||||
if plan.Tuning.SnapshotMaxDocuments < 180000 {
|
||||
t.Fatalf("expected widened snapshot cap, got %d", plan.Tuning.SnapshotMaxDocuments)
|
||||
}
|
||||
if plan.Tuning.PrefetchEnabled == nil || !*plan.Tuning.PrefetchEnabled {
|
||||
t.Fatal("expected fallback to force prefetch enabled")
|
||||
}
|
||||
if !plan.Tuning.RecoveryPolicy.EnableCriticalCollectionMemberRetry {
|
||||
t.Fatal("expected fallback to inherit critical member retry recovery")
|
||||
}
|
||||
if !plan.Tuning.RecoveryPolicy.EnableCriticalSlowProbe {
|
||||
t.Fatal("expected fallback to inherit critical slow probe recovery")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAcquisitionPlan_HGXDisablesNVMePostProbe(t *testing.T) {
|
||||
plan := BuildAcquisitionPlan(MatchSignals{
|
||||
SystemModel: "HGX B200",
|
||||
ResourceHints: []string{"/redfish/v1/Systems/HGX_Baseboard_0"},
|
||||
})
|
||||
if plan.Mode != ModeMatched {
|
||||
t.Fatalf("expected matched mode, got %q", plan.Mode)
|
||||
}
|
||||
if plan.Tuning.NVMePostProbeEnabled == nil || *plan.Tuning.NVMePostProbeEnabled {
|
||||
t.Fatal("expected hgx profile to disable NVMe post-probe")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_ExpandsScopedPaths(t *testing.T) {
|
||||
signals := MatchSignals{}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1", "/redfish/v1/Systems/2"},
|
||||
}, signals)
|
||||
joined := joinResolvedPaths(resolved.SeedPaths)
|
||||
for _, wanted := range []string{
|
||||
"/redfish/v1/Systems/1/SimpleStorage",
|
||||
"/redfish/v1/Systems/1/Storage/IntelVROC",
|
||||
"/redfish/v1/Systems/2/SimpleStorage",
|
||||
"/redfish/v1/Systems/2/Storage/IntelVROC",
|
||||
} {
|
||||
if !containsJoinedPath(joined, wanted) {
|
||||
t.Fatalf("expected resolved seed path %q", wanted)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_CriticalBaselineIsShapedByProfiles(t *testing.T) {
|
||||
signals := MatchSignals{}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1"},
|
||||
ChassisPaths: []string{"/redfish/v1/Chassis/1"},
|
||||
ManagerPaths: []string{"/redfish/v1/Managers/1"},
|
||||
}, signals)
|
||||
joined := joinResolvedPaths(resolved.CriticalPaths)
|
||||
for _, wanted := range []string{
|
||||
"/redfish/v1",
|
||||
"/redfish/v1/Systems/1",
|
||||
"/redfish/v1/Systems/1/Memory",
|
||||
"/redfish/v1/Chassis/1/Assembly",
|
||||
"/redfish/v1/Managers/1/NetworkProtocol",
|
||||
"/redfish/v1/UpdateService",
|
||||
} {
|
||||
if !containsJoinedPath(joined, wanted) {
|
||||
t.Fatalf("expected resolved critical path %q", wanted)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_FallbackAppendsPlanBToSeeds(t *testing.T) {
|
||||
signals := MatchSignals{ServiceRootProduct: "Unknown Redfish"}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
if plan.Mode != ModeFallback {
|
||||
t.Fatalf("expected fallback mode, got %q", plan.Mode)
|
||||
}
|
||||
plan.PlanBPaths = append(plan.PlanBPaths, "/redfish/v1/Systems/1/Oem/TestPlanB")
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1"},
|
||||
}, signals)
|
||||
if !containsJoinedPath(joinResolvedPaths(resolved.SeedPaths), "/redfish/v1/Systems/1/Oem/TestPlanB") {
|
||||
t.Fatal("expected fallback resolved seeds to include plan-b path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_MSIRefinesDiscoveredGPUChassis(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Micro-Star International Co., Ltd.",
|
||||
ResourceHints: []string{"/redfish/v1/Chassis/GPU1", "/redfish/v1/Chassis/GPU4/Sensors"},
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
ChassisPaths: []string{"/redfish/v1/Chassis/1", "/redfish/v1/Chassis/GPU1", "/redfish/v1/Chassis/GPU4"},
|
||||
}, signals)
|
||||
joinedSeeds := joinResolvedPaths(resolved.SeedPaths)
|
||||
joinedCritical := joinResolvedPaths(resolved.CriticalPaths)
|
||||
if !containsJoinedPath(joinedSeeds, "/redfish/v1/Chassis/GPU1") || !containsJoinedPath(joinedSeeds, "/redfish/v1/Chassis/GPU4") {
|
||||
t.Fatal("expected MSI refinement to add discovered GPU chassis seed paths")
|
||||
}
|
||||
if containsJoinedPath(joinedSeeds, "/redfish/v1/Chassis/GPU2") {
|
||||
t.Fatal("did not expect undiscovered MSI GPU chassis in resolved seeds")
|
||||
}
|
||||
if !containsJoinedPath(joinedCritical, "/redfish/v1/Chassis/GPU1/Sensors") || !containsJoinedPath(joinedCritical, "/redfish/v1/Chassis/GPU4/Sensors") {
|
||||
t.Fatal("expected MSI refinement to add discovered GPU sensor critical paths")
|
||||
}
|
||||
if containsJoinedPath(joinedCritical, "/redfish/v1/Chassis/GPU3/Sensors") {
|
||||
t.Fatal("did not expect undiscovered MSI GPU sensor critical path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_HGXRefinesDiscoveredBaseboardSystems(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Supermicro",
|
||||
SystemModel: "SYS-821GE-TNHR",
|
||||
ChassisModel: "HGX B200",
|
||||
ResourceHints: []string{
|
||||
"/redfish/v1/Systems/HGX_Baseboard_0",
|
||||
"/redfish/v1/Systems/HGX_Baseboard_0/Processors",
|
||||
"/redfish/v1/Systems/1",
|
||||
},
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1", "/redfish/v1/Systems/HGX_Baseboard_0"},
|
||||
}, signals)
|
||||
joinedSeeds := joinResolvedPaths(resolved.SeedPaths)
|
||||
joinedCritical := joinResolvedPaths(resolved.CriticalPaths)
|
||||
if !containsJoinedPath(joinedSeeds, "/redfish/v1/Systems/HGX_Baseboard_0") || !containsJoinedPath(joinedSeeds, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
|
||||
t.Fatal("expected HGX refinement to add discovered baseboard system paths")
|
||||
}
|
||||
if !containsJoinedPath(joinedCritical, "/redfish/v1/Systems/HGX_Baseboard_0") || !containsJoinedPath(joinedCritical, "/redfish/v1/Systems/HGX_Baseboard_0/Processors") {
|
||||
t.Fatal("expected HGX refinement to add discovered baseboard critical paths")
|
||||
}
|
||||
if containsJoinedPath(joinedSeeds, "/redfish/v1/Systems/HGX_Baseboard_1") {
|
||||
t.Fatal("did not expect undiscovered HGX baseboard system path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_SupermicroRefinesFirmwareInventoryFromHint(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Supermicro",
|
||||
ResourceHints: []string{
|
||||
"/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory",
|
||||
"/redfish/v1/Managers/1/Oem/Supermicro/FanMode",
|
||||
},
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
ManagerPaths: []string{"/redfish/v1/Managers/1"},
|
||||
}, signals)
|
||||
joinedCritical := joinResolvedPaths(resolved.CriticalPaths)
|
||||
if !containsJoinedPath(joinedCritical, "/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory") {
|
||||
t.Fatal("expected Supermicro refinement to add firmware inventory critical path")
|
||||
}
|
||||
if !containsJoinedPath(joinResolvedPaths(resolved.Plan.PlanBPaths), "/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory") {
|
||||
t.Fatal("expected Supermicro refinement to add firmware inventory plan-b path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAcquisitionPlan_DellRefinesDiscoveredIDRACManager(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Dell Inc.",
|
||||
ServiceRootProduct: "iDRAC Redfish Service",
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := BuildAcquisitionPlan(signals)
|
||||
resolved := ResolveAcquisitionPlan(match, plan, DiscoveredResources{
|
||||
ManagerPaths: []string{"/redfish/v1/Managers/1", "/redfish/v1/Managers/iDRAC.Embedded.1"},
|
||||
}, signals)
|
||||
joinedSeeds := joinResolvedPaths(resolved.SeedPaths)
|
||||
joinedCritical := joinResolvedPaths(resolved.CriticalPaths)
|
||||
if !containsJoinedPath(joinedSeeds, "/redfish/v1/Managers/iDRAC.Embedded.1") {
|
||||
t.Fatal("expected Dell refinement to add discovered iDRAC manager seed path")
|
||||
}
|
||||
if !containsJoinedPath(joinedCritical, "/redfish/v1/Managers/iDRAC.Embedded.1") {
|
||||
t.Fatal("expected Dell refinement to add discovered iDRAC manager critical path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAnalysisDirectives_SupermicroEnablesVendorStorageFallbacks(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Supermicro",
|
||||
SystemModel: "SYS-821GE",
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := ResolveAnalysisPlan(match, map[string]interface{}{
|
||||
"/redfish/v1/Chassis/NVMeSSD.1.StorageBackplane/Drives": map[string]interface{}{},
|
||||
}, DiscoveredResources{}, signals)
|
||||
directives := plan.Directives
|
||||
if !directives.EnableSupermicroNVMeBackplane {
|
||||
t.Fatal("expected supermicro nvme backplane fallback")
|
||||
}
|
||||
}
|
||||
|
||||
// joinResolvedPaths flattens paths into a newline-delimited string with
// leading and trailing newlines, so whole-path membership can be tested
// with a simple substring search.
func joinResolvedPaths(paths []string) string {
	var b strings.Builder
	b.WriteByte('\n')
	b.WriteString(strings.Join(paths, "\n"))
	b.WriteByte('\n')
	return b.String()
}
|
||||
|
||||
// containsJoinedPath reports whether want appears as a complete line in
// a string produced by joinResolvedPaths.
func containsJoinedPath(joined, want string) bool {
	needle := "\n" + want + "\n"
	return strings.Contains(joined, needle)
}
|
||||
|
||||
func TestBuildAnalysisDirectives_HGXEnablesGPUFallbacks(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Supermicro",
|
||||
SystemModel: "SYS-821GE-TNHR",
|
||||
ChassisModel: "HGX B200",
|
||||
ResourceHints: []string{"/redfish/v1/Systems/HGX_Baseboard_0", "/redfish/v1/Chassis/HGX_Chassis_0/PCIeDevices/GPU_SXM_1"},
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := ResolveAnalysisPlan(match, map[string]interface{}{
|
||||
"/redfish/v1/Systems/HGX_Baseboard_0/Processors/GPU_SXM_1": map[string]interface{}{"ProcessorType": "GPU"},
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/PCIeDevices/GPU_SXM_1": map[string]interface{}{},
|
||||
}, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/HGX_Baseboard_0"},
|
||||
}, signals)
|
||||
directives := plan.Directives
|
||||
if !directives.EnableProcessorGPUFallback {
|
||||
t.Fatal("expected processor GPU fallback for hgx profile")
|
||||
}
|
||||
if !directives.EnableProcessorGPUChassisAlias {
|
||||
t.Fatal("expected processor GPU chassis alias resolution for hgx profile")
|
||||
}
|
||||
if !directives.EnableGenericGraphicsControllerDedup {
|
||||
t.Fatal("expected graphics-controller dedup for hgx profile")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAnalysisDirectives_MSIEnablesMSIChassisLookup(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Micro-Star International Co., Ltd.",
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := ResolveAnalysisPlan(match, map[string]interface{}{
|
||||
"/redfish/v1/Systems/1/Processors/GPU1": map[string]interface{}{"ProcessorType": "GPU"},
|
||||
"/redfish/v1/Chassis/GPU1": map[string]interface{}{},
|
||||
}, DiscoveredResources{
|
||||
SystemPaths: []string{"/redfish/v1/Systems/1"},
|
||||
ChassisPaths: []string{"/redfish/v1/Chassis/GPU1"},
|
||||
}, signals)
|
||||
directives := plan.Directives
|
||||
if !directives.EnableMSIProcessorGPUChassisLookup {
|
||||
t.Fatal("expected MSI processor GPU chassis lookup")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAnalysisDirectives_SupermicroEnablesStorageRecovery(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Supermicro",
|
||||
}
|
||||
match := MatchProfiles(signals)
|
||||
plan := ResolveAnalysisPlan(match, map[string]interface{}{
|
||||
"/redfish/v1/Chassis/1/Drives": map[string]interface{}{},
|
||||
"/redfish/v1/Systems/1/Storage/IntelVROC": map[string]interface{}{},
|
||||
"/redfish/v1/Systems/1/Storage/IntelVROC/Drives": map[string]interface{}{},
|
||||
}, DiscoveredResources{}, signals)
|
||||
directives := plan.Directives
|
||||
if !directives.EnableStorageEnclosureRecovery {
|
||||
t.Fatal("expected storage enclosure recovery for supermicro")
|
||||
}
|
||||
if !directives.EnableKnownStorageControllerRecovery {
|
||||
t.Fatal("expected known storage controller recovery for supermicro")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchProfiles_OrderingIsDeterministic(t *testing.T) {
|
||||
signals := MatchSignals{
|
||||
SystemManufacturer: "Micro-Star International Co., Ltd.",
|
||||
ResourceHints: []string{"/redfish/v1/Chassis/GPU1"},
|
||||
}
|
||||
first := MatchProfiles(signals)
|
||||
second := MatchProfiles(signals)
|
||||
if len(first.Profiles) != len(second.Profiles) {
|
||||
t.Fatalf("profile stack size differs across calls: %d vs %d", len(first.Profiles), len(second.Profiles))
|
||||
}
|
||||
for i := range first.Profiles {
|
||||
if first.Profiles[i].Name() != second.Profiles[i].Name() {
|
||||
t.Fatalf("profile ordering differs at index %d: %q vs %q", i, first.Profiles[i].Name(), second.Profiles[i].Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchProfiles_FallbackOrderingIsDeterministic(t *testing.T) {
|
||||
signals := MatchSignals{ServiceRootProduct: "Unknown Redfish"}
|
||||
first := MatchProfiles(signals)
|
||||
second := MatchProfiles(signals)
|
||||
if first.Mode != ModeFallback || second.Mode != ModeFallback {
|
||||
t.Fatalf("expected fallback mode in both calls")
|
||||
}
|
||||
if len(first.Profiles) != len(second.Profiles) {
|
||||
t.Fatalf("fallback profile stack size differs: %d vs %d", len(first.Profiles), len(second.Profiles))
|
||||
}
|
||||
for i := range first.Profiles {
|
||||
if first.Profiles[i].Name() != second.Profiles[i].Name() {
|
||||
t.Fatalf("fallback profile ordering differs at index %d: %q vs %q", i, first.Profiles[i].Name(), second.Profiles[i].Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchProfiles_FallbackOnlySelectsSafeProfiles(t *testing.T) {
|
||||
match := MatchProfiles(MatchSignals{ServiceRootProduct: "Unknown Generic Redfish Server"})
|
||||
if match.Mode != ModeFallback {
|
||||
t.Fatalf("expected fallback mode, got %q", match.Mode)
|
||||
}
|
||||
for _, profile := range match.Profiles {
|
||||
if !profile.SafeForFallback() {
|
||||
t.Fatalf("fallback mode included non-safe profile %q", profile.Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAnalysisDirectives_GenericMatchedKeepsFallbacksDisabled(t *testing.T) {
|
||||
match := MatchResult{
|
||||
Mode: ModeMatched,
|
||||
Profiles: []Profile{genericProfile()},
|
||||
}
|
||||
directives := ResolveAnalysisPlan(match, nil, DiscoveredResources{}, MatchSignals{}).Directives
|
||||
if directives.EnableProcessorGPUFallback {
|
||||
t.Fatal("did not expect processor GPU fallback for generic matched profile")
|
||||
}
|
||||
if directives.EnableSupermicroNVMeBackplane {
|
||||
t.Fatal("did not expect supermicro nvme fallback for generic matched profile")
|
||||
}
|
||||
if directives.EnableGenericGraphicsControllerDedup {
|
||||
t.Fatal("did not expect generic graphics-controller dedup for generic matched profile")
|
||||
}
|
||||
}
|
||||
33
internal/collector/redfishprofile/profile_ami.go
Normal file
33
internal/collector/redfishprofile/profile_ami.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package redfishprofile
|
||||
|
||||
// amiProfile builds the "ami-family" profile. It matches on AMI vendor
// or product strings (and OEM namespaces), seeds AMI OEM inventory
// endpoints into the acquisition plan, forces prefetch on, and enables
// generic graphics-controller dedup during analysis.
func amiProfile() Profile {
	return staticProfile{
		name:            "ami-family",
		priority:        10,
		safeForFallback: true,
		// Score: 70 for an AMI vendor/product hit, +30 for any AMI OEM
		// namespace, capped at 100.
		matchFn: func(s MatchSignals) int {
			score := 0
			if containsFold(s.ServiceRootVendor, "ami") || containsFold(s.ServiceRootProduct, "ami") {
				score += 70
			}
			for _, ns := range s.OEMNamespaces {
				if containsFold(ns, "ami") {
					score += 30
					break
				}
			}
			return min(score, 100)
		},
		extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
			// AMI OEM inventory endpoints worth crawling when present.
			addPlanPaths(&plan.SeedPaths,
				"/redfish/v1/Oem/Ami",
				"/redfish/v1/Oem/Ami/InventoryData/Status",
			)
			ensurePrefetchEnabled(plan, true)
			addPlanNote(plan, "ami-family acquisition extensions enabled")
		},
		applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
			d.EnableGenericGraphicsControllerDedup = true
		},
	}
}
|
||||
45
internal/collector/redfishprofile/profile_dell.go
Normal file
45
internal/collector/redfishprofile/profile_dell.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package redfishprofile
|
||||
|
||||
// dellProfile builds the "dell" profile. It matches on Dell
// manufacturer strings, Dell OEM namespaces, and the iDRAC product
// string; enables plan-B recovery for acquisition; promotes any
// discovered iDRAC manager into seed and critical paths; and enables
// generic graphics-controller dedup during analysis.
func dellProfile() Profile {
	return staticProfile{
		name:            "dell",
		priority:        20,
		safeForFallback: true,
		// Score: 80 for a Dell system/chassis manufacturer hit, +30 for
		// a Dell OEM namespace, +30 for an iDRAC product, capped at 100.
		matchFn: func(s MatchSignals) int {
			score := 0
			if containsFold(s.SystemManufacturer, "dell") || containsFold(s.ChassisManufacturer, "dell") {
				score += 80
			}
			for _, ns := range s.OEMNamespaces {
				if containsFold(ns, "dell") {
					score += 30
					break
				}
			}
			if containsFold(s.ServiceRootProduct, "idrac") {
				score += 30
			}
			return min(score, 100)
		},
		extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
			ensureRecoveryPolicy(plan, AcquisitionRecoveryPolicy{
				EnableProfilePlanB: true,
			})
			addPlanNote(plan, "dell iDRAC acquisition extensions enabled")
		},
		refineAcquisition: func(resolved *ResolvedAcquisitionPlan, discovered DiscoveredResources, _ MatchSignals) {
			// Promote every discovered iDRAC manager into both the
			// resolved and base-plan seed/critical sets.
			for _, managerPath := range discovered.ManagerPaths {
				if !containsFold(managerPath, "idrac") {
					continue
				}
				addPlanPaths(&resolved.SeedPaths, managerPath)
				addPlanPaths(&resolved.Plan.SeedPaths, managerPath)
				addPlanPaths(&resolved.CriticalPaths, managerPath)
				addPlanPaths(&resolved.Plan.CriticalPaths, managerPath)
			}
		},
		applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
			d.EnableGenericGraphicsControllerDedup = true
		},
	}
}
|
||||
115
internal/collector/redfishprofile/profile_generic.go
Normal file
115
internal/collector/redfishprofile/profile_generic.go
Normal file
@@ -0,0 +1,115 @@
|
||||
package redfishprofile
|
||||
|
||||
func genericProfile() Profile {
|
||||
return staticProfile{
|
||||
name: "generic",
|
||||
priority: 100,
|
||||
safeForFallback: true,
|
||||
matchFn: func(MatchSignals) int { return 10 },
|
||||
extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
|
||||
ensurePrefetchPolicy(plan, AcquisitionPrefetchPolicy{
|
||||
IncludeSuffixes: []string{
|
||||
"/Bios",
|
||||
"/Processors",
|
||||
"/Memory",
|
||||
"/Storage",
|
||||
"/SimpleStorage",
|
||||
"/PCIeDevices",
|
||||
"/PCIeFunctions",
|
||||
"/Accelerators",
|
||||
"/GraphicsControllers",
|
||||
"/EthernetInterfaces",
|
||||
"/NetworkInterfaces",
|
||||
"/NetworkAdapters",
|
||||
"/Drives",
|
||||
"/Power",
|
||||
"/PowerSubsystem/PowerSupplies",
|
||||
"/NetworkProtocol",
|
||||
"/UpdateService",
|
||||
"/UpdateService/FirmwareInventory",
|
||||
},
|
||||
ExcludeContains: []string{
|
||||
"/Fabrics",
|
||||
"/Backplanes",
|
||||
"/Boards",
|
||||
"/Assembly",
|
||||
"/Sensors",
|
||||
"/ThresholdSensors",
|
||||
"/DiscreteSensors",
|
||||
"/ThermalConfig",
|
||||
"/ThermalSubsystem",
|
||||
"/EnvironmentMetrics",
|
||||
"/Certificates",
|
||||
"/LogServices",
|
||||
},
|
||||
})
|
||||
ensureScopedPathPolicy(plan, AcquisitionScopedPathPolicy{
|
||||
SystemCriticalSuffixes: []string{
|
||||
"/Bios",
|
||||
"/Oem/Public",
|
||||
"/Oem/Public/FRU",
|
||||
"/Processors",
|
||||
"/Memory",
|
||||
"/Storage",
|
||||
"/PCIeDevices",
|
||||
"/PCIeFunctions",
|
||||
"/Accelerators",
|
||||
"/GraphicsControllers",
|
||||
"/EthernetInterfaces",
|
||||
"/NetworkInterfaces",
|
||||
"/SimpleStorage",
|
||||
"/Storage/IntelVROC",
|
||||
"/Storage/IntelVROC/Drives",
|
||||
"/Storage/IntelVROC/Volumes",
|
||||
},
|
||||
ChassisCriticalSuffixes: []string{
|
||||
"/Oem/Public",
|
||||
"/Oem/Public/FRU",
|
||||
"/Power",
|
||||
"/NetworkAdapters",
|
||||
"/PCIeDevices",
|
||||
"/Accelerators",
|
||||
"/Drives",
|
||||
"/Assembly",
|
||||
},
|
||||
ManagerCriticalSuffixes: []string{
|
||||
"/NetworkProtocol",
|
||||
},
|
||||
SystemSeedSuffixes: []string{
|
||||
"/SimpleStorage",
|
||||
"/Storage/IntelVROC",
|
||||
"/Storage/IntelVROC/Drives",
|
||||
"/Storage/IntelVROC/Volumes",
|
||||
},
|
||||
})
|
||||
addPlanPaths(&plan.CriticalPaths,
|
||||
"/redfish/v1/UpdateService",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory",
|
||||
)
|
||||
ensureSnapshotMaxDocuments(plan, 100000)
|
||||
ensureSnapshotWorkers(plan, 6)
|
||||
ensurePrefetchWorkers(plan, 4)
|
||||
ensureETABaseline(plan, AcquisitionETABaseline{
|
||||
DiscoverySeconds: 8,
|
||||
SnapshotSeconds: 90,
|
||||
PrefetchSeconds: 20,
|
||||
CriticalPlanBSeconds: 20,
|
||||
ProfilePlanBSeconds: 15,
|
||||
})
|
||||
ensurePostProbePolicy(plan, AcquisitionPostProbePolicy{
|
||||
EnableNumericCollectionProbe: true,
|
||||
})
|
||||
ensureRecoveryPolicy(plan, AcquisitionRecoveryPolicy{
|
||||
EnableCriticalCollectionMemberRetry: true,
|
||||
EnableCriticalSlowProbe: true,
|
||||
})
|
||||
ensureRatePolicy(plan, AcquisitionRatePolicy{
|
||||
TargetP95LatencyMS: 900,
|
||||
ThrottleP95LatencyMS: 1800,
|
||||
MinSnapshotWorkers: 2,
|
||||
MinPrefetchWorkers: 1,
|
||||
DisablePrefetchOnErrors: true,
|
||||
})
|
||||
},
|
||||
}
|
||||
}
|
||||
85
internal/collector/redfishprofile/profile_hgx.go
Normal file
85
internal/collector/redfishprofile/profile_hgx.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package redfishprofile
|
||||
|
||||
func hgxProfile() Profile {
|
||||
return staticProfile{
|
||||
name: "hgx-topology",
|
||||
priority: 30,
|
||||
safeForFallback: true,
|
||||
matchFn: func(s MatchSignals) int {
|
||||
score := 0
|
||||
if containsFold(s.SystemModel, "hgx") || containsFold(s.ChassisModel, "hgx") {
|
||||
score += 70
|
||||
}
|
||||
for _, hint := range s.ResourceHints {
|
||||
if containsFold(hint, "hgx_") || containsFold(hint, "gpu_sxm") {
|
||||
score += 20
|
||||
break
|
||||
}
|
||||
}
|
||||
return min(score, 100)
|
||||
},
|
||||
extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
|
||||
ensureSnapshotMaxDocuments(plan, 180000)
|
||||
ensureSnapshotWorkers(plan, 4)
|
||||
ensurePrefetchWorkers(plan, 4)
|
||||
ensureNVMePostProbeEnabled(plan, false)
|
||||
ensureRecoveryPolicy(plan, AcquisitionRecoveryPolicy{
|
||||
EnableProfilePlanB: true,
|
||||
})
|
||||
ensureETABaseline(plan, AcquisitionETABaseline{
|
||||
DiscoverySeconds: 20,
|
||||
SnapshotSeconds: 300,
|
||||
PrefetchSeconds: 50,
|
||||
CriticalPlanBSeconds: 90,
|
||||
ProfilePlanBSeconds: 40,
|
||||
})
|
||||
ensureRatePolicy(plan, AcquisitionRatePolicy{
|
||||
TargetP95LatencyMS: 1500,
|
||||
ThrottleP95LatencyMS: 3000,
|
||||
MinSnapshotWorkers: 1,
|
||||
MinPrefetchWorkers: 1,
|
||||
DisablePrefetchOnErrors: true,
|
||||
})
|
||||
addPlanNote(plan, "hgx topology acquisition extensions enabled")
|
||||
},
|
||||
refineAcquisition: func(resolved *ResolvedAcquisitionPlan, discovered DiscoveredResources, _ MatchSignals) {
|
||||
for _, systemPath := range discovered.SystemPaths {
|
||||
if !containsFold(systemPath, "hgx_baseboard_") {
|
||||
continue
|
||||
}
|
||||
addPlanPaths(&resolved.SeedPaths, systemPath, joinPath(systemPath, "/Processors"))
|
||||
addPlanPaths(&resolved.Plan.SeedPaths, systemPath, joinPath(systemPath, "/Processors"))
|
||||
addPlanPaths(&resolved.CriticalPaths, systemPath, joinPath(systemPath, "/Processors"))
|
||||
addPlanPaths(&resolved.Plan.CriticalPaths, systemPath, joinPath(systemPath, "/Processors"))
|
||||
addPlanPaths(&resolved.Plan.PlanBPaths, systemPath, joinPath(systemPath, "/Processors"))
|
||||
}
|
||||
},
|
||||
applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
|
||||
d.EnableGenericGraphicsControllerDedup = true
|
||||
d.EnableStorageEnclosureRecovery = true
|
||||
},
|
||||
refineAnalysis: func(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, discovered DiscoveredResources, _ MatchSignals) {
|
||||
if snapshotHasGPUProcessor(snapshot, discovered.SystemPaths) && (snapshotHasPathContaining(snapshot, "gpu_sxm") || snapshotHasPathContaining(snapshot, "hgx_")) {
|
||||
plan.Directives.EnableProcessorGPUFallback = true
|
||||
plan.Directives.EnableProcessorGPUChassisAlias = true
|
||||
addAnalysisLookupMode(plan, "hgx-alias")
|
||||
addAnalysisNote(plan, "hgx analysis enables processor-gpu alias fallback from snapshot topology")
|
||||
}
|
||||
if snapshotHasStorageControllerHint(snapshot, "/storage/intelvroc", "/storage/ha-raid", "/storage/mrvl.ha-raid") {
|
||||
plan.Directives.EnableKnownStorageControllerRecovery = true
|
||||
addAnalysisStorageDriveCollections(plan,
|
||||
"/Storage/IntelVROC/Drives",
|
||||
"/Storage/IntelVROC/Controllers/1/Drives",
|
||||
)
|
||||
addAnalysisStorageVolumeCollections(plan,
|
||||
"/Storage/IntelVROC/Volumes",
|
||||
"/Storage/HA-RAID/Volumes",
|
||||
"/Storage/MRVL.HA-RAID/Volumes",
|
||||
)
|
||||
}
|
||||
if snapshotHasPathContaining(snapshot, "/chassis/nvmessd.") && snapshotHasPathContaining(snapshot, ".storagebackplane") {
|
||||
plan.Directives.EnableSupermicroNVMeBackplane = true
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
74
internal/collector/redfishprofile/profile_msi.go
Normal file
74
internal/collector/redfishprofile/profile_msi.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package redfishprofile
|
||||
|
||||
import "strings"
|
||||
|
||||
func msiProfile() Profile {
|
||||
return staticProfile{
|
||||
name: "msi",
|
||||
priority: 20,
|
||||
safeForFallback: true,
|
||||
matchFn: func(s MatchSignals) int {
|
||||
score := 0
|
||||
if containsFold(s.SystemManufacturer, "micro-star") || containsFold(s.ChassisManufacturer, "micro-star") {
|
||||
score += 80
|
||||
}
|
||||
if containsFold(s.SystemManufacturer, "msi") || containsFold(s.ChassisManufacturer, "msi") {
|
||||
score += 40
|
||||
}
|
||||
for _, hint := range s.ResourceHints {
|
||||
if strings.HasPrefix(hint, "/redfish/v1/Chassis/GPU") {
|
||||
score += 10
|
||||
break
|
||||
}
|
||||
}
|
||||
return min(score, 100)
|
||||
},
|
||||
extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
|
||||
ensureSnapshotWorkers(plan, 6)
|
||||
ensurePrefetchWorkers(plan, 8)
|
||||
ensureETABaseline(plan, AcquisitionETABaseline{
|
||||
DiscoverySeconds: 12,
|
||||
SnapshotSeconds: 120,
|
||||
PrefetchSeconds: 25,
|
||||
CriticalPlanBSeconds: 35,
|
||||
ProfilePlanBSeconds: 25,
|
||||
})
|
||||
ensureRatePolicy(plan, AcquisitionRatePolicy{
|
||||
TargetP95LatencyMS: 1000,
|
||||
ThrottleP95LatencyMS: 2200,
|
||||
MinSnapshotWorkers: 2,
|
||||
MinPrefetchWorkers: 2,
|
||||
DisablePrefetchOnErrors: true,
|
||||
})
|
||||
ensureRecoveryPolicy(plan, AcquisitionRecoveryPolicy{
|
||||
EnableProfilePlanB: true,
|
||||
})
|
||||
addPlanNote(plan, "msi gpu chassis probes enabled")
|
||||
},
|
||||
refineAcquisition: func(resolved *ResolvedAcquisitionPlan, discovered DiscoveredResources, _ MatchSignals) {
|
||||
for _, chassisPath := range discovered.ChassisPaths {
|
||||
if !strings.HasPrefix(chassisPath, "/redfish/v1/Chassis/GPU") {
|
||||
continue
|
||||
}
|
||||
addPlanPaths(&resolved.SeedPaths, chassisPath)
|
||||
addPlanPaths(&resolved.Plan.SeedPaths, chassisPath)
|
||||
addPlanPaths(&resolved.CriticalPaths, joinPath(chassisPath, "/Sensors"))
|
||||
addPlanPaths(&resolved.Plan.CriticalPaths, joinPath(chassisPath, "/Sensors"))
|
||||
addPlanPaths(&resolved.Plan.PlanBPaths, joinPath(chassisPath, "/Sensors"))
|
||||
}
|
||||
},
|
||||
applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
|
||||
d.EnableGenericGraphicsControllerDedup = true
|
||||
},
|
||||
refineAnalysis: func(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, discovered DiscoveredResources, _ MatchSignals) {
|
||||
if snapshotHasGPUProcessor(snapshot, discovered.SystemPaths) && snapshotHasPathPrefix(snapshot, "/redfish/v1/Chassis/GPU") {
|
||||
plan.Directives.EnableProcessorGPUFallback = true
|
||||
plan.Directives.EnableMSIProcessorGPUChassisLookup = true
|
||||
plan.Directives.EnableMSIGhostGPUFilter = true
|
||||
addAnalysisLookupMode(plan, "msi-index")
|
||||
addAnalysisNote(plan, "msi analysis enables processor-gpu fallback from discovered GPU chassis")
|
||||
addAnalysisNote(plan, "msi ghost-gpu filter enabled: GPUs with temperature=0 on powered-on host are excluded")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
81
internal/collector/redfishprofile/profile_supermicro.go
Normal file
81
internal/collector/redfishprofile/profile_supermicro.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package redfishprofile
|
||||
|
||||
func supermicroProfile() Profile {
|
||||
return staticProfile{
|
||||
name: "supermicro",
|
||||
priority: 20,
|
||||
safeForFallback: true,
|
||||
matchFn: func(s MatchSignals) int {
|
||||
score := 0
|
||||
if containsFold(s.SystemManufacturer, "supermicro") || containsFold(s.ChassisManufacturer, "supermicro") {
|
||||
score += 80
|
||||
}
|
||||
for _, hint := range s.ResourceHints {
|
||||
if containsFold(hint, "hgx_baseboard") || containsFold(hint, "hgx_gpu_sxm") {
|
||||
score += 20
|
||||
break
|
||||
}
|
||||
}
|
||||
return min(score, 100)
|
||||
},
|
||||
extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
|
||||
ensureSnapshotMaxDocuments(plan, 150000)
|
||||
ensureSnapshotWorkers(plan, 6)
|
||||
ensurePrefetchWorkers(plan, 4)
|
||||
ensureETABaseline(plan, AcquisitionETABaseline{
|
||||
DiscoverySeconds: 15,
|
||||
SnapshotSeconds: 180,
|
||||
PrefetchSeconds: 35,
|
||||
CriticalPlanBSeconds: 45,
|
||||
ProfilePlanBSeconds: 30,
|
||||
})
|
||||
ensurePostProbePolicy(plan, AcquisitionPostProbePolicy{
|
||||
EnableDirectNVMEDiskBayProbe: true,
|
||||
})
|
||||
ensureRecoveryPolicy(plan, AcquisitionRecoveryPolicy{
|
||||
EnableProfilePlanB: true,
|
||||
})
|
||||
ensureRatePolicy(plan, AcquisitionRatePolicy{
|
||||
TargetP95LatencyMS: 1200,
|
||||
ThrottleP95LatencyMS: 2400,
|
||||
MinSnapshotWorkers: 2,
|
||||
MinPrefetchWorkers: 1,
|
||||
DisablePrefetchOnErrors: true,
|
||||
})
|
||||
addPlanNote(plan, "supermicro acquisition extensions enabled")
|
||||
},
|
||||
refineAcquisition: func(resolved *ResolvedAcquisitionPlan, _ DiscoveredResources, signals MatchSignals) {
|
||||
for _, hint := range signals.ResourceHints {
|
||||
if normalizePath(hint) != "/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory" {
|
||||
continue
|
||||
}
|
||||
addPlanPaths(&resolved.CriticalPaths, hint)
|
||||
addPlanPaths(&resolved.Plan.CriticalPaths, hint)
|
||||
addPlanPaths(&resolved.Plan.PlanBPaths, hint)
|
||||
break
|
||||
}
|
||||
},
|
||||
applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
|
||||
d.EnableStorageEnclosureRecovery = true
|
||||
},
|
||||
refineAnalysis: func(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, _ DiscoveredResources, _ MatchSignals) {
|
||||
if snapshotHasPathContaining(snapshot, "/chassis/nvmessd.") && snapshotHasPathContaining(snapshot, ".storagebackplane") {
|
||||
plan.Directives.EnableSupermicroNVMeBackplane = true
|
||||
addAnalysisNote(plan, "supermicro analysis enables NVMe backplane recovery from snapshot paths")
|
||||
}
|
||||
if snapshotHasStorageControllerHint(snapshot, "/storage/intelvroc", "/storage/ha-raid", "/storage/mrvl.ha-raid") {
|
||||
plan.Directives.EnableKnownStorageControllerRecovery = true
|
||||
addAnalysisStorageDriveCollections(plan,
|
||||
"/Storage/IntelVROC/Drives",
|
||||
"/Storage/IntelVROC/Controllers/1/Drives",
|
||||
)
|
||||
addAnalysisStorageVolumeCollections(plan,
|
||||
"/Storage/IntelVROC/Volumes",
|
||||
"/Storage/HA-RAID/Volumes",
|
||||
"/Storage/MRVL.HA-RAID/Volumes",
|
||||
)
|
||||
addAnalysisNote(plan, "supermicro analysis enables known storage-controller recovery from snapshot paths")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
55
internal/collector/redfishprofile/profile_xfusion.go
Normal file
55
internal/collector/redfishprofile/profile_xfusion.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package redfishprofile
|
||||
|
||||
func xfusionProfile() Profile {
|
||||
return staticProfile{
|
||||
name: "xfusion",
|
||||
priority: 20,
|
||||
safeForFallback: true,
|
||||
matchFn: func(s MatchSignals) int {
|
||||
score := 0
|
||||
if containsFold(s.ServiceRootVendor, "xfusion") {
|
||||
score += 90
|
||||
}
|
||||
for _, ns := range s.OEMNamespaces {
|
||||
if containsFold(ns, "xfusion") {
|
||||
score += 20
|
||||
break
|
||||
}
|
||||
}
|
||||
if containsFold(s.SystemManufacturer, "xfusion") || containsFold(s.ChassisManufacturer, "xfusion") {
|
||||
score += 40
|
||||
}
|
||||
return min(score, 100)
|
||||
},
|
||||
extendAcquisition: func(plan *AcquisitionPlan, _ MatchSignals) {
|
||||
ensureSnapshotMaxDocuments(plan, 120000)
|
||||
ensureSnapshotWorkers(plan, 4)
|
||||
ensurePrefetchWorkers(plan, 4)
|
||||
ensurePrefetchEnabled(plan, true)
|
||||
ensureETABaseline(plan, AcquisitionETABaseline{
|
||||
DiscoverySeconds: 10,
|
||||
SnapshotSeconds: 90,
|
||||
PrefetchSeconds: 20,
|
||||
CriticalPlanBSeconds: 30,
|
||||
ProfilePlanBSeconds: 20,
|
||||
})
|
||||
ensureRatePolicy(plan, AcquisitionRatePolicy{
|
||||
TargetP95LatencyMS: 800,
|
||||
ThrottleP95LatencyMS: 1800,
|
||||
MinSnapshotWorkers: 2,
|
||||
MinPrefetchWorkers: 1,
|
||||
DisablePrefetchOnErrors: true,
|
||||
})
|
||||
addPlanNote(plan, "xfusion ibmc acquisition extensions enabled")
|
||||
},
|
||||
applyAnalysisDirectives: func(d *AnalysisDirectives, _ MatchSignals) {
|
||||
d.EnableGenericGraphicsControllerDedup = true
|
||||
},
|
||||
refineAnalysis: func(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, discovered DiscoveredResources, _ MatchSignals) {
|
||||
if snapshotHasGPUProcessor(snapshot, discovered.SystemPaths) {
|
||||
plan.Directives.EnableProcessorGPUFallback = true
|
||||
addAnalysisNote(plan, "xfusion analysis enables processor-gpu fallback from snapshot topology")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
229
internal/collector/redfishprofile/profiles_common.go
Normal file
229
internal/collector/redfishprofile/profiles_common.go
Normal file
@@ -0,0 +1,229 @@
|
||||
package redfishprofile
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
type staticProfile struct {
|
||||
name string
|
||||
priority int
|
||||
safeForFallback bool
|
||||
matchFn func(MatchSignals) int
|
||||
extendAcquisition func(*AcquisitionPlan, MatchSignals)
|
||||
refineAcquisition func(*ResolvedAcquisitionPlan, DiscoveredResources, MatchSignals)
|
||||
applyAnalysisDirectives func(*AnalysisDirectives, MatchSignals)
|
||||
refineAnalysis func(*ResolvedAnalysisPlan, map[string]interface{}, DiscoveredResources, MatchSignals)
|
||||
postAnalyze func(*models.AnalysisResult, map[string]interface{}, MatchSignals)
|
||||
}
|
||||
|
||||
func (p staticProfile) Name() string { return p.name }
|
||||
func (p staticProfile) Priority() int { return p.priority }
|
||||
func (p staticProfile) Match(signals MatchSignals) int { return p.matchFn(normalizeSignals(signals)) }
|
||||
func (p staticProfile) SafeForFallback() bool { return p.safeForFallback }
|
||||
func (p staticProfile) ExtendAcquisitionPlan(plan *AcquisitionPlan, signals MatchSignals) {
|
||||
if p.extendAcquisition != nil {
|
||||
p.extendAcquisition(plan, normalizeSignals(signals))
|
||||
}
|
||||
}
|
||||
func (p staticProfile) RefineAcquisitionPlan(resolved *ResolvedAcquisitionPlan, discovered DiscoveredResources, signals MatchSignals) {
|
||||
if p.refineAcquisition != nil {
|
||||
p.refineAcquisition(resolved, discovered, normalizeSignals(signals))
|
||||
}
|
||||
}
|
||||
func (p staticProfile) ApplyAnalysisDirectives(directives *AnalysisDirectives, signals MatchSignals) {
|
||||
if p.applyAnalysisDirectives != nil {
|
||||
p.applyAnalysisDirectives(directives, normalizeSignals(signals))
|
||||
}
|
||||
}
|
||||
func (p staticProfile) RefineAnalysisPlan(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, discovered DiscoveredResources, signals MatchSignals) {
|
||||
if p.refineAnalysis != nil {
|
||||
p.refineAnalysis(plan, snapshot, discovered, normalizeSignals(signals))
|
||||
}
|
||||
}
|
||||
func (p staticProfile) PostAnalyze(result *models.AnalysisResult, snapshot map[string]interface{}, signals MatchSignals) {
|
||||
if p.postAnalyze != nil {
|
||||
p.postAnalyze(result, snapshot, normalizeSignals(signals))
|
||||
}
|
||||
}
|
||||
|
||||
func BuiltinProfiles() []Profile {
|
||||
return []Profile{
|
||||
genericProfile(),
|
||||
amiProfile(),
|
||||
msiProfile(),
|
||||
supermicroProfile(),
|
||||
dellProfile(),
|
||||
hgxProfile(),
|
||||
xfusionProfile(),
|
||||
}
|
||||
}
|
||||
|
||||
// containsFold reports whether sub occurs within v, comparing
// case-insensitively and ignoring surrounding whitespace on both operands.
func containsFold(v, sub string) bool {
	haystack := strings.ToLower(strings.TrimSpace(v))
	needle := strings.ToLower(strings.TrimSpace(sub))
	return strings.Contains(haystack, needle)
}
|
||||
|
||||
func addPlanPaths(dst *[]string, paths ...string) {
|
||||
*dst = append(*dst, paths...)
|
||||
*dst = dedupeSorted(*dst)
|
||||
}
|
||||
|
||||
func addPlanNote(plan *AcquisitionPlan, note string) {
|
||||
if strings.TrimSpace(note) == "" {
|
||||
return
|
||||
}
|
||||
plan.Notes = append(plan.Notes, note)
|
||||
plan.Notes = dedupeSorted(plan.Notes)
|
||||
}
|
||||
|
||||
func addAnalysisNote(plan *ResolvedAnalysisPlan, note string) {
|
||||
if plan == nil || strings.TrimSpace(note) == "" {
|
||||
return
|
||||
}
|
||||
plan.Notes = append(plan.Notes, note)
|
||||
plan.Notes = dedupeSorted(plan.Notes)
|
||||
}
|
||||
|
||||
func addAnalysisLookupMode(plan *ResolvedAnalysisPlan, mode string) {
|
||||
if plan == nil || strings.TrimSpace(mode) == "" {
|
||||
return
|
||||
}
|
||||
plan.ProcessorGPUChassisLookupModes = dedupeSorted(append(plan.ProcessorGPUChassisLookupModes, mode))
|
||||
}
|
||||
|
||||
func addAnalysisStorageDriveCollections(plan *ResolvedAnalysisPlan, rels ...string) {
|
||||
if plan == nil {
|
||||
return
|
||||
}
|
||||
plan.KnownStorageDriveCollections = dedupeSorted(append(plan.KnownStorageDriveCollections, rels...))
|
||||
}
|
||||
|
||||
func addAnalysisStorageVolumeCollections(plan *ResolvedAnalysisPlan, rels ...string) {
|
||||
if plan == nil {
|
||||
return
|
||||
}
|
||||
plan.KnownStorageVolumeCollections = dedupeSorted(append(plan.KnownStorageVolumeCollections, rels...))
|
||||
}
|
||||
|
||||
func ensureSnapshotMaxDocuments(plan *AcquisitionPlan, n int) {
|
||||
if n <= 0 {
|
||||
return
|
||||
}
|
||||
if plan.Tuning.SnapshotMaxDocuments < n {
|
||||
plan.Tuning.SnapshotMaxDocuments = n
|
||||
}
|
||||
}
|
||||
|
||||
func ensureSnapshotWorkers(plan *AcquisitionPlan, n int) {
|
||||
if n <= 0 {
|
||||
return
|
||||
}
|
||||
if plan.Tuning.SnapshotWorkers < n {
|
||||
plan.Tuning.SnapshotWorkers = n
|
||||
}
|
||||
}
|
||||
|
||||
func ensurePrefetchEnabled(plan *AcquisitionPlan, enabled bool) {
|
||||
if plan.Tuning.PrefetchEnabled == nil {
|
||||
plan.Tuning.PrefetchEnabled = new(bool)
|
||||
}
|
||||
*plan.Tuning.PrefetchEnabled = enabled
|
||||
}
|
||||
|
||||
func ensurePrefetchWorkers(plan *AcquisitionPlan, n int) {
|
||||
if n <= 0 {
|
||||
return
|
||||
}
|
||||
if plan.Tuning.PrefetchWorkers < n {
|
||||
plan.Tuning.PrefetchWorkers = n
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNVMePostProbeEnabled(plan *AcquisitionPlan, enabled bool) {
|
||||
if plan.Tuning.NVMePostProbeEnabled == nil {
|
||||
plan.Tuning.NVMePostProbeEnabled = new(bool)
|
||||
}
|
||||
*plan.Tuning.NVMePostProbeEnabled = enabled
|
||||
}
|
||||
|
||||
func ensureRatePolicy(plan *AcquisitionPlan, policy AcquisitionRatePolicy) {
|
||||
if policy.TargetP95LatencyMS > plan.Tuning.RatePolicy.TargetP95LatencyMS {
|
||||
plan.Tuning.RatePolicy.TargetP95LatencyMS = policy.TargetP95LatencyMS
|
||||
}
|
||||
if policy.ThrottleP95LatencyMS > plan.Tuning.RatePolicy.ThrottleP95LatencyMS {
|
||||
plan.Tuning.RatePolicy.ThrottleP95LatencyMS = policy.ThrottleP95LatencyMS
|
||||
}
|
||||
if policy.MinSnapshotWorkers > plan.Tuning.RatePolicy.MinSnapshotWorkers {
|
||||
plan.Tuning.RatePolicy.MinSnapshotWorkers = policy.MinSnapshotWorkers
|
||||
}
|
||||
if policy.MinPrefetchWorkers > plan.Tuning.RatePolicy.MinPrefetchWorkers {
|
||||
plan.Tuning.RatePolicy.MinPrefetchWorkers = policy.MinPrefetchWorkers
|
||||
}
|
||||
if policy.DisablePrefetchOnErrors {
|
||||
plan.Tuning.RatePolicy.DisablePrefetchOnErrors = true
|
||||
}
|
||||
}
|
||||
|
||||
func ensureETABaseline(plan *AcquisitionPlan, baseline AcquisitionETABaseline) {
|
||||
if baseline.DiscoverySeconds > plan.Tuning.ETABaseline.DiscoverySeconds {
|
||||
plan.Tuning.ETABaseline.DiscoverySeconds = baseline.DiscoverySeconds
|
||||
}
|
||||
if baseline.SnapshotSeconds > plan.Tuning.ETABaseline.SnapshotSeconds {
|
||||
plan.Tuning.ETABaseline.SnapshotSeconds = baseline.SnapshotSeconds
|
||||
}
|
||||
if baseline.PrefetchSeconds > plan.Tuning.ETABaseline.PrefetchSeconds {
|
||||
plan.Tuning.ETABaseline.PrefetchSeconds = baseline.PrefetchSeconds
|
||||
}
|
||||
if baseline.CriticalPlanBSeconds > plan.Tuning.ETABaseline.CriticalPlanBSeconds {
|
||||
plan.Tuning.ETABaseline.CriticalPlanBSeconds = baseline.CriticalPlanBSeconds
|
||||
}
|
||||
if baseline.ProfilePlanBSeconds > plan.Tuning.ETABaseline.ProfilePlanBSeconds {
|
||||
plan.Tuning.ETABaseline.ProfilePlanBSeconds = baseline.ProfilePlanBSeconds
|
||||
}
|
||||
}
|
||||
|
||||
func ensurePostProbePolicy(plan *AcquisitionPlan, policy AcquisitionPostProbePolicy) {
|
||||
if policy.EnableDirectNVMEDiskBayProbe {
|
||||
plan.Tuning.PostProbePolicy.EnableDirectNVMEDiskBayProbe = true
|
||||
}
|
||||
if policy.EnableNumericCollectionProbe {
|
||||
plan.Tuning.PostProbePolicy.EnableNumericCollectionProbe = true
|
||||
}
|
||||
if policy.EnableSensorCollectionProbe {
|
||||
plan.Tuning.PostProbePolicy.EnableSensorCollectionProbe = true
|
||||
}
|
||||
}
|
||||
|
||||
func ensureRecoveryPolicy(plan *AcquisitionPlan, policy AcquisitionRecoveryPolicy) {
|
||||
if policy.EnableCriticalCollectionMemberRetry {
|
||||
plan.Tuning.RecoveryPolicy.EnableCriticalCollectionMemberRetry = true
|
||||
}
|
||||
if policy.EnableCriticalSlowProbe {
|
||||
plan.Tuning.RecoveryPolicy.EnableCriticalSlowProbe = true
|
||||
}
|
||||
if policy.EnableProfilePlanB {
|
||||
plan.Tuning.RecoveryPolicy.EnableProfilePlanB = true
|
||||
}
|
||||
}
|
||||
|
||||
func ensureScopedPathPolicy(plan *AcquisitionPlan, policy AcquisitionScopedPathPolicy) {
|
||||
addPlanPaths(&plan.ScopedPaths.SystemSeedSuffixes, policy.SystemSeedSuffixes...)
|
||||
addPlanPaths(&plan.ScopedPaths.SystemCriticalSuffixes, policy.SystemCriticalSuffixes...)
|
||||
addPlanPaths(&plan.ScopedPaths.ChassisSeedSuffixes, policy.ChassisSeedSuffixes...)
|
||||
addPlanPaths(&plan.ScopedPaths.ChassisCriticalSuffixes, policy.ChassisCriticalSuffixes...)
|
||||
addPlanPaths(&plan.ScopedPaths.ManagerSeedSuffixes, policy.ManagerSeedSuffixes...)
|
||||
addPlanPaths(&plan.ScopedPaths.ManagerCriticalSuffixes, policy.ManagerCriticalSuffixes...)
|
||||
}
|
||||
|
||||
func ensurePrefetchPolicy(plan *AcquisitionPlan, policy AcquisitionPrefetchPolicy) {
|
||||
addPlanPaths(&plan.Tuning.PrefetchPolicy.IncludeSuffixes, policy.IncludeSuffixes...)
|
||||
addPlanPaths(&plan.Tuning.PrefetchPolicy.ExcludeContains, policy.ExcludeContains...)
|
||||
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||||
98
internal/collector/redfishprofile/signals.go
Normal file
98
internal/collector/redfishprofile/signals.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package redfishprofile
|
||||
|
||||
import "strings"
|
||||
|
||||
func CollectSignals(serviceRootDoc, systemDoc, chassisDoc, managerDoc map[string]interface{}, resourceHints []string) MatchSignals {
|
||||
signals := MatchSignals{
|
||||
ServiceRootVendor: lookupString(serviceRootDoc, "Vendor"),
|
||||
ServiceRootProduct: lookupString(serviceRootDoc, "Product"),
|
||||
SystemManufacturer: lookupString(systemDoc, "Manufacturer"),
|
||||
SystemModel: lookupString(systemDoc, "Model"),
|
||||
SystemSKU: lookupString(systemDoc, "SKU"),
|
||||
ChassisManufacturer: lookupString(chassisDoc, "Manufacturer"),
|
||||
ChassisModel: lookupString(chassisDoc, "Model"),
|
||||
ManagerManufacturer: lookupString(managerDoc, "Manufacturer"),
|
||||
ResourceHints: resourceHints,
|
||||
}
|
||||
signals.OEMNamespaces = dedupeSorted(append(
|
||||
oemNamespaces(serviceRootDoc),
|
||||
append(oemNamespaces(systemDoc), append(oemNamespaces(chassisDoc), oemNamespaces(managerDoc)...)...)...,
|
||||
))
|
||||
return normalizeSignals(signals)
|
||||
}
|
||||
|
||||
func CollectSignalsFromTree(tree map[string]interface{}) MatchSignals {
|
||||
getDoc := func(path string) map[string]interface{} {
|
||||
if v, ok := tree[path]; ok {
|
||||
if doc, ok := v.(map[string]interface{}); ok {
|
||||
return doc
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
memberPath := func(collectionPath, fallbackPath string) string {
|
||||
collection := getDoc(collectionPath)
|
||||
if len(collection) != 0 {
|
||||
if members, ok := collection["Members"].([]interface{}); ok && len(members) > 0 {
|
||||
if ref, ok := members[0].(map[string]interface{}); ok {
|
||||
if path := lookupString(ref, "@odata.id"); path != "" {
|
||||
return path
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return fallbackPath
|
||||
}
|
||||
|
||||
systemPath := memberPath("/redfish/v1/Systems", "/redfish/v1/Systems/1")
|
||||
chassisPath := memberPath("/redfish/v1/Chassis", "/redfish/v1/Chassis/1")
|
||||
managerPath := memberPath("/redfish/v1/Managers", "/redfish/v1/Managers/1")
|
||||
|
||||
resourceHints := make([]string, 0, len(tree))
|
||||
for path := range tree {
|
||||
path = strings.TrimSpace(path)
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
resourceHints = append(resourceHints, path)
|
||||
}
|
||||
|
||||
return CollectSignals(
|
||||
getDoc("/redfish/v1"),
|
||||
getDoc(systemPath),
|
||||
getDoc(chassisPath),
|
||||
getDoc(managerPath),
|
||||
resourceHints,
|
||||
)
|
||||
}
|
||||
|
||||
// lookupString returns the whitespace-trimmed string stored under key in
// doc, or "" when doc is empty, the key is absent, or the value is not a
// string.
func lookupString(doc map[string]interface{}, key string) string {
	if len(doc) == 0 {
		return ""
	}
	// Direct type assertion replaces the former `value, _ := doc[key]`
	// two-step (the blank identifier there was redundant; staticcheck S1005).
	s, ok := doc[key].(string)
	if !ok {
		return ""
	}
	return strings.TrimSpace(s)
}
|
||||
|
||||
// oemNamespaces lists the top-level keys of doc's "Oem" object, each
// whitespace-trimmed, with blank keys dropped. Returns nil when doc is empty
// or has no "Oem" object. Order follows map iteration and is unspecified.
func oemNamespaces(doc map[string]interface{}) []string {
	if len(doc) == 0 {
		return nil
	}
	oem, ok := doc["Oem"].(map[string]interface{})
	if !ok {
		return nil
	}
	namespaces := make([]string, 0, len(oem))
	for name := range oem {
		if trimmed := strings.TrimSpace(name); trimmed != "" {
			namespaces = append(namespaces, trimmed)
		}
	}
	return namespaces
}
|
||||
17
internal/collector/redfishprofile/testdata/ami-generic.json
vendored
Normal file
17
internal/collector/redfishprofile/testdata/ami-generic.json
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"ServiceRootVendor": "AMI",
|
||||
"ServiceRootProduct": "AMI Redfish Server",
|
||||
"SystemManufacturer": "Gigabyte",
|
||||
"SystemModel": "G292-Z42",
|
||||
"SystemSKU": "",
|
||||
"ChassisManufacturer": "",
|
||||
"ChassisModel": "",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": ["Ami"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/Self",
|
||||
"/redfish/v1/Managers/Self",
|
||||
"/redfish/v1/Oem/Ami",
|
||||
"/redfish/v1/Systems/Self"
|
||||
]
|
||||
}
|
||||
18
internal/collector/redfishprofile/testdata/dell-r750.json
vendored
Normal file
18
internal/collector/redfishprofile/testdata/dell-r750.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"ServiceRootVendor": "",
|
||||
"ServiceRootProduct": "iDRAC Redfish Service",
|
||||
"SystemManufacturer": "Dell Inc.",
|
||||
"SystemModel": "PowerEdge R750",
|
||||
"SystemSKU": "0A42H9",
|
||||
"ChassisManufacturer": "Dell Inc.",
|
||||
"ChassisModel": "PowerEdge R750",
|
||||
"ManagerManufacturer": "Dell Inc.",
|
||||
"OEMNamespaces": ["Dell"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/System.Embedded.1",
|
||||
"/redfish/v1/Managers/iDRAC.Embedded.1",
|
||||
"/redfish/v1/Managers/iDRAC.Embedded.1/Oem/Dell",
|
||||
"/redfish/v1/Systems/System.Embedded.1",
|
||||
"/redfish/v1/Systems/System.Embedded.1/Storage"
|
||||
]
|
||||
}
|
||||
33
internal/collector/redfishprofile/testdata/msi-cg290.json
vendored
Normal file
33
internal/collector/redfishprofile/testdata/msi-cg290.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"ServiceRootVendor": "AMI",
|
||||
"ServiceRootProduct": "AMI Redfish Server",
|
||||
"SystemManufacturer": "Micro-Star International Co., Ltd.",
|
||||
"SystemModel": "CG290-S3063",
|
||||
"SystemSKU": "S3063G290RAU4",
|
||||
"ChassisManufacturer": "NVIDIA",
|
||||
"ChassisModel": "",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": ["Ami"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/GPU1",
|
||||
"/redfish/v1/Chassis/GPU1/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Power",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_TLimit",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Temperature",
|
||||
"/redfish/v1/Chassis/GPU2",
|
||||
"/redfish/v1/Chassis/GPU2/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Power",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_TLimit",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Temperature",
|
||||
"/redfish/v1/Chassis/GPU3",
|
||||
"/redfish/v1/Chassis/GPU3/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Power",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_TLimit",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Temperature",
|
||||
"/redfish/v1/Chassis/GPU4",
|
||||
"/redfish/v1/Chassis/GPU4/NetworkAdapters"
|
||||
]
|
||||
}
|
||||
33
internal/collector/redfishprofile/testdata/msi-cg480-copy.json
vendored
Normal file
33
internal/collector/redfishprofile/testdata/msi-cg480-copy.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"ServiceRootVendor": "AMI",
|
||||
"ServiceRootProduct": "AMI Redfish Server",
|
||||
"SystemManufacturer": "Micro-Star International Co., Ltd.",
|
||||
"SystemModel": "CG480-S5063",
|
||||
"SystemSKU": "5063G480RAE20",
|
||||
"ChassisManufacturer": "NVIDIA",
|
||||
"ChassisModel": "",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": ["Ami"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/GPU1",
|
||||
"/redfish/v1/Chassis/GPU1/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Power",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_TLimit",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Temperature",
|
||||
"/redfish/v1/Chassis/GPU2",
|
||||
"/redfish/v1/Chassis/GPU2/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Power",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_TLimit",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Temperature",
|
||||
"/redfish/v1/Chassis/GPU3",
|
||||
"/redfish/v1/Chassis/GPU3/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Power",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_TLimit",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Temperature",
|
||||
"/redfish/v1/Chassis/GPU4",
|
||||
"/redfish/v1/Chassis/GPU4/NetworkAdapters"
|
||||
]
|
||||
}
|
||||
33
internal/collector/redfishprofile/testdata/msi-cg480.json
vendored
Normal file
33
internal/collector/redfishprofile/testdata/msi-cg480.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"ServiceRootVendor": "AMI",
|
||||
"ServiceRootProduct": "AMI Redfish Server",
|
||||
"SystemManufacturer": "Micro-Star International Co., Ltd.",
|
||||
"SystemModel": "CG480-S5063",
|
||||
"SystemSKU": "5063G480RAE20",
|
||||
"ChassisManufacturer": "NVIDIA",
|
||||
"ChassisModel": "",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": ["Ami"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/GPU1",
|
||||
"/redfish/v1/Chassis/GPU1/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Power",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_TLimit",
|
||||
"/redfish/v1/Chassis/GPU1/Sensors/GPU1_Temperature",
|
||||
"/redfish/v1/Chassis/GPU2",
|
||||
"/redfish/v1/Chassis/GPU2/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Power",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_TLimit",
|
||||
"/redfish/v1/Chassis/GPU2/Sensors/GPU2_Temperature",
|
||||
"/redfish/v1/Chassis/GPU3",
|
||||
"/redfish/v1/Chassis/GPU3/NetworkAdapters",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Power",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_TLimit",
|
||||
"/redfish/v1/Chassis/GPU3/Sensors/GPU3_Temperature",
|
||||
"/redfish/v1/Chassis/GPU4",
|
||||
"/redfish/v1/Chassis/GPU4/NetworkAdapters"
|
||||
]
|
||||
}
|
||||
33
internal/collector/redfishprofile/testdata/supermicro-hgx.json
vendored
Normal file
33
internal/collector/redfishprofile/testdata/supermicro-hgx.json
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"ServiceRootVendor": "Supermicro",
|
||||
"ServiceRootProduct": "",
|
||||
"SystemManufacturer": "Supermicro",
|
||||
"SystemModel": "SYS-821GE-TNHR",
|
||||
"SystemSKU": "0x1D1415D9",
|
||||
"ChassisManufacturer": "Supermicro",
|
||||
"ChassisModel": "X13DEG-OAD",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": ["Supermicro"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/HGX_BMC_0",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/Assembly",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/Controls",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/Drives",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/EnvironmentMetrics",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/LogServices",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/PCIeDevices",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/PCIeSlots",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/PowerSubsystem",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/PowerSubsystem/PowerSupplies",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/Sensors",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/Sensors/HGX_BMC_0_Temp_0",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/ThermalSubsystem",
|
||||
"/redfish/v1/Chassis/HGX_BMC_0/ThermalSubsystem/ThermalMetrics",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/Assembly",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/Controls",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/Controls/TotalGPU_Power_0",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/Drives",
|
||||
"/redfish/v1/Chassis/HGX_Chassis_0/EnvironmentMetrics"
|
||||
]
|
||||
}
|
||||
51
internal/collector/redfishprofile/testdata/supermicro-oam-amd.json
vendored
Normal file
51
internal/collector/redfishprofile/testdata/supermicro-oam-amd.json
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"ServiceRootVendor": "",
|
||||
"ServiceRootProduct": "H12DGQ-NT6",
|
||||
"SystemManufacturer": "Supermicro",
|
||||
"SystemModel": "AS -4124GQ-TNMI",
|
||||
"SystemSKU": "091715D9",
|
||||
"ChassisManufacturer": "Supermicro",
|
||||
"ChassisModel": "H12DGQ-NT6",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": [
|
||||
"Supermicro"
|
||||
],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/1/PCIeDevices",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU1/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU1/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU2",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU2/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU2/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU3",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU3/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU3/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU4",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU4/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU4/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU5",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU5/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU5/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU6",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU6/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU6/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU7",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU7/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU7/PCIeFunctions/1",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU8",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU8/PCIeFunctions",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices/GPU8/PCIeFunctions/1",
|
||||
"/redfish/v1/Managers/1/Oem/Supermicro/FanMode",
|
||||
"/redfish/v1/Oem/Supermicro/DumpService",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU1",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU2",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU3",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU4",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU5",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU6",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU7",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory/GPU8",
|
||||
"/redfish/v1/UpdateService/Oem/Supermicro/FirmwareInventory"
|
||||
]
|
||||
}
|
||||
16
internal/collector/redfishprofile/testdata/unknown-vendor.json
vendored
Normal file
16
internal/collector/redfishprofile/testdata/unknown-vendor.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ServiceRootVendor": "",
|
||||
"ServiceRootProduct": "Redfish Service",
|
||||
"SystemManufacturer": "",
|
||||
"SystemModel": "",
|
||||
"SystemSKU": "",
|
||||
"ChassisManufacturer": "",
|
||||
"ChassisModel": "",
|
||||
"ManagerManufacturer": "",
|
||||
"OEMNamespaces": [],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/1",
|
||||
"/redfish/v1/Managers/1",
|
||||
"/redfish/v1/Systems/1"
|
||||
]
|
||||
}
|
||||
24
internal/collector/redfishprofile/testdata/xfusion-g5500v7.json
vendored
Normal file
24
internal/collector/redfishprofile/testdata/xfusion-g5500v7.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"ServiceRootVendor": "xFusion",
|
||||
"ServiceRootProduct": "G5500 V7",
|
||||
"SystemManufacturer": "OEM",
|
||||
"SystemModel": "G5500 V7",
|
||||
"SystemSKU": "",
|
||||
"ChassisManufacturer": "OEM",
|
||||
"ChassisModel": "G5500 V7",
|
||||
"ManagerManufacturer": "XFUSION",
|
||||
"OEMNamespaces": ["xFusion"],
|
||||
"ResourceHints": [
|
||||
"/redfish/v1/Chassis/1",
|
||||
"/redfish/v1/Chassis/1/Drives",
|
||||
"/redfish/v1/Chassis/1/PCIeDevices",
|
||||
"/redfish/v1/Chassis/1/Sensors",
|
||||
"/redfish/v1/Managers/1",
|
||||
"/redfish/v1/Systems/1",
|
||||
"/redfish/v1/Systems/1/GraphicsControllers",
|
||||
"/redfish/v1/Systems/1/Processors",
|
||||
"/redfish/v1/Systems/1/Processors/Gpu1",
|
||||
"/redfish/v1/Systems/1/Storages",
|
||||
"/redfish/v1/UpdateService/FirmwareInventory"
|
||||
]
|
||||
}
|
||||
168
internal/collector/redfishprofile/types.go
Normal file
168
internal/collector/redfishprofile/types.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package redfishprofile
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// MatchSignals carries the identity facts harvested from a Redfish endpoint
// that profiles use to decide whether they apply to the target machine.
// The testdata fixtures in this package mirror this shape one-to-one.
type MatchSignals struct {
	ServiceRootVendor   string
	ServiceRootProduct  string
	SystemManufacturer  string
	SystemModel         string
	SystemSKU           string
	ChassisManufacturer string
	ChassisModel        string
	ManagerManufacturer string
	// OEMNamespaces lists vendor OEM extension namespaces observed in
	// payloads (e.g. "Supermicro", "Ami", "xFusion").
	OEMNamespaces []string
	// ResourceHints lists Redfish resource paths observed on the service,
	// e.g. "/redfish/v1/Chassis/GPU1/Sensors".
	ResourceHints []string
}
|
||||
|
||||
// AcquisitionPlan describes what to fetch from a Redfish service: seed and
// critical paths, fallback ("Plan B") paths, a scoped-path policy expanded
// against discovered resources, and tuning knobs for the run.
type AcquisitionPlan struct {
	Mode          string   // how the plan was matched — TODO confirm value set against the matcher
	Profiles      []string // names of the profiles that contributed to this plan
	SeedPaths     []string // paths fetched to seed discovery
	CriticalPaths []string // paths treated as must-have — presumably gate a usable snapshot; confirm in collector
	PlanBPaths    []string // fallback paths tried during recovery
	Notes         []string // free-form diagnostics accumulated while planning
	ScopedPaths   AcquisitionScopedPathPolicy
	Tuning        AcquisitionTuning
}

// DiscoveredResources holds the System/Chassis/Manager resource paths found
// during discovery; RefineAcquisitionPlan receives it to adjust the plan.
type DiscoveredResources struct {
	SystemPaths  []string
	ChassisPaths []string
	ManagerPaths []string
}

// ResolvedAcquisitionPlan pairs the originating plan with the concrete
// SeedPaths/CriticalPaths in effect after discovery and refinement.
type ResolvedAcquisitionPlan struct {
	Plan          AcquisitionPlan
	SeedPaths     []string
	CriticalPaths []string
}
|
||||
|
||||
// AcquisitionScopedPathPolicy lists path suffixes applied per discovered
// System/Chassis/Manager resource, split into seed and critical tiers.
type AcquisitionScopedPathPolicy struct {
	SystemSeedSuffixes      []string
	SystemCriticalSuffixes  []string
	ChassisSeedSuffixes     []string
	ChassisCriticalSuffixes []string
	ManagerSeedSuffixes     []string
	ManagerCriticalSuffixes []string
}

// AcquisitionTuning groups the performance and behavior knobs for a
// collection run. Pointer booleans distinguish "unset" (nil, use default)
// from an explicit true/false set by a profile.
type AcquisitionTuning struct {
	SnapshotMaxDocuments int
	SnapshotWorkers      int
	PrefetchEnabled      *bool // nil = default
	PrefetchWorkers      int
	NVMePostProbeEnabled *bool // nil = default
	RatePolicy           AcquisitionRatePolicy
	ETABaseline          AcquisitionETABaseline
	PostProbePolicy      AcquisitionPostProbePolicy
	RecoveryPolicy       AcquisitionRecoveryPolicy
	PrefetchPolicy       AcquisitionPrefetchPolicy
}
|
||||
|
||||
// AcquisitionRatePolicy holds the latency thresholds and worker floors for
// adaptive throttling of a run (latencies in milliseconds).
type AcquisitionRatePolicy struct {
	TargetP95LatencyMS      int
	ThrottleP95LatencyMS    int
	MinSnapshotWorkers      int
	MinPrefetchWorkers      int
	DisablePrefetchOnErrors bool
}

// AcquisitionETABaseline provides per-phase duration estimates in seconds,
// presumably feeding Progress.ETASeconds — confirm in the collector.
type AcquisitionETABaseline struct {
	DiscoverySeconds     int
	SnapshotSeconds      int
	PrefetchSeconds      int
	CriticalPlanBSeconds int
	ProfilePlanBSeconds  int
}

// AcquisitionPostProbePolicy toggles the optional probes run after the main
// snapshot phase.
type AcquisitionPostProbePolicy struct {
	EnableDirectNVMEDiskBayProbe bool
	EnableNumericCollectionProbe bool
	EnableSensorCollectionProbe  bool
}

// AcquisitionRecoveryPolicy toggles the retry/fallback strategies applied
// when critical acquisition fails.
type AcquisitionRecoveryPolicy struct {
	EnableCriticalCollectionMemberRetry bool
	EnableCriticalSlowProbe             bool
	EnableProfilePlanB                  bool
}

// AcquisitionPrefetchPolicy filters which paths are prefetched; the names
// suggest suffix-match includes and substring excludes — TODO confirm exact
// matching semantics where this is consumed.
type AcquisitionPrefetchPolicy struct {
	IncludeSuffixes []string
	ExcludeContains []string
}
|
||||
|
||||
// AnalysisDirectives is the set of vendor-specific analysis behaviors that
// matched profiles can switch on via Profile.ApplyAnalysisDirectives.
type AnalysisDirectives struct {
	EnableProcessorGPUFallback           bool
	EnableSupermicroNVMeBackplane        bool
	EnableProcessorGPUChassisAlias       bool
	EnableGenericGraphicsControllerDedup bool
	EnableMSIProcessorGPUChassisLookup   bool
	EnableMSIGhostGPUFilter              bool
	EnableStorageEnclosureRecovery       bool
	EnableKnownStorageControllerRecovery bool
}

// ResolvedAnalysisPlan bundles the profile match outcome with the directives
// and auxiliary lookup lists the analysis phase consumes.
type ResolvedAnalysisPlan struct {
	Match                          MatchResult
	Directives                     AnalysisDirectives
	Notes                          []string
	ProcessorGPUChassisLookupModes []string
	KnownStorageDriveCollections   []string
	KnownStorageVolumeCollections  []string
}
|
||||
|
||||
// Profile is a vendor/platform plugin: matching profiles first shape the
// acquisition plan, then steer analysis of the collected snapshot.
type Profile interface {
	// Name returns the profile's identifier as reported in ProfileScore.
	Name() string
	// Priority orders profiles relative to each other; see ProfileScore.Priority.
	Priority() int
	// Match scores how well the profile fits the endpoint's signals
	// (higher presumably means a stronger match — confirm in the matcher).
	Match(signals MatchSignals) int
	// SafeForFallback reports whether the profile may be used without a
	// positive match — TODO confirm exact fallback semantics.
	SafeForFallback() bool
	// ExtendAcquisitionPlan mutates the plan before discovery runs.
	ExtendAcquisitionPlan(plan *AcquisitionPlan, signals MatchSignals)
	// RefineAcquisitionPlan adjusts the resolved plan using discovery output.
	RefineAcquisitionPlan(resolved *ResolvedAcquisitionPlan, discovered DiscoveredResources, signals MatchSignals)
	// ApplyAnalysisDirectives enables the analysis behaviors this profile needs.
	ApplyAnalysisDirectives(directives *AnalysisDirectives, signals MatchSignals)
	// RefineAnalysisPlan adjusts the analysis plan with access to the raw snapshot.
	RefineAnalysisPlan(plan *ResolvedAnalysisPlan, snapshot map[string]interface{}, discovered DiscoveredResources, signals MatchSignals)
	// PostAnalyze lets the profile patch the final result after core analysis.
	PostAnalyze(result *models.AnalysisResult, snapshot map[string]interface{}, signals MatchSignals)
}
|
||||
|
||||
// MatchResult records the outcome of profile matching: the active profiles
// plus the score every candidate received (kept for diagnostics).
type MatchResult struct {
	Mode     string // how the match was decided — TODO confirm value set
	Profiles []Profile
	Scores   []ProfileScore
}

// ProfileScore is the per-candidate matching outcome.
type ProfileScore struct {
	Name     string
	Score    int
	Active   bool // whether the profile was selected for the run
	Priority int
}
|
||||
|
||||
func normalizeSignals(signals MatchSignals) MatchSignals {
|
||||
signals.OEMNamespaces = dedupeSorted(signals.OEMNamespaces)
|
||||
signals.ResourceHints = dedupeSorted(signals.ResourceHints)
|
||||
return signals
|
||||
}
|
||||
|
||||
// dedupeSorted returns the unique, non-empty entries of items in ascending
// order. A nil or zero-length input yields nil; otherwise the result is a
// freshly allocated slice (possibly empty when every entry was blank).
func dedupeSorted(items []string) []string {
	if len(items) == 0 {
		return nil
	}
	seen := make(map[string]struct{}, len(items))
	unique := make([]string, 0, len(items))
	for _, candidate := range items {
		if candidate == "" {
			// Blank entries carry no signal; drop them.
			continue
		}
		if _, dup := seen[candidate]; dup {
			continue
		}
		seen[candidate] = struct{}{}
		unique = append(unique, candidate)
	}
	sort.Strings(unique)
	return unique
}
|
||||
37
internal/collector/registry.go
Normal file
37
internal/collector/registry.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package collector
|
||||
|
||||
import "sync"
|
||||
|
||||
// Registry is a concurrency-safe lookup table of Connectors keyed by the
// protocol string each connector reports via Connector.Protocol().
type Registry struct {
	mu         sync.RWMutex // guards connectors
	connectors map[string]Connector
}
|
||||
|
||||
func NewRegistry() *Registry {
|
||||
return &Registry{
|
||||
connectors: make(map[string]Connector),
|
||||
}
|
||||
}
|
||||
|
||||
func NewDefaultRegistry() *Registry {
|
||||
r := NewRegistry()
|
||||
r.Register(NewRedfishConnector())
|
||||
r.Register(NewIPMIMockConnector())
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *Registry) Register(connector Connector) {
|
||||
if connector == nil {
|
||||
return
|
||||
}
|
||||
r.mu.Lock()
|
||||
r.connectors[connector.Protocol()] = connector
|
||||
r.mu.Unlock()
|
||||
}
|
||||
|
||||
func (r *Registry) Get(protocol string) (Connector, bool) {
|
||||
r.mu.RLock()
|
||||
connector, ok := r.connectors[protocol]
|
||||
r.mu.RUnlock()
|
||||
return connector, ok
|
||||
}
|
||||
79
internal/collector/types.go
Normal file
79
internal/collector/types.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// Request describes one collection job: the target BMC endpoint plus the
// credentials and transport options used to reach it.
type Request struct {
	Host             string
	Protocol         string // registry key selecting the Connector, e.g. "redfish"
	Port             int
	Username         string
	AuthType         string // credential scheme — TODO confirm accepted values
	Password         string
	Token            string
	TLSMode          string // TLS verification mode — TODO confirm accepted values
	PowerOnIfHostOff bool   // request host power-on before collecting — confirm handling in connectors
}
|
||||
|
||||
// Progress is a point-in-time status update emitted while a collection runs.
type Progress struct {
	Status        string
	Progress      int // completion counter — presumably 0-100; confirm in emitters
	Message       string
	CurrentPhase  string
	ETASeconds    int
	ActiveModules []ModuleActivation
	ModuleScores  []ModuleScore
	DebugInfo     *CollectDebugInfo // optional; nil unless debug data is attached
}

// ProgressFn receives Progress updates during Connector.Collect.
type ProgressFn func(Progress)

// ModuleActivation identifies a module selected for the run and its score.
type ModuleActivation struct {
	Name  string
	Score int
}

// ModuleScore reports every candidate module's score, whether it was
// activated, and its priority (parallel to redfishprofile.ProfileScore).
type ModuleScore struct {
	Name     string
	Score    int
	Active   bool
	Priority int
}
|
||||
|
||||
// CollectDebugInfo carries optional diagnostics about how the collector
// tuned itself during a run.
type CollectDebugInfo struct {
	AdaptiveThrottled bool // whether adaptive throttling kicked in — confirm against rate policy
	SnapshotWorkers   int
	PrefetchWorkers   int
	PrefetchEnabled   *bool // nil when the default was used
	PhaseTelemetry    []PhaseTelemetry
}

// PhaseTelemetry aggregates request statistics for one collection phase.
type PhaseTelemetry struct {
	Phase     string
	Requests  int
	Errors    int
	ErrorRate float64
	AvgMS     int64 // mean request latency, milliseconds
	P95MS     int64 // 95th-percentile request latency, milliseconds
}
|
||||
|
||||
// ProbeResult summarizes a lightweight reachability and power-state check
// of an endpoint, produced by Prober.Probe ahead of a full collection.
type ProbeResult struct {
	Reachable             bool
	Protocol              string
	HostPowerState        string // raw power state string reported by the BMC
	HostPoweredOn         bool
	PowerControlAvailable bool
	SystemPath            string // system resource path, when discovered
}
|
||||
|
||||
// Connector collects hardware and telemetry data from a BMC over a single
// management protocol.
type Connector interface {
	// Protocol returns the registry key this connector serves (see Registry).
	Protocol() string
	// Collect performs a full collection against req, emitting Progress
	// updates through emit along the way.
	Collect(ctx context.Context, req Request, emit ProgressFn) (*models.AnalysisResult, error)
}

// Prober is an optional capability for connectors that can cheaply check
// an endpoint before committing to a full Collect.
type Prober interface {
	Probe(ctx context.Context, req Request) (*ProbeResult, error)
}
|
||||
@@ -3,8 +3,8 @@ package exporter
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
@@ -25,7 +25,7 @@ func (e *Exporter) ExportCSV(w io.Writer) error {
|
||||
defer writer.Flush()
|
||||
|
||||
// Header
|
||||
if err := writer.Write([]string{"Component", "Serial Number", "Manufacturer", "Part Number"}); err != nil {
|
||||
if err := writer.Write([]string{"Component", "Serial Number", "Manufacturer", "Location"}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func (e *Exporter) ExportCSV(w io.Writer) error {
|
||||
|
||||
// FRU data
|
||||
for _, fru := range e.result.FRU {
|
||||
if fru.SerialNumber == "" {
|
||||
if !hasUsableSerial(fru.SerialNumber) {
|
||||
continue
|
||||
}
|
||||
name := fru.ProductName
|
||||
@@ -54,46 +54,45 @@ func (e *Exporter) ExportCSV(w io.Writer) error {
|
||||
|
||||
// Hardware data
|
||||
if e.result.Hardware != nil {
|
||||
// Memory
|
||||
for _, mem := range e.result.Hardware.Memory {
|
||||
if mem.SerialNumber == "" {
|
||||
continue
|
||||
}
|
||||
// Board
|
||||
if hasUsableSerial(e.result.Hardware.BoardInfo.SerialNumber) {
|
||||
if err := writer.Write([]string{
|
||||
fmt.Sprintf("DIMM Slot %d (%s)", mem.Slot, mem.PartNumber),
|
||||
mem.SerialNumber,
|
||||
mem.Manufacturer,
|
||||
mem.PartNumber,
|
||||
e.result.Hardware.BoardInfo.ProductName,
|
||||
strings.TrimSpace(e.result.Hardware.BoardInfo.SerialNumber),
|
||||
e.result.Hardware.BoardInfo.Manufacturer,
|
||||
"Board",
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Storage
|
||||
for _, stor := range e.result.Hardware.Storage {
|
||||
if stor.SerialNumber == "" {
|
||||
seenCanonical := make(map[string]struct{})
|
||||
for _, dev := range canonicalDevicesForExport(e.result.Hardware) {
|
||||
if !hasUsableSerial(dev.SerialNumber) {
|
||||
continue
|
||||
}
|
||||
if err := writer.Write([]string{
|
||||
fmt.Sprintf("%s %s", stor.Type, stor.Model),
|
||||
stor.SerialNumber,
|
||||
"",
|
||||
"",
|
||||
}); err != nil {
|
||||
serial := strings.TrimSpace(dev.SerialNumber)
|
||||
seenCanonical[serial] = struct{}{}
|
||||
component, manufacturer, location := csvFieldsFromCanonicalDevice(dev)
|
||||
if err := writer.Write([]string{component, serial, manufacturer, location}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// PCIe devices
|
||||
for _, pcie := range e.result.Hardware.PCIeDevices {
|
||||
if pcie.SerialNumber == "" {
|
||||
// Legacy network cards
|
||||
for _, nic := range e.result.Hardware.NetworkCards {
|
||||
if !hasUsableSerial(nic.SerialNumber) {
|
||||
continue
|
||||
}
|
||||
serial := strings.TrimSpace(nic.SerialNumber)
|
||||
if _, ok := seenCanonical[serial]; ok {
|
||||
continue
|
||||
}
|
||||
if err := writer.Write([]string{
|
||||
fmt.Sprintf("%s (%s)", pcie.DeviceClass, pcie.Slot),
|
||||
pcie.SerialNumber,
|
||||
nic.Model,
|
||||
serial,
|
||||
"",
|
||||
pcie.PartNumber,
|
||||
"Network",
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -110,155 +109,64 @@ func (e *Exporter) ExportJSON(w io.Writer) error {
|
||||
return encoder.Encode(e.result)
|
||||
}
|
||||
|
||||
// ExportTXT exports a human-readable text report
|
||||
func (e *Exporter) ExportTXT(w io.Writer) error {
|
||||
fmt.Fprintln(w, "LOGPile Analysis Report - mchus.pro")
|
||||
fmt.Fprintln(w, "====================================")
|
||||
fmt.Fprintln(w)
|
||||
|
||||
if e.result == nil {
|
||||
fmt.Fprintln(w, "No data loaded.")
|
||||
return nil
|
||||
func hasUsableSerial(serial string) bool {
|
||||
s := strings.TrimSpace(serial)
|
||||
if s == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "File: %s\n\n", e.result.Filename)
|
||||
|
||||
// Hardware summary
|
||||
if e.result.Hardware != nil {
|
||||
hw := e.result.Hardware
|
||||
|
||||
// Firmware
|
||||
if len(hw.Firmware) > 0 {
|
||||
fmt.Fprintln(w, "FIRMWARE VERSIONS")
|
||||
fmt.Fprintln(w, "-----------------")
|
||||
for _, fw := range hw.Firmware {
|
||||
fmt.Fprintf(w, " %s: %s\n", fw.DeviceName, fw.Version)
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// CPUs
|
||||
if len(hw.CPUs) > 0 {
|
||||
fmt.Fprintln(w, "PROCESSORS")
|
||||
fmt.Fprintln(w, "----------")
|
||||
for _, cpu := range hw.CPUs {
|
||||
fmt.Fprintf(w, " Socket %d: %s\n", cpu.Socket, cpu.Model)
|
||||
fmt.Fprintf(w, " Cores: %d, Threads: %d, Freq: %d MHz (Turbo: %d MHz)\n",
|
||||
cpu.Cores, cpu.Threads, cpu.FrequencyMHz, cpu.MaxFreqMHz)
|
||||
fmt.Fprintf(w, " TDP: %dW, L3 Cache: %d KB\n", cpu.TDP, cpu.L3CacheKB)
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// Memory
|
||||
if len(hw.Memory) > 0 {
|
||||
fmt.Fprintln(w, "MEMORY")
|
||||
fmt.Fprintln(w, "------")
|
||||
totalMB := 0
|
||||
for _, mem := range hw.Memory {
|
||||
totalMB += mem.SizeMB
|
||||
}
|
||||
fmt.Fprintf(w, " Total: %d GB (%d DIMMs)\n", totalMB/1024, len(hw.Memory))
|
||||
fmt.Fprintf(w, " Type: %s @ %d MHz\n", hw.Memory[0].Type, hw.Memory[0].CurrentSpeedMHz)
|
||||
fmt.Fprintf(w, " Manufacturer: %s\n", hw.Memory[0].Manufacturer)
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// Storage
|
||||
if len(hw.Storage) > 0 {
|
||||
fmt.Fprintln(w, "STORAGE")
|
||||
fmt.Fprintln(w, "-------")
|
||||
for _, stor := range hw.Storage {
|
||||
fmt.Fprintf(w, " %s: %s (%d GB) - S/N: %s\n",
|
||||
stor.Slot, stor.Model, stor.SizeGB, stor.SerialNumber)
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// PCIe
|
||||
if len(hw.PCIeDevices) > 0 {
|
||||
fmt.Fprintln(w, "PCIE DEVICES")
|
||||
fmt.Fprintln(w, "------------")
|
||||
for _, pcie := range hw.PCIeDevices {
|
||||
fmt.Fprintf(w, " %s: %s (x%d %s)\n",
|
||||
pcie.Slot, pcie.DeviceClass, pcie.LinkWidth, pcie.LinkSpeed)
|
||||
if pcie.SerialNumber != "" {
|
||||
fmt.Fprintf(w, " S/N: %s\n", pcie.SerialNumber)
|
||||
}
|
||||
if len(pcie.MACAddresses) > 0 {
|
||||
fmt.Fprintf(w, " MACs: %v\n", pcie.MACAddresses)
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
switch strings.ToUpper(s) {
|
||||
case "N/A", "NA", "NONE", "NULL", "UNKNOWN", "-":
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
|
||||
// Sensors summary
|
||||
if len(e.result.Sensors) > 0 {
|
||||
fmt.Fprintln(w, "SENSOR READINGS")
|
||||
fmt.Fprintln(w, "---------------")
|
||||
|
||||
// Group by type
|
||||
byType := make(map[string][]models.SensorReading)
|
||||
for _, s := range e.result.Sensors {
|
||||
byType[s.Type] = append(byType[s.Type], s)
|
||||
}
|
||||
|
||||
for stype, sensors := range byType {
|
||||
fmt.Fprintf(w, "\n %s:\n", stype)
|
||||
for _, s := range sensors {
|
||||
if s.Value != 0 {
|
||||
fmt.Fprintf(w, " %s: %.0f %s [%s]\n", s.Name, s.Value, s.Unit, s.Status)
|
||||
} else if s.RawValue != "" {
|
||||
fmt.Fprintf(w, " %s: %s [%s]\n", s.Name, s.RawValue, s.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// FRU summary
|
||||
if len(e.result.FRU) > 0 {
|
||||
fmt.Fprintln(w, "FRU COMPONENTS")
|
||||
fmt.Fprintln(w, "--------------")
|
||||
for _, fru := range e.result.FRU {
|
||||
name := fru.ProductName
|
||||
if name == "" {
|
||||
name = fru.Description
|
||||
}
|
||||
fmt.Fprintf(w, " %s\n", name)
|
||||
if fru.SerialNumber != "" {
|
||||
fmt.Fprintf(w, " Serial: %s\n", fru.SerialNumber)
|
||||
}
|
||||
if fru.Manufacturer != "" {
|
||||
fmt.Fprintf(w, " Manufacturer: %s\n", fru.Manufacturer)
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
// Events summary
|
||||
fmt.Fprintf(w, "EVENTS: %d total\n", len(e.result.Events))
|
||||
var critical, warning, info int
|
||||
for _, ev := range e.result.Events {
|
||||
switch ev.Severity {
|
||||
case models.SeverityCritical:
|
||||
critical++
|
||||
case models.SeverityWarning:
|
||||
warning++
|
||||
case models.SeverityInfo:
|
||||
info++
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, " Critical: %d\n", critical)
|
||||
fmt.Fprintf(w, " Warning: %d\n", warning)
|
||||
fmt.Fprintf(w, " Info: %d\n", info)
|
||||
|
||||
// Footer
|
||||
fmt.Fprintln(w)
|
||||
fmt.Fprintln(w, "------------------------------------")
|
||||
fmt.Fprintln(w, "Generated by LOGPile - mchus.pro")
|
||||
fmt.Fprintln(w, "https://git.mchus.pro/mchus/logpile")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// csvFieldsFromCanonicalDevice maps a canonical hardware device onto the
// Component/Manufacturer/Location columns of the CSV export. Generic
// fallback chains run first; the switch then applies kind-specific
// overrides (note: the Memory and PSU cases replace component outright).
func csvFieldsFromCanonicalDevice(dev models.HardwareDevice) (component, manufacturer, location string) {
	component = firstNonEmptyString(
		dev.Model,
		dev.PartNumber,
		dev.DeviceClass,
		dev.Kind,
	)
	manufacturer = firstNonEmptyString(dev.Manufacturer, inferCSVVendor(dev))
	location = firstNonEmptyString(dev.Location, dev.Slot, dev.BDF, dev.Kind)

	switch dev.Kind {
	case models.DeviceKindCPU:
		// CPUs often carry no model/slot data; label both columns plainly.
		if component == "" {
			component = "CPU"
		}
		if location == "" {
			location = "CPU"
		}
	case models.DeviceKindMemory:
		// DIMMs are best identified by part number, then model.
		component = firstNonEmptyString(dev.PartNumber, dev.Model, "Memory")
	case models.DeviceKindPCIe, models.DeviceKindGPU, models.DeviceKindNetwork:
		if location == "" {
			location = firstNonEmptyString(dev.Slot, dev.BDF, "PCIe")
		}
	case models.DeviceKindPSU:
		component = firstNonEmptyString(dev.Model, "Power Supply")
	}

	return component, manufacturer, location
}
|
||||
|
||||
func inferCSVVendor(dev models.HardwareDevice) string {
|
||||
switch dev.Kind {
|
||||
case models.DeviceKindCPU:
|
||||
return ""
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// firstNonEmptyString returns the first of values that is non-empty after
// trimming surrounding whitespace, with that trimming applied; it returns
// "" when no value qualifies. (Previously TrimSpace was computed twice per
// matching value; the trimmed result is now reused.)
func firstNonEmptyString(values ...string) string {
	for _, value := range values {
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
|
||||
79
internal/exporter/exporter_csv_test.go
Normal file
79
internal/exporter/exporter_csv_test.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package exporter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// TestExportCSV_IncludesAllComponentTypesWithUsableSerials builds a result
// with one serialized component per hardware category and checks that
// ExportCSV emits a row for each usable serial while filtering placeholder
// serials such as "N/A".
func TestExportCSV_IncludesAllComponentTypesWithUsableSerials(t *testing.T) {
	// Fixture: each category carries a distinctive serial we can look for.
	result := &models.AnalysisResult{
		FRU: []models.FRUInfo{
			{ProductName: "FRU Board", SerialNumber: "FRU-001", Manufacturer: "ACME"},
		},
		Hardware: &models.HardwareConfig{
			BoardInfo: models.BoardInfo{
				ProductName:  "X12",
				SerialNumber: "BOARD-001",
				Manufacturer: "Supermicro",
			},
			CPUs: []models.CPU{
				{Socket: 0, Model: "Xeon", SerialNumber: "CPU-001"},
			},
			Memory: []models.MemoryDIMM{
				{Slot: "DIMM0", PartNumber: "MEM-PN", SerialNumber: "MEM-001", Manufacturer: "Samsung"},
			},
			Storage: []models.Storage{
				{Slot: "U.2-1", Model: "PM9A3", SerialNumber: "SSD-001", Manufacturer: "Samsung"},
			},
			GPUs: []models.GPU{
				{Slot: "GPU1", Model: "H200", SerialNumber: "GPU-001", Manufacturer: "NVIDIA"},
			},
			PCIeDevices: []models.PCIeDevice{
				{Slot: "PCIe1", DeviceClass: "NVSwitch", SerialNumber: "PCIE-001", Manufacturer: "NVIDIA"},
			},
			NetworkAdapters: []models.NetworkAdapter{
				{Slot: "Slot 17", Location: "#CPU0_PCIE4", Model: "I350", SerialNumber: "NIC-001", Vendor: "Intel"},
				// Placeholder serial: must be filtered out of the export.
				{Slot: "Slot 18", Model: "skip-na", SerialNumber: "N/A", Vendor: "Intel"},
			},
			NetworkCards: []models.NIC{
				{Model: "Legacy NIC", SerialNumber: "LNIC-001"},
			},
			PowerSupply: []models.PSU{
				{Slot: "PSU0", Model: "GW-CRPS3000LW", SerialNumber: "PSU-001", Vendor: "Great Wall"},
			},
		},
	}

	var buf bytes.Buffer
	if err := New(result).ExportCSV(&buf); err != nil {
		t.Fatalf("ExportCSV failed: %v", err)
	}

	rows, err := csv.NewReader(bytes.NewReader(buf.Bytes())).ReadAll()
	if err != nil {
		t.Fatalf("read csv: %v", err)
	}
	if len(rows) < 2 {
		t.Fatalf("expected data rows, got %d", len(rows))
	}

	// Collect every serial (column index 1) emitted below the header row.
	serials := make(map[string]bool)
	for _, row := range rows[1:] {
		if len(row) > 1 {
			serials[row[1]] = true
		}
	}

	want := []string{"FRU-001", "BOARD-001", "CPU-001", "MEM-001", "SSD-001", "GPU-001", "PCIE-001", "NIC-001", "LNIC-001", "PSU-001"}
	for _, sn := range want {
		if !serials[sn] {
			t.Fatalf("expected serial %s in csv export", sn)
		}
	}
	if serials["N/A"] {
		t.Fatalf("did not expect unusable serial N/A in export")
	}
}
|
||||
164
internal/exporter/generate_example_test.go
Normal file
164
internal/exporter/generate_example_test.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package exporter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// TestGenerateReanimatorExample generates an example reanimator.json file
// This test is marked as skipped by default - run with: go test -v -run TestGenerateReanimatorExample
//
// It builds a realistic two-socket server inventory, converts it with
// ConvertToReanimator, and writes the indented JSON to example/docs so the
// committed example stays in sync with the converter output.
func TestGenerateReanimatorExample(t *testing.T) {
	t.Skip("Skip by default - run manually to generate example")

	// Create realistic test data matching import-example-full.json structure
	result := &models.AnalysisResult{
		Filename:    "redfish://10.10.10.103",
		SourceType:  "api",
		Protocol:    "redfish",
		TargetHost:  "10.10.10.103",
		CollectedAt: time.Date(2026, 2, 10, 15, 30, 0, 0, time.UTC),
		Hardware: &models.HardwareConfig{
			BoardInfo: models.BoardInfo{
				Manufacturer: "Supermicro",
				ProductName:  "X12DPG-QT6",
				SerialNumber: "21D634101",
				PartNumber:   "X12DPG-QT6-REV1.01",
				UUID:         "d7ef2fe5-2fd0-11f0-910a-346f11040868",
			},
			Firmware: []models.FirmwareInfo{
				{DeviceName: "BIOS", Version: "06.08.05"},
				{DeviceName: "BMC", Version: "5.17.00"},
				{DeviceName: "CPLD", Version: "01.02.03"},
			},
			// Dual-socket configuration with identical CPU models.
			CPUs: []models.CPU{
				{
					Socket:       0,
					Model:        "INTEL(R) XEON(R) GOLD 6530",
					Cores:        32,
					Threads:      64,
					FrequencyMHz: 2100,
					MaxFreqMHz:   4000,
				},
				{
					Socket:       1,
					Model:        "INTEL(R) XEON(R) GOLD 6530",
					Cores:        32,
					Threads:      64,
					FrequencyMHz: 2100,
					MaxFreqMHz:   4000,
				},
			},
			// One populated DIMM per socket.
			Memory: []models.MemoryDIMM{
				{
					Slot:            "CPU0_C0D0",
					Location:        "CPU0_C0D0",
					Present:         true,
					SizeMB:          32768,
					Type:            "DDR5",
					MaxSpeedMHz:     4800,
					CurrentSpeedMHz: 4800,
					Manufacturer:    "Hynix",
					SerialNumber:    "80AD032419E17CEEC1",
					PartNumber:      "HMCG88AGBRA191N",
					Status:          "OK",
				},
				{
					Slot:            "CPU1_C0D0",
					Location:        "CPU1_C0D0",
					Present:         true,
					SizeMB:          32768,
					Type:            "DDR5",
					MaxSpeedMHz:     4800,
					CurrentSpeedMHz: 4800,
					Manufacturer:    "Hynix",
					SerialNumber:    "80AD032419E17D6FBA",
					PartNumber:      "HMCG88AGBRA191N",
					Status:          "OK",
				},
			},
			Storage: []models.Storage{
				{
					Slot:         "OB01",
					Type:         "NVMe",
					Model:        "INTEL SSDPF2KX076T1",
					SizeGB:       7680,
					SerialNumber: "BTAX41900GF87P6DGN",
					Manufacturer: "Intel",
					Firmware:     "9CV10510",
					Interface:    "NVMe",
					Present:      true,
				},
				{
					Slot:         "OB02",
					Type:         "NVMe",
					Model:        "INTEL SSDPF2KX076T1",
					SizeGB:       7680,
					SerialNumber: "BTAX41900BEG7P6DGN",
					Manufacturer: "Intel",
					Firmware:     "9CV10510",
					Interface:    "NVMe",
					Present:      true,
				},
			},
			PCIeDevices: []models.PCIeDevice{
				{
					Slot:         "PCIeCard1",
					VendorID:     32902,
					DeviceID:     2912,
					BDF:          "0000:18:00.0",
					DeviceClass:  "MassStorageController",
					Manufacturer: "Intel",
					PartNumber:   "RAID Controller",
					SerialNumber: "RAID-001-12345",
					LinkWidth:    8,
					LinkSpeed:    "Gen3",
					MaxLinkWidth: 8,
					MaxLinkSpeed: "Gen3",
				},
			},
			PowerSupply: []models.PSU{
				{
					Slot:         "0",
					Present:      true,
					Model:        "GW-CRPS3000LW",
					Vendor:       "Great Wall",
					WattageW:     3000,
					SerialNumber: "2P06C102610",
					PartNumber:   "V0310C9000000000",
					Firmware:     "00.03.05",
					Status:       "OK",
					InputType:    "ACWideRange",
					InputPowerW:  137,
					OutputPowerW: 104,
					InputVoltage: 215.25,
				},
			},
		},
	}

	// Convert to Reanimator format
	reanimator, err := ConvertToReanimator(result)
	if err != nil {
		t.Fatalf("ConvertToReanimator failed: %v", err)
	}

	// Marshal to JSON with indentation
	jsonData, err := json.MarshalIndent(reanimator, "", "  ")
	if err != nil {
		t.Fatalf("Failed to marshal JSON: %v", err)
	}

	// Write to example file
	examplePath := filepath.Join("../../example/docs", "export-example-logpile.json")
	if err := os.WriteFile(examplePath, jsonData, 0644); err != nil {
		t.Fatalf("Failed to write example file: %v", err)
	}

	t.Logf("Generated example file: %s", examplePath)
	t.Logf("JSON length: %d bytes", len(jsonData))
}
|
||||
2340
internal/exporter/reanimator_converter.go
Normal file
2340
internal/exporter/reanimator_converter.go
Normal file
File diff suppressed because it is too large
Load Diff
1867
internal/exporter/reanimator_converter_test.go
Normal file
1867
internal/exporter/reanimator_converter_test.go
Normal file
File diff suppressed because it is too large
Load Diff
289
internal/exporter/reanimator_integration_test.go
Normal file
289
internal/exporter/reanimator_integration_test.go
Normal file
@@ -0,0 +1,289 @@
|
||||
package exporter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// TestFullReanimatorExport tests complete export with realistic data
//
// Covers the end-to-end path: build an AnalysisResult with every hardware
// section populated (including an empty DIMM slot, which the converter is
// expected to drop), convert it, assert on the converted fields, and verify
// the result marshals to JSON containing every expected top-level key.
func TestFullReanimatorExport(t *testing.T) {
	// Create a realistic AnalysisResult similar to import-example-full.json
	result := &models.AnalysisResult{
		Filename:    "redfish://10.10.10.103",
		SourceType:  "api",
		Protocol:    "redfish",
		TargetHost:  "10.10.10.103",
		CollectedAt: time.Date(2026, 2, 10, 15, 30, 0, 0, time.UTC),
		Hardware: &models.HardwareConfig{
			BoardInfo: models.BoardInfo{
				Manufacturer: "Supermicro",
				ProductName:  "X12DPG-QT6",
				SerialNumber: "21D634101",
				PartNumber:   "X12DPG-QT6-REV1.01",
				UUID:         "d7ef2fe5-2fd0-11f0-910a-346f11040868",
			},
			Firmware: []models.FirmwareInfo{
				{DeviceName: "BIOS", Version: "06.08.05"},
				{DeviceName: "BMC", Version: "5.17.00"},
				{DeviceName: "CPLD", Version: "01.02.03"},
			},
			CPUs: []models.CPU{
				{
					Socket:       0,
					Model:        "INTEL(R) XEON(R) GOLD 6530",
					Cores:        32,
					Threads:      64,
					FrequencyMHz: 2100,
					MaxFreqMHz:   4000,
				},
				{
					Socket:       1,
					Model:        "INTEL(R) XEON(R) GOLD 6530",
					Cores:        32,
					Threads:      64,
					FrequencyMHz: 2100,
					MaxFreqMHz:   4000,
				},
			},
			Memory: []models.MemoryDIMM{
				{
					Slot:            "CPU0_C0D0",
					Location:        "CPU0_C0D0",
					Present:         true,
					SizeMB:          32768,
					Type:            "DDR5",
					MaxSpeedMHz:     4800,
					CurrentSpeedMHz: 4800,
					Manufacturer:    "Hynix",
					SerialNumber:    "80AD032419E17CEEC1",
					PartNumber:      "HMCG88AGBRA191N",
					Status:          "OK",
				},
				// Empty slot: Present=false, should be excluded from the export.
				{
					Slot:            "CPU0_C1D0",
					Location:        "CPU0_C1D0",
					Present:         false,
					SizeMB:          0,
					Type:            "",
					MaxSpeedMHz:     0,
					CurrentSpeedMHz: 0,
					Status:          "Empty",
				},
			},
			Storage: []models.Storage{
				{
					Slot:         "OB01",
					Type:         "NVMe",
					Model:        "INTEL SSDPF2KX076T1",
					SizeGB:       7680,
					SerialNumber: "BTAX41900GF87P6DGN",
					Manufacturer: "Intel",
					Firmware:     "9CV10510",
					Interface:    "NVMe",
					Present:      true,
				},
				{
					Slot:         "FP00HDD00",
					Type:         "HDD",
					Model:        "ST12000NM0008",
					SizeGB:       12000,
					SerialNumber: "ZJV01234ABC",
					Manufacturer: "Seagate",
					Firmware:     "SN03",
					Interface:    "SATA",
					Present:      true,
				},
			},
			PCIeDevices: []models.PCIeDevice{
				{
					Slot:         "PCIeCard1",
					VendorID:     32902,
					DeviceID:     2912,
					BDF:          "0000:18:00.0",
					DeviceClass:  "MassStorageController",
					Manufacturer: "Intel",
					PartNumber:   "RAID Controller RSP3DD080F",
					LinkWidth:    8,
					LinkSpeed:    "Gen3",
					MaxLinkWidth: 8,
					MaxLinkSpeed: "Gen3",
					SerialNumber: "RAID-001-12345",
				},
				{
					Slot:         "PCIeCard2",
					VendorID:     5555,
					DeviceID:     4401,
					BDF:          "0000:3b:00.0",
					DeviceClass:  "NetworkController",
					Manufacturer: "Mellanox",
					PartNumber:   "ConnectX-5",
					LinkWidth:    16,
					LinkSpeed:    "Gen3",
					MaxLinkWidth: 16,
					MaxLinkSpeed: "Gen3",
					SerialNumber: "MT2892012345",
				},
			},
			PowerSupply: []models.PSU{
				{
					Slot:         "0",
					Present:      true,
					Model:        "GW-CRPS3000LW",
					Vendor:       "Great Wall",
					WattageW:     3000,
					SerialNumber: "2P06C102610",
					PartNumber:   "V0310C9000000000",
					Firmware:     "00.03.05",
					Status:       "OK",
					InputType:    "ACWideRange",
					InputPowerW:  137,
					OutputPowerW: 104,
					InputVoltage: 215.25,
				},
			},
		},
	}

	// Convert to Reanimator format
	reanimator, err := ConvertToReanimator(result)
	if err != nil {
		t.Fatalf("ConvertToReanimator failed: %v", err)
	}

	// Verify top-level fields
	if reanimator.Filename != "redfish://10.10.10.103" {
		t.Errorf("Filename mismatch: got %q", reanimator.Filename)
	}

	if reanimator.SourceType != "api" {
		t.Errorf("SourceType mismatch: got %q", reanimator.SourceType)
	}

	if reanimator.Protocol != "redfish" {
		t.Errorf("Protocol mismatch: got %q", reanimator.Protocol)
	}

	if reanimator.TargetHost != "10.10.10.103" {
		t.Errorf("TargetHost mismatch: got %q", reanimator.TargetHost)
	}

	// CollectedAt is exported as an RFC3339 string.
	if reanimator.CollectedAt != "2026-02-10T15:30:00Z" {
		t.Errorf("CollectedAt mismatch: got %q", reanimator.CollectedAt)
	}

	// Verify hardware sections
	hw := reanimator.Hardware

	// Board
	if hw.Board.SerialNumber != "21D634101" {
		t.Errorf("Board serial mismatch: got %q", hw.Board.SerialNumber)
	}

	// Firmware
	if len(hw.Firmware) != 3 {
		t.Errorf("Expected 3 firmware entries, got %d", len(hw.Firmware))
	}

	// CPUs
	if len(hw.CPUs) != 2 {
		t.Fatalf("Expected 2 CPUs, got %d", len(hw.CPUs))
	}

	if hw.CPUs[0].Manufacturer != "Intel" {
		t.Errorf("CPU manufacturer not inferred: got %q", hw.CPUs[0].Manufacturer)
	}

	if hw.CPUs[0].Status != "Unknown" {
		t.Errorf("CPU status mismatch: got %q", hw.CPUs[0].Status)
	}

	// Memory (empty slots are excluded)
	if len(hw.Memory) != 1 {
		t.Errorf("Expected 1 memory entry (installed only), got %d", len(hw.Memory))
	}

	// Storage
	if len(hw.Storage) != 2 {
		t.Errorf("Expected 2 storage devices, got %d", len(hw.Storage))
	}

	if hw.Storage[0].Status != "Unknown" {
		t.Errorf("Storage status mismatch: got %q", hw.Storage[0].Status)
	}

	// PCIe devices
	if len(hw.PCIeDevices) != 2 {
		t.Errorf("Expected 2 PCIe devices, got %d", len(hw.PCIeDevices))
	}

	if hw.PCIeDevices[0].Model == "" {
		t.Error("PCIe model should be populated from PartNumber")
	}

	// Power supplies
	if len(hw.PowerSupplies) != 1 {
		t.Errorf("Expected 1 PSU, got %d", len(hw.PowerSupplies))
	}

	// Verify JSON marshaling works
	jsonData, err := json.MarshalIndent(reanimator, "", "  ")
	if err != nil {
		t.Fatalf("Failed to marshal to JSON: %v", err)
	}

	// Check that JSON contains expected fields
	jsonStr := string(jsonData)
	expectedFields := []string{
		`"filename"`,
		`"source_type"`,
		`"protocol"`,
		`"target_host"`,
		`"collected_at"`,
		`"hardware"`,
		`"board"`,
		`"cpus"`,
		`"memory"`,
		`"storage"`,
		`"pcie_devices"`,
		`"power_supplies"`,
		`"firmware"`,
	}

	for _, field := range expectedFields {
		if !strings.Contains(jsonStr, field) {
			t.Errorf("JSON missing expected field: %s", field)
		}
	}

	// Optional: print JSON for manual inspection (commented out for normal test runs)
	// t.Logf("Generated Reanimator JSON:\n%s", string(jsonData))
}
|
||||
|
||||
// TestReanimatorExportWithoutTargetHost tests that target_host is inferred from filename
|
||||
func TestReanimatorExportWithoutTargetHost(t *testing.T) {
|
||||
result := &models.AnalysisResult{
|
||||
Filename: "redfish://192.168.1.100",
|
||||
SourceType: "api",
|
||||
Protocol: "redfish",
|
||||
TargetHost: "", // Empty - should be inferred
|
||||
CollectedAt: time.Now(),
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: models.BoardInfo{
|
||||
SerialNumber: "TEST123",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
reanimator, err := ConvertToReanimator(result)
|
||||
if err != nil {
|
||||
t.Fatalf("ConvertToReanimator failed: %v", err)
|
||||
}
|
||||
|
||||
if reanimator.TargetHost != "192.168.1.100" {
|
||||
t.Errorf("Expected target_host to be inferred from filename, got %q", reanimator.TargetHost)
|
||||
}
|
||||
}
|
||||
254
internal/exporter/reanimator_models.go
Normal file
254
internal/exporter/reanimator_models.go
Normal file
@@ -0,0 +1,254 @@
|
||||
package exporter
|
||||
|
||||
// ReanimatorExport represents the top-level structure for Reanimator format export.
// It mirrors the JSON document consumed by the Reanimator import pipeline.
type ReanimatorExport struct {
	Filename    string             `json:"filename"`
	SourceType  string             `json:"source_type,omitempty"`
	Protocol    string             `json:"protocol,omitempty"`
	TargetHost  string             `json:"target_host,omitempty"`
	CollectedAt string             `json:"collected_at"` // RFC3339 format
	Hardware    ReanimatorHardware `json:"hardware"`
}

// ReanimatorHardware contains all hardware components of one exported system.
// Every section except Board is optional and omitted from JSON when empty.
type ReanimatorHardware struct {
	Board         ReanimatorBoard      `json:"board"`
	Firmware      []ReanimatorFirmware `json:"firmware,omitempty"`
	CPUs          []ReanimatorCPU      `json:"cpus,omitempty"`
	Memory        []ReanimatorMemory   `json:"memory,omitempty"`
	Storage       []ReanimatorStorage  `json:"storage,omitempty"`
	PCIeDevices   []ReanimatorPCIe     `json:"pcie_devices,omitempty"`
	PowerSupplies []ReanimatorPSU      `json:"power_supplies,omitempty"`
	Sensors       *ReanimatorSensors   `json:"sensors,omitempty"`
	EventLogs     []ReanimatorEventLog `json:"event_logs,omitempty"`
}
|
||||
|
||||
// ReanimatorBoard represents motherboard/server information.
// SerialNumber is the only mandatory field in the exported JSON.
type ReanimatorBoard struct {
	Manufacturer string `json:"manufacturer,omitempty"`
	ProductName  string `json:"product_name,omitempty"`
	SerialNumber string `json:"serial_number"`
	PartNumber   string `json:"part_number,omitempty"`
	UUID         string `json:"uuid,omitempty"`
}

// ReanimatorFirmware represents firmware version information for one device
// (e.g. BIOS, BMC, CPLD).
type ReanimatorFirmware struct {
	DeviceName string `json:"device_name"`
	Version    string `json:"version"`
}

// ReanimatorStatusHistoryEntry records a single status transition of a
// component, shared by all component types via their StatusHistory fields.
type ReanimatorStatusHistoryEntry struct {
	Status    string `json:"status"`
	ChangedAt string `json:"changed_at"`
	Details   string `json:"details,omitempty"`
}
|
||||
|
||||
// ReanimatorCPU represents processor information, including optional health
// telemetry (temperature, power, error counters) when the source reports it.
type ReanimatorCPU struct {
	Socket                  int                            `json:"socket"`
	Model                   string                         `json:"model,omitempty"`
	Cores                   int                            `json:"cores,omitempty"`
	Threads                 int                            `json:"threads,omitempty"`
	FrequencyMHz            int                            `json:"frequency_mhz,omitempty"`
	MaxFrequencyMHz         int                            `json:"max_frequency_mhz,omitempty"`
	TemperatureC            float64                        `json:"temperature_c,omitempty"`
	PowerW                  float64                        `json:"power_w,omitempty"`
	Throttled               *bool                          `json:"throttled,omitempty"` // pointer so "unknown" is distinguishable from false
	CorrectableErrorCount   int64                          `json:"correctable_error_count,omitempty"`
	UncorrectableErrorCount int64                          `json:"uncorrectable_error_count,omitempty"`
	LifeRemainingPct        float64                        `json:"life_remaining_pct,omitempty"`
	LifeUsedPct             float64                        `json:"life_used_pct,omitempty"`
	SerialNumber            string                         `json:"serial_number,omitempty"`
	Firmware                string                         `json:"firmware,omitempty"`
	Present                 *bool                          `json:"present,omitempty"`
	Manufacturer            string                         `json:"manufacturer,omitempty"`
	Status                  string                         `json:"status,omitempty"`
	StatusCheckedAt         string                         `json:"status_checked_at,omitempty"`
	StatusChangedAt         string                         `json:"status_changed_at,omitempty"`
	ManufacturedYearWeek    string                         `json:"manufactured_year_week,omitempty"`
	StatusHistory           []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
	ErrorDescription        string                         `json:"error_description,omitempty"`
}
|
||||
|
||||
// ReanimatorMemory represents a memory module (DIMM), with optional ECC error
// counters and health metrics when the source reports them.
type ReanimatorMemory struct {
	Slot                       string                         `json:"slot"`
	Location                   string                         `json:"location,omitempty"`
	Present                    *bool                          `json:"present,omitempty"` // pointer so "unknown" is distinguishable from false
	SizeMB                     int                            `json:"size_mb,omitempty"`
	Type                       string                         `json:"type,omitempty"`
	MaxSpeedMHz                int                            `json:"max_speed_mhz,omitempty"`
	CurrentSpeedMHz            int                            `json:"current_speed_mhz,omitempty"`
	TemperatureC               float64                        `json:"temperature_c,omitempty"`
	CorrectableECCErrorCount   int64                          `json:"correctable_ecc_error_count,omitempty"`
	UncorrectableECCErrorCount int64                          `json:"uncorrectable_ecc_error_count,omitempty"`
	LifeRemainingPct           float64                        `json:"life_remaining_pct,omitempty"`
	LifeUsedPct                float64                        `json:"life_used_pct,omitempty"`
	SpareBlocksRemainingPct    float64                        `json:"spare_blocks_remaining_pct,omitempty"`
	PerformanceDegraded        *bool                          `json:"performance_degraded,omitempty"`
	DataLossDetected           *bool                          `json:"data_loss_detected,omitempty"`
	Manufacturer               string                         `json:"manufacturer,omitempty"`
	SerialNumber               string                         `json:"serial_number,omitempty"`
	PartNumber                 string                         `json:"part_number,omitempty"`
	Status                     string                         `json:"status,omitempty"`
	StatusCheckedAt            string                         `json:"status_checked_at,omitempty"`
	StatusChangedAt            string                         `json:"status_changed_at,omitempty"`
	ManufacturedYearWeek       string                         `json:"manufactured_year_week,omitempty"`
	StatusHistory              []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
	ErrorDescription           string                         `json:"error_description,omitempty"`
}
|
||||
|
||||
// ReanimatorStorage represents a storage device (NVMe/SSD/HDD) together with
// its SMART-style wear and error counters where available.
// Model and SerialNumber are always emitted, even when empty.
type ReanimatorStorage struct {
	Slot                  string                         `json:"slot"`
	Type                  string                         `json:"type,omitempty"`
	Model                 string                         `json:"model"`
	SizeGB                int                            `json:"size_gb,omitempty"`
	SerialNumber          string                         `json:"serial_number"`
	Manufacturer          string                         `json:"manufacturer,omitempty"`
	Firmware              string                         `json:"firmware,omitempty"`
	Interface             string                         `json:"interface,omitempty"`
	Present               *bool                          `json:"present,omitempty"`
	TemperatureC          float64                        `json:"temperature_c,omitempty"`
	PowerOnHours          int64                          `json:"power_on_hours,omitempty"`
	PowerCycles           int64                          `json:"power_cycles,omitempty"`
	UnsafeShutdowns       int64                          `json:"unsafe_shutdowns,omitempty"`
	MediaErrors           int64                          `json:"media_errors,omitempty"`
	ErrorLogEntries       int64                          `json:"error_log_entries,omitempty"`
	WrittenBytes          int64                          `json:"written_bytes,omitempty"`
	ReadBytes             int64                          `json:"read_bytes,omitempty"`
	LifeUsedPct           float64                        `json:"life_used_pct,omitempty"`
	RemainingEndurancePct *int                           `json:"remaining_endurance_pct,omitempty"` // pointer: nil = not reported
	LifeRemainingPct      float64                        `json:"life_remaining_pct,omitempty"`
	AvailableSparePct     float64                        `json:"available_spare_pct,omitempty"`
	ReallocatedSectors    int64                          `json:"reallocated_sectors,omitempty"`
	CurrentPendingSectors int64                          `json:"current_pending_sectors,omitempty"`
	OfflineUncorrectable  int64                          `json:"offline_uncorrectable,omitempty"`
	Status                string                         `json:"status,omitempty"`
	StatusCheckedAt       string                         `json:"status_checked_at,omitempty"`
	StatusChangedAt       string                         `json:"status_changed_at,omitempty"`
	ManufacturedYearWeek  string                         `json:"manufactured_year_week,omitempty"`
	StatusHistory         []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
	ErrorDescription      string                         `json:"error_description,omitempty"`
}
|
||||
|
||||
// ReanimatorPCIe represents a PCIe device. Beyond link/identity data it can
// carry class-specific telemetry (GPU ECC counters, RAID battery health, SFP
// optics readings) — each group is omitted when not reported by the source.
type ReanimatorPCIe struct {
	Slot                   string                         `json:"slot"`
	VendorID               int                            `json:"vendor_id,omitempty"`
	DeviceID               int                            `json:"device_id,omitempty"`
	NUMANode               int                            `json:"numa_node,omitempty"`
	TemperatureC           float64                        `json:"temperature_c,omitempty"`
	PowerW                 float64                        `json:"power_w,omitempty"`
	LifeRemainingPct       float64                        `json:"life_remaining_pct,omitempty"`
	LifeUsedPct            float64                        `json:"life_used_pct,omitempty"`
	ECCCorrectedTotal      int64                          `json:"ecc_corrected_total,omitempty"`
	ECCUncorrectedTotal    int64                          `json:"ecc_uncorrected_total,omitempty"`
	HWSlowdown             *bool                          `json:"hw_slowdown,omitempty"`
	BatteryChargePct       float64                        `json:"battery_charge_pct,omitempty"`
	BatteryHealthPct       float64                        `json:"battery_health_pct,omitempty"`
	BatteryTemperatureC    float64                        `json:"battery_temperature_c,omitempty"`
	BatteryVoltageV        float64                        `json:"battery_voltage_v,omitempty"`
	BatteryReplaceRequired *bool                          `json:"battery_replace_required,omitempty"`
	SFPTemperatureC        float64                        `json:"sfp_temperature_c,omitempty"`
	SFPTXPowerDBm          float64                        `json:"sfp_tx_power_dbm,omitempty"`
	SFPRXPowerDBm          float64                        `json:"sfp_rx_power_dbm,omitempty"`
	SFPVoltageV            float64                        `json:"sfp_voltage_v,omitempty"`
	SFPBiasMA              float64                        `json:"sfp_bias_ma,omitempty"`
	BDF                    string                         `json:"-"` // internal PCI address; deliberately excluded from the export
	DeviceClass            string                         `json:"device_class,omitempty"`
	Manufacturer           string                         `json:"manufacturer,omitempty"`
	Model                  string                         `json:"model,omitempty"`
	LinkWidth              int                            `json:"link_width,omitempty"`
	LinkSpeed              string                         `json:"link_speed,omitempty"`
	MaxLinkWidth           int                            `json:"max_link_width,omitempty"`
	MaxLinkSpeed           string                         `json:"max_link_speed,omitempty"`
	MACAddresses           []string                       `json:"mac_addresses,omitempty"`
	Present                *bool                          `json:"present,omitempty"`
	SerialNumber           string                         `json:"serial_number,omitempty"`
	Firmware               string                         `json:"firmware,omitempty"`
	Status                 string                         `json:"status,omitempty"`
	StatusCheckedAt        string                         `json:"status_checked_at,omitempty"`
	StatusChangedAt        string                         `json:"status_changed_at,omitempty"`
	ManufacturedYearWeek   string                         `json:"manufactured_year_week,omitempty"`
	StatusHistory          []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
	ErrorDescription       string                         `json:"error_description,omitempty"`
}
|
||||
|
||||
// ReanimatorPSU represents a power supply unit, including live electrical
// readings (input/output power, input voltage) when the source reports them.
type ReanimatorPSU struct {
	Slot                 string                         `json:"slot"`
	Present              *bool                          `json:"present,omitempty"` // pointer so "unknown" is distinguishable from false
	Model                string                         `json:"model,omitempty"`
	Vendor               string                         `json:"vendor,omitempty"`
	WattageW             int                            `json:"wattage_w,omitempty"`
	SerialNumber         string                         `json:"serial_number,omitempty"`
	PartNumber           string                         `json:"part_number,omitempty"`
	Firmware             string                         `json:"firmware,omitempty"`
	Status               string                         `json:"status,omitempty"`
	InputType            string                         `json:"input_type,omitempty"`
	InputPowerW          float64                        `json:"input_power_w,omitempty"`
	OutputPowerW         float64                        `json:"output_power_w,omitempty"`
	InputVoltage         float64                        `json:"input_voltage,omitempty"`
	TemperatureC         float64                        `json:"temperature_c,omitempty"`
	LifeRemainingPct     float64                        `json:"life_remaining_pct,omitempty"`
	LifeUsedPct          float64                        `json:"life_used_pct,omitempty"`
	StatusCheckedAt      string                         `json:"status_checked_at,omitempty"`
	StatusChangedAt      string                         `json:"status_changed_at,omitempty"`
	ManufacturedYearWeek string                         `json:"manufactured_year_week,omitempty"`
	StatusHistory        []ReanimatorStatusHistoryEntry `json:"status_history,omitempty"`
	ErrorDescription     string                         `json:"error_description,omitempty"`
}
|
||||
|
||||
// ReanimatorEventLog represents a single BMC/system event-log entry included
// in the export. Source and Message are mandatory; RawPayload preserves the
// original provider-specific record for downstream inspection.
type ReanimatorEventLog struct {
	Source       string         `json:"source"`
	EventTime    string         `json:"event_time,omitempty"`
	Severity     string         `json:"severity,omitempty"`
	MessageID    string         `json:"message_id,omitempty"`
	Message      string         `json:"message"`
	ComponentRef string         `json:"component_ref,omitempty"`
	Fingerprint  string         `json:"fingerprint,omitempty"`
	IsActive     *bool          `json:"is_active,omitempty"`
	RawPayload   map[string]any `json:"raw_payload,omitempty"`
}
|
||||
|
||||
// ReanimatorSensors groups all sensor readings of the exported system by
// sensor category.
type ReanimatorSensors struct {
	Fans         []ReanimatorFanSensor         `json:"fans,omitempty"`
	Power        []ReanimatorPowerSensor       `json:"power,omitempty"`
	Temperatures []ReanimatorTemperatureSensor `json:"temperatures,omitempty"`
	Other        []ReanimatorOtherSensor       `json:"other,omitempty"`
}

// ReanimatorFanSensor is a single fan tachometer reading.
type ReanimatorFanSensor struct {
	Name     string `json:"name"`
	Location string `json:"location,omitempty"`
	RPM      int    `json:"rpm,omitempty"`
	Status   string `json:"status,omitempty"`
}

// ReanimatorPowerSensor is a single electrical reading (voltage/current/power).
type ReanimatorPowerSensor struct {
	Name     string  `json:"name"`
	Location string  `json:"location,omitempty"`
	VoltageV float64 `json:"voltage_v,omitempty"`
	CurrentA float64 `json:"current_a,omitempty"`
	PowerW   float64 `json:"power_w,omitempty"`
	Status   string  `json:"status,omitempty"`
}

// ReanimatorTemperatureSensor is a single temperature reading with optional
// warning/critical thresholds.
type ReanimatorTemperatureSensor struct {
	Name                     string  `json:"name"`
	Location                 string  `json:"location,omitempty"`
	Celsius                  float64 `json:"celsius,omitempty"`
	ThresholdWarningCelsius  float64 `json:"threshold_warning_celsius,omitempty"`
	ThresholdCriticalCelsius float64 `json:"threshold_critical_celsius,omitempty"`
	Status                   string  `json:"status,omitempty"`
}

// ReanimatorOtherSensor covers any sensor that does not fit the categories
// above; Unit describes the meaning of Value.
type ReanimatorOtherSensor struct {
	Name     string  `json:"name"`
	Location string  `json:"location,omitempty"`
	Value    float64 `json:"value,omitempty"`
	Unit     string  `json:"unit,omitempty"`
	Status   string  `json:"status,omitempty"`
}
|
||||
63
internal/ingest/service.go
Normal file
63
internal/ingest/service.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package ingest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/collector"
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// Service provides stateless ingestion helpers that turn raw BMC payloads
// (uploaded archives or captured Redfish trees) into AnalysisResult values.
type Service struct{}

// RedfishSourceMetadata carries optional source attributes for a Redfish
// snapshot; AnalyzeRedfishRawPayloads uses them only to fill fields the
// replay itself left empty.
type RedfishSourceMetadata struct {
	TargetHost     string // BMC host the snapshot was collected from
	SourceTimezone string // timezone/offset active during collection (e.g. +08:00)
	Filename       string // display name to record for the snapshot source
}

// NewService returns a ready-to-use ingestion Service.
func NewService() *Service {
	return &Service{}
}
|
||||
|
||||
func (s *Service) AnalyzeArchivePayload(filename string, payload []byte) (*models.AnalysisResult, string, error) {
|
||||
p := parser.NewBMCParser()
|
||||
if err := p.ParseFromReader(bytes.NewReader(payload), filename); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
return p.Result(), p.DetectedVendor(), nil
|
||||
}
|
||||
|
||||
func (s *Service) AnalyzeRedfishRawPayloads(rawPayloads map[string]any, meta RedfishSourceMetadata) (*models.AnalysisResult, string, error) {
|
||||
result, err := collector.ReplayRedfishFromRawPayloads(rawPayloads, nil)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if result == nil {
|
||||
return nil, "", fmt.Errorf("redfish replay returned nil result")
|
||||
}
|
||||
if strings.TrimSpace(result.Protocol) == "" {
|
||||
result.Protocol = "redfish"
|
||||
}
|
||||
if strings.TrimSpace(result.SourceType) == "" {
|
||||
result.SourceType = models.SourceTypeAPI
|
||||
}
|
||||
if strings.TrimSpace(result.TargetHost) == "" {
|
||||
result.TargetHost = strings.TrimSpace(meta.TargetHost)
|
||||
}
|
||||
if strings.TrimSpace(result.SourceTimezone) == "" {
|
||||
result.SourceTimezone = strings.TrimSpace(meta.SourceTimezone)
|
||||
}
|
||||
if strings.TrimSpace(result.Filename) == "" {
|
||||
if strings.TrimSpace(meta.Filename) != "" {
|
||||
result.Filename = strings.TrimSpace(meta.Filename)
|
||||
} else if target := strings.TrimSpace(result.TargetHost); target != "" {
|
||||
result.Filename = "redfish://" + target
|
||||
} else {
|
||||
result.Filename = "redfish://snapshot"
|
||||
}
|
||||
}
|
||||
return result, "redfish", nil
|
||||
}
|
||||
@@ -2,13 +2,25 @@ package models
|
||||
|
||||
import "time"
|
||||
|
||||
const (
|
||||
SourceTypeArchive = "archive"
|
||||
SourceTypeAPI = "api"
|
||||
)
|
||||
|
||||
// AnalysisResult contains all parsed data from an archive
|
||||
type AnalysisResult struct {
|
||||
Filename string `json:"filename"`
|
||||
Events []Event `json:"events"`
|
||||
FRU []FRUInfo `json:"fru"`
|
||||
Sensors []SensorReading `json:"sensors"`
|
||||
Hardware *HardwareConfig `json:"hardware"`
|
||||
Filename string `json:"filename"`
|
||||
SourceType string `json:"source_type,omitempty"` // archive | api
|
||||
Protocol string `json:"protocol,omitempty"` // redfish | ipmi
|
||||
TargetHost string `json:"target_host,omitempty"` // BMC host for live collect
|
||||
SourceTimezone string `json:"source_timezone,omitempty"` // Source timezone/offset used during collection (e.g. +08:00)
|
||||
CollectedAt time.Time `json:"collected_at,omitempty"` // Collection/upload timestamp
|
||||
InventoryLastModifiedAt time.Time `json:"inventory_last_modified_at,omitempty"` // Redfish inventory last modified (InventoryData/Status)
|
||||
RawPayloads map[string]any `json:"raw_payloads,omitempty"` // Additional source payloads (e.g. Redfish tree)
|
||||
Events []Event `json:"events"`
|
||||
FRU []FRUInfo `json:"fru"`
|
||||
Sensors []SensorReading `json:"sensors"`
|
||||
Hardware *HardwareConfig `json:"hardware"`
|
||||
}
|
||||
|
||||
// Event represents a single log event
|
||||
@@ -33,6 +45,19 @@ const (
|
||||
SeverityInfo Severity = "info"
|
||||
)
|
||||
|
||||
// StatusAtCollection captures component status at a specific timestamp.
|
||||
type StatusAtCollection struct {
|
||||
Status string `json:"status"`
|
||||
At time.Time `json:"at"`
|
||||
}
|
||||
|
||||
// StatusHistoryEntry represents a status transition point.
|
||||
type StatusHistoryEntry struct {
|
||||
Status string `json:"status"`
|
||||
ChangedAt time.Time `json:"changed_at"`
|
||||
Details string `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// SensorReading represents a single sensor reading
|
||||
type SensorReading struct {
|
||||
Name string `json:"name"`
|
||||
@@ -61,9 +86,11 @@ type FRUInfo struct {
|
||||
type HardwareConfig struct {
|
||||
Firmware []FirmwareInfo `json:"firmware,omitempty"`
|
||||
BoardInfo BoardInfo `json:"board,omitempty"`
|
||||
Devices []HardwareDevice `json:"devices,omitempty"`
|
||||
CPUs []CPU `json:"cpus,omitempty"`
|
||||
Memory []MemoryDIMM `json:"memory,omitempty"`
|
||||
Storage []Storage `json:"storage,omitempty"`
|
||||
Volumes []StorageVolume `json:"volumes,omitempty"`
|
||||
PCIeDevices []PCIeDevice `json:"pcie_devices,omitempty"`
|
||||
GPUs []GPU `json:"gpus,omitempty"`
|
||||
NetworkCards []NIC `json:"network_cards,omitempty"`
|
||||
@@ -71,25 +98,93 @@ type HardwareConfig struct {
|
||||
PowerSupply []PSU `json:"power_supplies,omitempty"`
|
||||
}
|
||||
|
||||
// FirmwareInfo represents firmware version information
|
||||
type FirmwareInfo struct {
|
||||
DeviceName string `json:"device_name"`
|
||||
Version string `json:"version"`
|
||||
BuildTime string `json:"build_time,omitempty"`
|
||||
const (
|
||||
DeviceKindBoard = "board"
|
||||
DeviceKindCPU = "cpu"
|
||||
DeviceKindMemory = "memory"
|
||||
DeviceKindStorage = "storage"
|
||||
DeviceKindPCIe = "pcie"
|
||||
DeviceKindGPU = "gpu"
|
||||
DeviceKindNetwork = "network"
|
||||
DeviceKindPSU = "psu"
|
||||
)
|
||||
|
||||
// HardwareDevice is canonical device inventory used across UI and exports.
|
||||
type HardwareDevice struct {
|
||||
ID string `json:"id"`
|
||||
Kind string `json:"kind"`
|
||||
Source string `json:"source,omitempty"`
|
||||
Slot string `json:"slot,omitempty"`
|
||||
Location string `json:"location,omitempty"`
|
||||
BDF string `json:"bdf,omitempty"`
|
||||
DeviceClass string `json:"device_class,omitempty"`
|
||||
VendorID int `json:"vendor_id,omitempty"`
|
||||
DeviceID int `json:"device_id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
Type string `json:"type,omitempty"`
|
||||
Interface string `json:"interface,omitempty"`
|
||||
Present *bool `json:"present,omitempty"`
|
||||
SizeMB int `json:"size_mb,omitempty"`
|
||||
SizeGB int `json:"size_gb,omitempty"`
|
||||
Cores int `json:"cores,omitempty"`
|
||||
Threads int `json:"threads,omitempty"`
|
||||
FrequencyMHz int `json:"frequency_mhz,omitempty"`
|
||||
MaxFreqMHz int `json:"max_frequency_mhz,omitempty"`
|
||||
PortCount int `json:"port_count,omitempty"`
|
||||
PortType string `json:"port_type,omitempty"`
|
||||
MACAddresses []string `json:"mac_addresses,omitempty"`
|
||||
LinkWidth int `json:"link_width,omitempty"`
|
||||
LinkSpeed string `json:"link_speed,omitempty"`
|
||||
MaxLinkWidth int `json:"max_link_width,omitempty"`
|
||||
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
||||
WattageW int `json:"wattage_w,omitempty"`
|
||||
InputType string `json:"input_type,omitempty"`
|
||||
InputPowerW int `json:"input_power_w,omitempty"`
|
||||
OutputPowerW int `json:"output_power_w,omitempty"`
|
||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||
TemperatureC int `json:"temperature_c,omitempty"`
|
||||
RemainingEndurancePct *int `json:"remaining_endurance_pct,omitempty"` // 0-100 %; nil = not reported
|
||||
NUMANode int `json:"numa_node,omitempty"` // 0 = not reported/N/A
|
||||
Status string `json:"status,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// BoardInfo represents motherboard information
|
||||
// FirmwareInfo represents firmware version information
|
||||
type FirmwareInfo struct {
|
||||
DeviceName string `json:"device_name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Version string `json:"version"`
|
||||
BuildTime string `json:"build_time,omitempty"`
|
||||
}
|
||||
|
||||
// BoardInfo represents motherboard/system information
|
||||
type BoardInfo struct {
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
ProductName string `json:"product_name,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
ProductName string `json:"product_name,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Version string `json:"version,omitempty"`
|
||||
UUID string `json:"uuid,omitempty"`
|
||||
BMCMACAddress string `json:"bmc_mac_address,omitempty"`
|
||||
}
|
||||
|
||||
// CPU represents processor information
|
||||
type CPU struct {
|
||||
Socket int `json:"socket"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Cores int `json:"cores"`
|
||||
Threads int `json:"threads"`
|
||||
FrequencyMHz int `json:"frequency_mhz"`
|
||||
@@ -100,12 +195,21 @@ type CPU struct {
|
||||
TDP int `json:"tdp_w,omitempty"`
|
||||
PPIN string `json:"ppin,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// MemoryDIMM represents a memory module
|
||||
type MemoryDIMM struct {
|
||||
Slot string `json:"slot"`
|
||||
Location string `json:"location"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Present bool `json:"present"`
|
||||
SizeMB int `json:"size_mb"`
|
||||
Type string `json:"type"`
|
||||
@@ -117,23 +221,57 @@ type MemoryDIMM struct {
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
Ranks int `json:"ranks,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// Storage represents a storage device
|
||||
type Storage struct {
|
||||
Slot string `json:"slot"`
|
||||
Type string `json:"type"`
|
||||
Model string `json:"model"`
|
||||
SizeGB int `json:"size_gb"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
Interface string `json:"interface,omitempty"`
|
||||
Slot string `json:"slot"`
|
||||
Type string `json:"type"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
SizeGB int `json:"size_gb"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
Interface string `json:"interface,omitempty"`
|
||||
Present bool `json:"present"`
|
||||
Location string `json:"location,omitempty"` // Front/Rear
|
||||
BackplaneID int `json:"backplane_id,omitempty"`
|
||||
RemainingEndurancePct *int `json:"remaining_endurance_pct,omitempty"` // 0-100 %; nil = not reported
|
||||
Status string `json:"status,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
}
|
||||
|
||||
// StorageVolume represents a logical storage volume (RAID/VROC/etc.).
|
||||
type StorageVolume struct {
|
||||
ID string `json:"id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Controller string `json:"controller,omitempty"`
|
||||
RAIDLevel string `json:"raid_level,omitempty"`
|
||||
SizeGB int `json:"size_gb,omitempty"`
|
||||
CapacityBytes int64 `json:"capacity_bytes,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
Bootable bool `json:"bootable,omitempty"`
|
||||
Encrypted bool `json:"encrypted,omitempty"`
|
||||
}
|
||||
|
||||
// PCIeDevice represents a PCIe device
|
||||
type PCIeDevice struct {
|
||||
Slot string `json:"slot"`
|
||||
Description string `json:"description,omitempty"`
|
||||
VendorID int `json:"vendor_id"`
|
||||
DeviceID int `json:"device_id"`
|
||||
BDF string `json:"bdf"`
|
||||
@@ -146,12 +284,22 @@ type PCIeDevice struct {
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
MACAddresses []string `json:"mac_addresses,omitempty"`
|
||||
NUMANode int `json:"numa_node,omitempty"` // 0 = not reported/N/A
|
||||
Status string `json:"status,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// NIC represents a network interface card
|
||||
type NIC struct {
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
MACAddress string `json:"mac_address"`
|
||||
SpeedMbps int `json:"speed_mbps,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
@@ -159,35 +307,68 @@ type NIC struct {
|
||||
|
||||
// PSU represents a power supply unit
|
||||
type PSU struct {
|
||||
Slot string `json:"slot"`
|
||||
Present bool `json:"present"`
|
||||
Model string `json:"model"`
|
||||
Vendor string `json:"vendor,omitempty"`
|
||||
WattageW int `json:"wattage_w,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
InputType string `json:"input_type,omitempty"`
|
||||
InputPowerW int `json:"input_power_w,omitempty"`
|
||||
OutputPowerW int `json:"output_power_w,omitempty"`
|
||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||
OutputVoltage float64 `json:"output_voltage,omitempty"`
|
||||
TemperatureC int `json:"temperature_c,omitempty"`
|
||||
Slot string `json:"slot"`
|
||||
Present bool `json:"present"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Vendor string `json:"vendor,omitempty"`
|
||||
WattageW int `json:"wattage_w,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
InputType string `json:"input_type,omitempty"`
|
||||
InputPowerW int `json:"input_power_w,omitempty"`
|
||||
OutputPowerW int `json:"output_power_w,omitempty"`
|
||||
InputVoltage float64 `json:"input_voltage,omitempty"`
|
||||
OutputVoltage float64 `json:"output_voltage,omitempty"`
|
||||
TemperatureC int `json:"temperature_c,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
}
|
||||
|
||||
// GPU represents a graphics processing unit
|
||||
type GPU struct {
|
||||
Slot string `json:"slot"`
|
||||
Model string `json:"model"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
VendorID int `json:"vendor_id,omitempty"`
|
||||
DeviceID int `json:"device_id,omitempty"`
|
||||
BDF string `json:"bdf,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
LinkWidth int `json:"link_width,omitempty"`
|
||||
LinkSpeed string `json:"link_speed,omitempty"`
|
||||
Slot string `json:"slot"`
|
||||
Location string `json:"location,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Manufacturer string `json:"manufacturer,omitempty"`
|
||||
VendorID int `json:"vendor_id,omitempty"`
|
||||
DeviceID int `json:"device_id,omitempty"`
|
||||
BDF string `json:"bdf,omitempty"`
|
||||
UUID string `json:"uuid,omitempty"`
|
||||
SerialNumber string `json:"serial_number,omitempty"`
|
||||
PartNumber string `json:"part_number,omitempty"`
|
||||
Firmware string `json:"firmware,omitempty"`
|
||||
VideoBIOS string `json:"video_bios,omitempty"`
|
||||
IRQ int `json:"irq,omitempty"`
|
||||
BusType string `json:"bus_type,omitempty"`
|
||||
DMASize string `json:"dma_size,omitempty"`
|
||||
DMAMask string `json:"dma_mask,omitempty"`
|
||||
DeviceMinor int `json:"device_minor,omitempty"`
|
||||
Temperature int `json:"temperature,omitempty"` // GPU core temp
|
||||
MemTemperature int `json:"mem_temperature,omitempty"` // GPU memory temp
|
||||
Power int `json:"power,omitempty"` // Current power draw (W)
|
||||
MaxPower int `json:"max_power,omitempty"` // TDP (W)
|
||||
ClockSpeed int `json:"clock_speed,omitempty"` // Operating speed MHz
|
||||
MaxLinkWidth int `json:"max_link_width,omitempty"`
|
||||
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
||||
CurrentLinkWidth int `json:"current_link_width,omitempty"`
|
||||
CurrentLinkSpeed string `json:"current_link_speed,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// NetworkAdapter represents a network adapter with detailed info
|
||||
@@ -195,7 +376,9 @@ type NetworkAdapter struct {
|
||||
Slot string `json:"slot"`
|
||||
Location string `json:"location"`
|
||||
Present bool `json:"present"`
|
||||
BDF string `json:"bdf,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Vendor string `json:"vendor,omitempty"`
|
||||
VendorID int `json:"vendor_id,omitempty"`
|
||||
DeviceID int `json:"device_id,omitempty"`
|
||||
@@ -205,5 +388,17 @@ type NetworkAdapter struct {
|
||||
PortCount int `json:"port_count,omitempty"`
|
||||
PortType string `json:"port_type,omitempty"`
|
||||
MACAddresses []string `json:"mac_addresses,omitempty"`
|
||||
LinkWidth int `json:"link_width,omitempty"`
|
||||
LinkSpeed string `json:"link_speed,omitempty"`
|
||||
MaxLinkWidth int `json:"max_link_width,omitempty"`
|
||||
MaxLinkSpeed string `json:"max_link_speed,omitempty"`
|
||||
NUMANode int `json:"numa_node,omitempty"` // 0 = not reported/N/A
|
||||
Status string `json:"status,omitempty"`
|
||||
|
||||
StatusCheckedAt *time.Time `json:"status_checked_at,omitempty"`
|
||||
StatusChangedAt *time.Time `json:"status_changed_at,omitempty"`
|
||||
StatusAtCollect *StatusAtCollection `json:"status_at_collection,omitempty"`
|
||||
StatusHistory []StatusHistoryEntry `json:"status_history,omitempty"`
|
||||
ErrorDescription string `json:"error_description,omitempty"`
|
||||
Details map[string]any `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
@@ -3,29 +3,56 @@ package parser
|
||||
import (
|
||||
"archive/tar"
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const maxSingleFileSize = 10 * 1024 * 1024
|
||||
const maxZipArchiveSize = 50 * 1024 * 1024
|
||||
const maxGzipDecompressedSize = 50 * 1024 * 1024
|
||||
|
||||
var supportedArchiveExt = map[string]struct{}{
|
||||
".gz": {},
|
||||
".tgz": {},
|
||||
".tar": {},
|
||||
".sds": {},
|
||||
".zip": {},
|
||||
".txt": {},
|
||||
".log": {},
|
||||
}
|
||||
|
||||
// ExtractedFile represents a file extracted from archive
|
||||
type ExtractedFile struct {
|
||||
Path string
|
||||
Content []byte
|
||||
Path string
|
||||
Content []byte
|
||||
ModTime time.Time
|
||||
Truncated bool
|
||||
TruncatedMessage string
|
||||
}
|
||||
|
||||
// ExtractArchive extracts tar.gz or zip archive and returns file contents
|
||||
func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
|
||||
if !IsSupportedArchiveFilename(archivePath) {
|
||||
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(archivePath)))
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(archivePath))
|
||||
|
||||
switch ext {
|
||||
case ".gz", ".tgz":
|
||||
return extractTarGz(archivePath)
|
||||
case ".tar", ".sds":
|
||||
return extractTar(archivePath)
|
||||
case ".zip":
|
||||
return extractZip(archivePath)
|
||||
case ".txt", ".log":
|
||||
return extractSingleFile(archivePath)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported archive format: %s", ext)
|
||||
}
|
||||
@@ -33,16 +60,46 @@ func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
|
||||
|
||||
// ExtractArchiveFromReader extracts archive from reader
|
||||
func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
|
||||
if !IsSupportedArchiveFilename(filename) {
|
||||
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(filename)))
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
|
||||
switch ext {
|
||||
case ".gz", ".tgz":
|
||||
return extractTarGzFromReader(r)
|
||||
return extractTarGzFromReader(r, filename)
|
||||
case ".tar", ".sds":
|
||||
return extractTarFromReader(r)
|
||||
case ".zip":
|
||||
return extractZipFromReader(r)
|
||||
case ".txt", ".log":
|
||||
return extractSingleFileFromReader(r, filename)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported archive format: %s", ext)
|
||||
}
|
||||
}
|
||||
|
||||
// IsSupportedArchiveFilename reports whether filename extension is supported by archive extractor.
|
||||
func IsSupportedArchiveFilename(filename string) bool {
|
||||
ext := strings.ToLower(strings.TrimSpace(filepath.Ext(filename)))
|
||||
if ext == "" {
|
||||
return false
|
||||
}
|
||||
_, ok := supportedArchiveExt[ext]
|
||||
return ok
|
||||
}
|
||||
|
||||
// SupportedArchiveExtensions returns sorted list of archive/file extensions
|
||||
// accepted by archive extractor.
|
||||
func SupportedArchiveExtensions() []string {
|
||||
out := make([]string, 0, len(supportedArchiveExt))
|
||||
for ext := range supportedArchiveExt {
|
||||
out = append(out, ext)
|
||||
}
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
|
||||
func extractTarGz(archivePath string) ([]ExtractedFile, error) {
|
||||
f, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
@@ -50,17 +107,21 @@ func extractTarGz(archivePath string) ([]ExtractedFile, error) {
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return extractTarGzFromReader(f)
|
||||
return extractTarGzFromReader(f, filepath.Base(archivePath))
|
||||
}
|
||||
|
||||
func extractTarGzFromReader(r io.Reader) ([]ExtractedFile, error) {
|
||||
gzr, err := gzip.NewReader(r)
|
||||
func extractTar(archivePath string) ([]ExtractedFile, error) {
|
||||
f, err := os.Open(archivePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gzip reader: %w", err)
|
||||
return nil, fmt.Errorf("open archive: %w", err)
|
||||
}
|
||||
defer gzr.Close()
|
||||
defer f.Close()
|
||||
|
||||
tr := tar.NewReader(gzr)
|
||||
return extractTarFromReader(f)
|
||||
}
|
||||
|
||||
func extractTarFromReader(r io.Reader) ([]ExtractedFile, error) {
|
||||
tr := tar.NewReader(r)
|
||||
var files []ExtractedFile
|
||||
|
||||
for {
|
||||
@@ -90,12 +151,95 @@ func extractTarGzFromReader(r io.Reader) ([]ExtractedFile, error) {
|
||||
files = append(files, ExtractedFile{
|
||||
Path: header.Name,
|
||||
Content: content,
|
||||
ModTime: header.ModTime,
|
||||
})
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
|
||||
gzr, err := gzip.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gzip reader: %w", err)
|
||||
}
|
||||
defer gzr.Close()
|
||||
|
||||
// Read decompressed content with a hard cap.
|
||||
// When the payload exceeds the cap, keep the first chunk and mark it as truncated.
|
||||
decompressed, err := io.ReadAll(io.LimitReader(gzr, maxGzipDecompressedSize+1))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read gzip content: %w", err)
|
||||
}
|
||||
gzipTruncated := len(decompressed) > maxGzipDecompressedSize
|
||||
if gzipTruncated {
|
||||
decompressed = decompressed[:maxGzipDecompressedSize]
|
||||
}
|
||||
|
||||
// Try to read as tar archive
|
||||
tr := tar.NewReader(bytes.NewReader(decompressed))
|
||||
var files []ExtractedFile
|
||||
|
||||
header, err := tr.Next()
|
||||
if err != nil {
|
||||
// Not a tar archive - treat as a single gzipped file
|
||||
if strings.Contains(err.Error(), "invalid tar header") || err == io.EOF {
|
||||
// Get base filename without .gz extension
|
||||
baseName := strings.TrimSuffix(filename, ".gz")
|
||||
if gzr.Name != "" {
|
||||
baseName = gzr.Name
|
||||
}
|
||||
|
||||
file := ExtractedFile{
|
||||
Path: baseName,
|
||||
Content: decompressed,
|
||||
ModTime: gzr.ModTime,
|
||||
}
|
||||
if gzipTruncated {
|
||||
file.Truncated = true
|
||||
file.TruncatedMessage = fmt.Sprintf(
|
||||
"decompressed gzip content exceeded %d bytes and was truncated",
|
||||
maxGzipDecompressedSize,
|
||||
)
|
||||
}
|
||||
|
||||
return []ExtractedFile{file}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("tar read: %w", err)
|
||||
}
|
||||
|
||||
// It's a valid tar archive, process it
|
||||
for {
|
||||
// Skip directories
|
||||
if header.Typeflag != tar.TypeDir {
|
||||
// Skip large files (>10MB)
|
||||
if header.Size <= 10*1024*1024 {
|
||||
content, err := io.ReadAll(tr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read file %s: %w", header.Name, err)
|
||||
}
|
||||
|
||||
files = append(files, ExtractedFile{
|
||||
Path: header.Name,
|
||||
Content: content,
|
||||
ModTime: header.ModTime,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Read next header
|
||||
header, err = tr.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("tar read: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func extractZip(archivePath string) ([]ExtractedFile, error) {
|
||||
r, err := zip.OpenReader(archivePath)
|
||||
if err != nil {
|
||||
@@ -129,12 +273,111 @@ func extractZip(archivePath string) ([]ExtractedFile, error) {
|
||||
files = append(files, ExtractedFile{
|
||||
Path: f.Name,
|
||||
Content: content,
|
||||
ModTime: f.Modified,
|
||||
})
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func extractZipFromReader(r io.Reader) ([]ExtractedFile, error) {
|
||||
// Read all data into memory with a hard cap
|
||||
data, err := io.ReadAll(io.LimitReader(r, maxZipArchiveSize+1))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read zip data: %w", err)
|
||||
}
|
||||
if len(data) > maxZipArchiveSize {
|
||||
return nil, fmt.Errorf("zip too large: max %d bytes", maxZipArchiveSize)
|
||||
}
|
||||
|
||||
// Create a ReaderAt from the byte slice
|
||||
readerAt := bytes.NewReader(data)
|
||||
|
||||
// Open the zip archive
|
||||
zipReader, err := zip.NewReader(readerAt, int64(len(data)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open zip: %w", err)
|
||||
}
|
||||
|
||||
var files []ExtractedFile
|
||||
|
||||
for _, f := range zipReader.File {
|
||||
if f.FileInfo().IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip large files (>10MB)
|
||||
if f.FileInfo().Size() > 10*1024*1024 {
|
||||
continue
|
||||
}
|
||||
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open file %s: %w", f.Name, err)
|
||||
}
|
||||
|
||||
content, err := io.ReadAll(rc)
|
||||
rc.Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read file %s: %w", f.Name, err)
|
||||
}
|
||||
|
||||
files = append(files, ExtractedFile{
|
||||
Path: f.Name,
|
||||
Content: content,
|
||||
ModTime: f.Modified,
|
||||
})
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func extractSingleFile(path string) ([]ExtractedFile, error) {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("stat file: %w", err)
|
||||
}
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open file: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
files, err := extractSingleFileFromReader(f, filepath.Base(path))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(files) > 0 {
|
||||
files[0].ModTime = info.ModTime()
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
|
||||
content, err := io.ReadAll(io.LimitReader(r, maxSingleFileSize+1))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read file content: %w", err)
|
||||
}
|
||||
truncated := len(content) > maxSingleFileSize
|
||||
if truncated {
|
||||
content = content[:maxSingleFileSize]
|
||||
}
|
||||
|
||||
file := ExtractedFile{
|
||||
Path: filepath.Base(filename),
|
||||
Content: content,
|
||||
}
|
||||
if truncated {
|
||||
file.Truncated = true
|
||||
file.TruncatedMessage = fmt.Sprintf(
|
||||
"file exceeded %d bytes and was truncated",
|
||||
maxSingleFileSize,
|
||||
)
|
||||
}
|
||||
|
||||
return []ExtractedFile{file}, nil
|
||||
}
|
||||
|
||||
// FindFileByPattern finds files matching pattern in extracted files
|
||||
func FindFileByPattern(files []ExtractedFile, patterns ...string) []ExtractedFile {
|
||||
var result []ExtractedFile
|
||||
|
||||
126
internal/parser/archive_test.go
Normal file
126
internal/parser/archive_test.go
Normal file
@@ -0,0 +1,126 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractArchiveFromReaderTXT(t *testing.T) {
|
||||
content := "loader_brand=\"XigmaNAS\"\nSystem uptime:\n"
|
||||
files, err := ExtractArchiveFromReader(strings.NewReader(content), "xigmanas.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("extract txt from reader: %v", err)
|
||||
}
|
||||
if len(files) != 1 {
|
||||
t.Fatalf("expected 1 file, got %d", len(files))
|
||||
}
|
||||
if files[0].Path != "xigmanas.txt" {
|
||||
t.Fatalf("expected filename xigmanas.txt, got %q", files[0].Path)
|
||||
}
|
||||
if string(files[0].Content) != content {
|
||||
t.Fatalf("content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractArchiveTXT(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "sample.txt")
|
||||
want := "plain text log"
|
||||
if err := os.WriteFile(path, []byte(want), 0o600); err != nil {
|
||||
t.Fatalf("write sample txt: %v", err)
|
||||
}
|
||||
|
||||
files, err := ExtractArchive(path)
|
||||
if err != nil {
|
||||
t.Fatalf("extract txt file: %v", err)
|
||||
}
|
||||
if len(files) != 1 {
|
||||
t.Fatalf("expected 1 file, got %d", len(files))
|
||||
}
|
||||
if files[0].Path != "sample.txt" {
|
||||
t.Fatalf("expected sample.txt, got %q", files[0].Path)
|
||||
}
|
||||
if string(files[0].Content) != want {
|
||||
t.Fatalf("content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractArchiveFromReaderTXT_TruncatedWhenTooLarge(t *testing.T) {
|
||||
large := bytes.Repeat([]byte("a"), maxSingleFileSize+1024)
|
||||
files, err := ExtractArchiveFromReader(bytes.NewReader(large), "huge.log")
|
||||
if err != nil {
|
||||
t.Fatalf("extract huge txt from reader: %v", err)
|
||||
}
|
||||
if len(files) != 1 {
|
||||
t.Fatalf("expected 1 file, got %d", len(files))
|
||||
}
|
||||
|
||||
f := files[0]
|
||||
if !f.Truncated {
|
||||
t.Fatalf("expected file to be marked as truncated")
|
||||
}
|
||||
if got := len(f.Content); got != maxSingleFileSize {
|
||||
t.Fatalf("expected truncated size %d, got %d", maxSingleFileSize, got)
|
||||
}
|
||||
if f.TruncatedMessage == "" {
|
||||
t.Fatalf("expected truncation message")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsSupportedArchiveFilename(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
want bool
|
||||
}{
|
||||
{name: "dump.tar.gz", want: true},
|
||||
{name: "nvidia-bug-report-1651124000923.log.gz", want: true},
|
||||
{name: "snapshot.zip", want: true},
|
||||
{name: "h3c_20250819.sds", want: true},
|
||||
{name: "report.log", want: true},
|
||||
{name: "xigmanas.txt", want: true},
|
||||
{name: "raw_export.json", want: false},
|
||||
{name: "archive.bin", want: false},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := IsSupportedArchiveFilename(tc.name)
|
||||
if got != tc.want {
|
||||
t.Fatalf("IsSupportedArchiveFilename(%q)=%v, want %v", tc.name, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractArchiveFromReaderSDS(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
|
||||
payload := []byte("STARTTIME:0\nENDTIME:0\n")
|
||||
if err := tw.WriteHeader(&tar.Header{
|
||||
Name: "bmc/pack.info",
|
||||
Mode: 0o600,
|
||||
Size: int64(len(payload)),
|
||||
}); err != nil {
|
||||
t.Fatalf("write tar header: %v", err)
|
||||
}
|
||||
if _, err := tw.Write(payload); err != nil {
|
||||
t.Fatalf("write tar payload: %v", err)
|
||||
}
|
||||
if err := tw.Close(); err != nil {
|
||||
t.Fatalf("close tar writer: %v", err)
|
||||
}
|
||||
|
||||
files, err := ExtractArchiveFromReader(bytes.NewReader(buf.Bytes()), "sample.sds")
|
||||
if err != nil {
|
||||
t.Fatalf("extract sds from reader: %v", err)
|
||||
}
|
||||
if len(files) != 1 {
|
||||
t.Fatalf("expected 1 extracted file, got %d", len(files))
|
||||
}
|
||||
if files[0].Path != "bmc/pack.info" {
|
||||
t.Fatalf("expected bmc/pack.info, got %q", files[0].Path)
|
||||
}
|
||||
}
|
||||
135
internal/parser/fru_manufactured.go
Normal file
135
internal/parser/fru_manufactured.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
var manufacturedYearWeekPattern = regexp.MustCompile(`^\d{4}-W\d{2}$`)
|
||||
|
||||
// NormalizeManufacturedYearWeek converts common FRU manufacturing date formats
|
||||
// into contract-compatible YYYY-Www values. Unknown or ambiguous inputs return "".
|
||||
func NormalizeManufacturedYearWeek(raw string) string {
|
||||
value := strings.TrimSpace(raw)
|
||||
if value == "" {
|
||||
return ""
|
||||
}
|
||||
upper := strings.ToUpper(value)
|
||||
if manufacturedYearWeekPattern.MatchString(upper) {
|
||||
return upper
|
||||
}
|
||||
|
||||
layouts := []string{
|
||||
time.RFC3339,
|
||||
"2006-01-02T15:04:05",
|
||||
"2006-01-02 15:04:05",
|
||||
"2006-01-02",
|
||||
"2006/01/02",
|
||||
"01/02/2006 15:04:05",
|
||||
"01/02/2006",
|
||||
"01-02-2006",
|
||||
"Mon Jan 2 15:04:05 2006",
|
||||
"Mon Jan _2 15:04:05 2006",
|
||||
"Jan 2 2006",
|
||||
"Jan _2 2006",
|
||||
}
|
||||
for _, layout := range layouts {
|
||||
if ts, err := time.Parse(layout, value); err == nil {
|
||||
year, week := ts.ISOWeek()
|
||||
return formatYearWeek(year, week)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func formatYearWeek(year, week int) string {
|
||||
if year <= 0 || week <= 0 || week > 53 {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf("%04d-W%02d", year, week)
|
||||
}
|
||||
|
||||
// ApplyManufacturedYearWeekFromFRU attaches normalized manufactured_year_week to
|
||||
// component details by exact serial-number match. Board-level FRU entries are not
|
||||
// expanded to components.
|
||||
func ApplyManufacturedYearWeekFromFRU(frus []models.FRUInfo, hw *models.HardwareConfig) {
|
||||
if hw == nil || len(frus) == 0 {
|
||||
return
|
||||
}
|
||||
bySerial := make(map[string]string, len(frus))
|
||||
for _, fru := range frus {
|
||||
serial := normalizeFRUSerial(fru.SerialNumber)
|
||||
yearWeek := NormalizeManufacturedYearWeek(fru.MfgDate)
|
||||
if serial == "" || yearWeek == "" {
|
||||
continue
|
||||
}
|
||||
if _, exists := bySerial[serial]; exists {
|
||||
continue
|
||||
}
|
||||
bySerial[serial] = yearWeek
|
||||
}
|
||||
if len(bySerial) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for i := range hw.CPUs {
|
||||
attachYearWeek(&hw.CPUs[i].Details, bySerial[normalizeFRUSerial(hw.CPUs[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.Memory {
|
||||
attachYearWeek(&hw.Memory[i].Details, bySerial[normalizeFRUSerial(hw.Memory[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.Storage {
|
||||
attachYearWeek(&hw.Storage[i].Details, bySerial[normalizeFRUSerial(hw.Storage[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.PCIeDevices {
|
||||
attachYearWeek(&hw.PCIeDevices[i].Details, bySerial[normalizeFRUSerial(hw.PCIeDevices[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.GPUs {
|
||||
attachYearWeek(&hw.GPUs[i].Details, bySerial[normalizeFRUSerial(hw.GPUs[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.NetworkAdapters {
|
||||
attachYearWeek(&hw.NetworkAdapters[i].Details, bySerial[normalizeFRUSerial(hw.NetworkAdapters[i].SerialNumber)])
|
||||
}
|
||||
for i := range hw.PowerSupply {
|
||||
attachYearWeek(&hw.PowerSupply[i].Details, bySerial[normalizeFRUSerial(hw.PowerSupply[i].SerialNumber)])
|
||||
}
|
||||
}
|
||||
|
||||
func attachYearWeek(details *map[string]any, yearWeek string) {
|
||||
if yearWeek == "" {
|
||||
return
|
||||
}
|
||||
if *details == nil {
|
||||
*details = map[string]any{}
|
||||
}
|
||||
if existing, ok := (*details)["manufactured_year_week"]; ok && strings.TrimSpace(toString(existing)) != "" {
|
||||
return
|
||||
}
|
||||
(*details)["manufactured_year_week"] = yearWeek
|
||||
}
|
||||
|
||||
// normalizeFRUSerial trims and upper-cases a serial number, mapping common
// "no serial" placeholders (N/A, NULL, UNKNOWN, "-", "0") to the empty string.
func normalizeFRUSerial(v string) string {
	upper := strings.ToUpper(strings.TrimSpace(v))
	switch upper {
	case "", "N/A", "NA", "NULL", "UNKNOWN", "-", "0":
		return ""
	}
	return upper
}
|
||||
|
||||
// toString renders v as a string: string values pass through unchanged, every
// other type is formatted with fmt.Sprint and whitespace-trimmed.
func toString(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return strings.TrimSpace(fmt.Sprint(v))
}
|
||||
65
internal/parser/fru_manufactured_test.go
Normal file
65
internal/parser/fru_manufactured_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestNormalizeManufacturedYearWeek(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"2024-W07", "2024-W07"},
|
||||
{"2024-02-13", "2024-W07"},
|
||||
{"02/13/2024", "2024-W07"},
|
||||
{"Tue Feb 13 12:00:00 2024", "2024-W07"},
|
||||
{"", ""},
|
||||
{"not-a-date", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := NormalizeManufacturedYearWeek(tt.in); got != tt.want {
|
||||
t.Fatalf("NormalizeManufacturedYearWeek(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyManufacturedYearWeekFromFRU_AttachesByExactSerial verifies that
// manufactured_year_week is attached only to components whose serial number
// exactly matches a FRU entry, and that board-level FRU data does not leak
// onto unmatched components.
func TestApplyManufacturedYearWeekFromFRU_AttachesByExactSerial(t *testing.T) {
	// One PSU whose serial has a matching FRU entry, one disk with none.
	hw := &models.HardwareConfig{
		PowerSupply: []models.PSU{
			{
				Slot:         "PSU0",
				SerialNumber: "PSU-SN-001",
			},
		},
		Storage: []models.Storage{
			{
				Slot:         "OB01",
				SerialNumber: "DISK-SN-001",
			},
		},
	}
	fru := []models.FRUInfo{
		{
			Description:  "PSU0_FRU (ID 30)",
			SerialNumber: "PSU-SN-001",
			MfgDate:      "2024-02-13",
		},
		{
			// Board-level entry; its serial matches no component above.
			Description:  "Builtin FRU Device (ID 0)",
			SerialNumber: "BOARD-SN-001",
			MfgDate:      "2024-02-01",
		},
	}

	ApplyManufacturedYearWeekFromFRU(fru, hw)

	// 2024-02-13 falls in ISO week 7 of 2024.
	if got := hw.PowerSupply[0].Details["manufactured_year_week"]; got != "2024-W07" {
		t.Fatalf("expected PSU year week 2024-W07, got %#v", hw.PowerSupply[0].Details)
	}
	// The unmatched disk must keep a nil Details map (no spurious allocation).
	if hw.Storage[0].Details != nil {
		t.Fatalf("expected unmatched storage serial to stay untouched, got %#v", hw.Storage[0].Details)
	}
}
|
||||
@@ -9,9 +9,13 @@ type VendorParser interface {
|
||||
// Name returns human-readable parser name
|
||||
Name() string
|
||||
|
||||
// Vendor returns vendor identifier (e.g., "inspur", "supermicro", "dell")
|
||||
// Vendor returns vendor identifier (e.g., "inspur", "dell", "h3c_g6")
|
||||
Vendor() string
|
||||
|
||||
// Version returns parser version string
|
||||
// IMPORTANT: Increment version when modifying parser logic!
|
||||
Version() string
|
||||
|
||||
// Detect checks if this parser can handle the given files
|
||||
// Returns confidence score 0-100 (0 = cannot parse, 100 = definitely this format)
|
||||
Detect(files []ExtractedFile) int
|
||||
|
||||
@@ -3,6 +3,8 @@ package parser
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
@@ -62,11 +64,74 @@ func (p *BMCParser) parseFiles() error {
|
||||
|
||||
// Preserve filename
|
||||
result.Filename = p.result.Filename
|
||||
|
||||
appendExtractionWarnings(result, p.files)
|
||||
if result.CollectedAt.IsZero() {
|
||||
if ts := inferCollectedAtFromExtractedFiles(p.files); !ts.IsZero() {
|
||||
result.CollectedAt = ts.UTC()
|
||||
}
|
||||
}
|
||||
p.result = result
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func inferCollectedAtFromExtractedFiles(files []ExtractedFile) time.Time {
|
||||
var latestReliable time.Time
|
||||
var latestAny time.Time
|
||||
for _, f := range files {
|
||||
ts := f.ModTime
|
||||
if ts.IsZero() {
|
||||
continue
|
||||
}
|
||||
if latestAny.IsZero() || ts.After(latestAny) {
|
||||
latestAny = ts
|
||||
}
|
||||
// Ignore placeholder archive mtimes like 1980-01-01.
|
||||
if ts.Year() < 2000 {
|
||||
continue
|
||||
}
|
||||
if latestReliable.IsZero() || ts.After(latestReliable) {
|
||||
latestReliable = ts
|
||||
}
|
||||
}
|
||||
if !latestReliable.IsZero() {
|
||||
return latestReliable
|
||||
}
|
||||
return latestAny
|
||||
}
|
||||
|
||||
func appendExtractionWarnings(result *models.AnalysisResult, files []ExtractedFile) {
|
||||
if result == nil {
|
||||
return
|
||||
}
|
||||
|
||||
truncated := make([]string, 0)
|
||||
for _, f := range files {
|
||||
if !f.Truncated {
|
||||
continue
|
||||
}
|
||||
if f.TruncatedMessage != "" {
|
||||
truncated = append(truncated, fmt.Sprintf("%s: %s", f.Path, f.TruncatedMessage))
|
||||
continue
|
||||
}
|
||||
truncated = append(truncated, fmt.Sprintf("%s: content was truncated due to size limit", f.Path))
|
||||
}
|
||||
|
||||
if len(truncated) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
result.Events = append(result.Events, models.Event{
|
||||
Timestamp: time.Now(),
|
||||
Source: "LOGPile",
|
||||
EventType: "Analysis Warning",
|
||||
Severity: models.SeverityWarning,
|
||||
Description: "Input data was too large; analysis is partial and may be incomplete",
|
||||
RawData: strings.Join(truncated, "; "),
|
||||
})
|
||||
}
|
||||
|
||||
// Result returns the analysis result
|
||||
func (p *BMCParser) Result() *models.AnalysisResult {
|
||||
return p.result
|
||||
|
||||
62
internal/parser/parser_test.go
Normal file
62
internal/parser/parser_test.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// TestAppendExtractionWarnings verifies that truncated extracted files are
// surfaced as exactly one warning event carrying per-file details in RawData.
func TestAppendExtractionWarnings(t *testing.T) {
	result := &models.AnalysisResult{
		Events: make([]models.Event, 0),
	}

	// One intact file and one truncated file with an explicit message.
	files := []ExtractedFile{
		{Path: "ok.log", Content: []byte("ok")},
		{Path: "big.log", Truncated: true, TruncatedMessage: "file exceeded size limit and was truncated"},
	}

	appendExtractionWarnings(result, files)

	// Only the truncated file should contribute, and only one event total.
	if len(result.Events) != 1 {
		t.Fatalf("expected 1 warning event, got %d", len(result.Events))
	}
	ev := result.Events[0]
	if ev.Severity != models.SeverityWarning {
		t.Fatalf("expected warning severity, got %q", ev.Severity)
	}
	if ev.EventType != "Analysis Warning" {
		t.Fatalf("unexpected event type: %q", ev.EventType)
	}
	if ev.RawData == "" {
		t.Fatalf("expected warning details in RawData")
	}
}
|
||||
|
||||
func TestInferCollectedAtFromExtractedFiles_PrefersReliableMTime(t *testing.T) {
|
||||
files := []ExtractedFile{
|
||||
{Path: "a.log", ModTime: time.Date(1980, 1, 1, 0, 0, 0, 0, time.UTC)},
|
||||
{Path: "b.log", ModTime: time.Date(2025, 12, 12, 10, 14, 49, 0, time.FixedZone("EST", -5*3600))},
|
||||
{Path: "c.log", ModTime: time.Date(2026, 2, 28, 4, 18, 18, 0, time.FixedZone("UTC+8", 8*3600))},
|
||||
}
|
||||
|
||||
got := inferCollectedAtFromExtractedFiles(files)
|
||||
want := files[2].ModTime
|
||||
if !got.Equal(want) {
|
||||
t.Fatalf("expected %s, got %s", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInferCollectedAtFromExtractedFiles_FallsBackToAnyMTime(t *testing.T) {
|
||||
files := []ExtractedFile{
|
||||
{Path: "a.log", ModTime: time.Date(1980, 1, 1, 0, 0, 0, 0, time.UTC)},
|
||||
{Path: "b.log", ModTime: time.Date(1970, 1, 2, 0, 0, 0, 0, time.UTC)},
|
||||
}
|
||||
|
||||
got := inferCollectedAtFromExtractedFiles(files)
|
||||
want := files[0].ModTime
|
||||
if !got.Equal(want) {
|
||||
t.Fatalf("expected fallback %s, got %s", want, got)
|
||||
}
|
||||
}
|
||||
@@ -46,6 +46,35 @@ func ListParsers() []string {
|
||||
return vendors
|
||||
}
|
||||
|
||||
// ParserInfo contains information about a registered parser
type ParserInfo struct {
	// Vendor is the vendor identifier reported by VendorParser.Vendor().
	Vendor string `json:"vendor"`
	// Name is the human-readable parser name from VendorParser.Name().
	Name string `json:"name"`
	// Version is the parser version string from VendorParser.Version().
	Version string `json:"version"`
}
|
||||
|
||||
// ListParsersInfo returns detailed info about all registered parsers
|
||||
func ListParsersInfo() []ParserInfo {
|
||||
registryLock.RLock()
|
||||
defer registryLock.RUnlock()
|
||||
|
||||
parsers := make([]ParserInfo, 0, len(registry))
|
||||
for _, p := range registry {
|
||||
parsers = append(parsers, ParserInfo{
|
||||
Vendor: p.Vendor(),
|
||||
Name: p.Name(),
|
||||
Version: p.Version(),
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by vendor name
|
||||
sort.Slice(parsers, func(i, j int) bool {
|
||||
return parsers[i].Vendor < parsers[j].Vendor
|
||||
})
|
||||
|
||||
return parsers
|
||||
}
|
||||
|
||||
// DetectResult holds detection result for a parser
|
||||
type DetectResult struct {
|
||||
Parser VendorParser
|
||||
|
||||
33
internal/parser/timezone.go
Normal file
33
internal/parser/timezone.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const fallbackTimezoneName = "Europe/Moscow"
|
||||
|
||||
var (
|
||||
fallbackTimezoneOnce sync.Once
|
||||
fallbackTimezone *time.Location
|
||||
)
|
||||
|
||||
// DefaultArchiveLocation returns the timezone used for source timestamps
|
||||
// that do not contain an explicit offset.
|
||||
func DefaultArchiveLocation() *time.Location {
|
||||
fallbackTimezoneOnce.Do(func() {
|
||||
loc, err := time.LoadLocation(fallbackTimezoneName)
|
||||
if err != nil {
|
||||
fallbackTimezone = time.FixedZone("MSK", 3*60*60)
|
||||
return
|
||||
}
|
||||
fallbackTimezone = loc
|
||||
})
|
||||
return fallbackTimezone
|
||||
}
|
||||
|
||||
// ParseInDefaultArchiveLocation parses timestamps without timezone information
|
||||
// using Europe/Moscow as the assumed source timezone.
|
||||
func ParseInDefaultArchiveLocation(layout, value string) (time.Time, error) {
|
||||
return time.ParseInLocation(layout, value, DefaultArchiveLocation())
|
||||
}
|
||||
96
internal/parser/vendors/README.md
vendored
96
internal/parser/vendors/README.md
vendored
@@ -1,96 +0,0 @@
|
||||
# Vendor Parser Modules
|
||||
|
||||
Каждый производитель серверов имеет свой формат диагностических архивов BMC.
|
||||
Эта директория содержит модули парсеров для разных производителей.
|
||||
|
||||
## Структура модуля
|
||||
|
||||
```
|
||||
vendors/
|
||||
├── vendors.go # Импорты всех модулей (добавьте сюда новый)
|
||||
├── README.md # Эта документация
|
||||
├── template/ # Шаблон для нового модуля
|
||||
│ └── parser.go.template
|
||||
├── inspur/ # Модуль Inspur/Kaytus
|
||||
│ ├── parser.go # Основной парсер + регистрация
|
||||
│ ├── sdr.go # Парсинг SDR (сенсоры)
|
||||
│ ├── fru.go # Парсинг FRU (серийники)
|
||||
│ ├── asset.go # Парсинг asset.json
|
||||
│ └── syslog.go # Парсинг syslog
|
||||
├── supermicro/ # Будущий модуль Supermicro
|
||||
├── dell/ # Будущий модуль Dell iDRAC
|
||||
└── hpe/ # Будущий модуль HPE iLO
|
||||
```
|
||||
|
||||
## Как добавить новый модуль
|
||||
|
||||
### 1. Создайте директорию модуля
|
||||
|
||||
```bash
|
||||
mkdir -p internal/parser/vendors/VENDORNAME
|
||||
```
|
||||
|
||||
### 2. Скопируйте шаблон
|
||||
|
||||
```bash
|
||||
cp internal/parser/vendors/template/parser.go.template \
|
||||
internal/parser/vendors/VENDORNAME/parser.go
|
||||
```
|
||||
|
||||
### 3. Отредактируйте parser.go
|
||||
|
||||
- Замените `VENDORNAME` на идентификатор вендора (например, `supermicro`)
|
||||
- Замените `VENDOR_DESCRIPTION` на описание (например, `Supermicro`)
|
||||
- Реализуйте метод `Detect()` для определения формата
|
||||
- Реализуйте метод `Parse()` для парсинга данных
|
||||
|
||||
### 4. Зарегистрируйте модуль
|
||||
|
||||
Добавьте импорт в `vendors/vendors.go`:
|
||||
|
||||
```go
|
||||
import (
|
||||
_ "git.mchus.pro/mchus/logpile/internal/parser/vendors/inspur"
|
||||
_ "git.mchus.pro/mchus/logpile/internal/parser/vendors/VENDORNAME" // Новый модуль
|
||||
)
|
||||
```
|
||||
|
||||
### 5. Готово!
|
||||
|
||||
Модуль автоматически зарегистрируется при старте приложения через `init()`.
|
||||
|
||||
## Интерфейс VendorParser
|
||||
|
||||
```go
|
||||
type VendorParser interface {
|
||||
// Name возвращает человекочитаемое имя парсера
|
||||
Name() string
|
||||
|
||||
// Vendor возвращает идентификатор вендора
|
||||
Vendor() string
|
||||
|
||||
// Detect проверяет, подходит ли этот парсер для файлов
|
||||
// Возвращает уверенность 0-100 (0 = не подходит, 100 = точно этот формат)
|
||||
Detect(files []ExtractedFile) int
|
||||
|
||||
// Parse парсит извлеченные файлы
|
||||
Parse(files []ExtractedFile) (*models.AnalysisResult, error)
|
||||
}
|
||||
```
|
||||
|
||||
## Советы по реализации Detect()
|
||||
|
||||
- Ищите уникальные файлы/директории для данного вендора
|
||||
- Проверяйте содержимое файлов на характерные маркеры
|
||||
- Возвращайте высокий confidence (70+) только при уверенном совпадении
|
||||
- Несколько парсеров могут вернуть >0, выбирается с максимальным confidence
|
||||
|
||||
## Поддерживаемые вендоры
|
||||
|
||||
| Вендор | Идентификатор | Статус | Протестировано на |
|
||||
|--------|---------------|--------|-------------------|
|
||||
| Inspur/Kaytus | `inspur` | ✅ Готов | KR4268X2 (onekeylog) |
|
||||
| Supermicro | `supermicro` | ⏳ Планируется | - |
|
||||
| Dell iDRAC | `dell` | ⏳ Планируется | - |
|
||||
| HPE iLO | `hpe` | ⏳ Планируется | - |
|
||||
| Lenovo XCC | `lenovo` | ⏳ Планируется | - |
|
||||
1573
internal/parser/vendors/dell/parser.go
vendored
Normal file
1573
internal/parser/vendors/dell/parser.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
480
internal/parser/vendors/dell/parser_test.go
vendored
Normal file
480
internal/parser/vendors/dell/parser_test.go
vendored
Normal file
@@ -0,0 +1,480 @@
|
||||
package dell
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
func TestDetectNestedTSRZip(t *testing.T) {
|
||||
inner := makeZipArchive(t, map[string][]byte{
|
||||
"tsr/metadata.json": []byte(`{"Make":"Dell Inc.","Model":"PowerEdge R750","ServiceTag":"G37Q064"}`),
|
||||
"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_View.xml": []byte(`<CIM><MESSAGE><SIMPLEREQ/></MESSAGE></CIM>`),
|
||||
})
|
||||
|
||||
p := &Parser{}
|
||||
score := p.Detect([]parser.ExtractedFile{
|
||||
{Path: "signature", Content: []byte("ok")},
|
||||
{Path: "TSR20241119143901_G37Q064.pl.zip", Content: inner},
|
||||
})
|
||||
if score < 80 {
|
||||
t.Fatalf("expected high detect score for nested TSR zip, got %d", score)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseNestedTSRZip builds a minimal nested TSR zip (metadata.json plus
// DCIM inventory, software identity, CIM sensor and lifecycle-log XML) and
// verifies that board info, CPU/NIC/PSU/GPU inventory, firmware, sensors and
// events are all extracted.
func TestParseNestedTSRZip(t *testing.T) {
	// DCIM inventory covering system, CPU, NIC, PSU and GPU (video) views.
	const viewXML = `<CIM><MESSAGE><SIMPLEREQ>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SystemView">
<PROPERTY NAME="Manufacturer"><VALUE>Dell Inc.</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>PowerEdge R750</VALUE></PROPERTY>
<PROPERTY NAME="ServiceTag"><VALUE>G37Q064</VALUE></PROPERTY>
<PROPERTY NAME="BIOSVersionString"><VALUE>2.19.1</VALUE></PROPERTY>
<PROPERTY NAME="LifecycleControllerVersion"><VALUE>7.00.30.00</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_CPUView">
<PROPERTY NAME="FQDD"><VALUE>CPU.Socket.1</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>Intel(R) Xeon(R) Gold 6330</VALUE></PROPERTY>
<PROPERTY NAME="Manufacturer"><VALUE>Intel</VALUE></PROPERTY>
<PROPERTY NAME="NumberOfEnabledCores"><VALUE>28</VALUE></PROPERTY>
<PROPERTY NAME="NumberOfEnabledThreads"><VALUE>56</VALUE></PROPERTY>
<PROPERTY NAME="CurrentClockSpeed"><VALUE>2000</VALUE></PROPERTY>
<PROPERTY NAME="MaxClockSpeed"><VALUE>3100</VALUE></PROPERTY>
<PROPERTY NAME="PPIN"><VALUE>ABCD</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_NICView">
<PROPERTY NAME="FQDD"><VALUE>NIC.Slot.1-1-1</VALUE></PROPERTY>
<PROPERTY NAME="ProductName"><VALUE>Broadcom 57414 Dual Port 10/25GbE SFP28 Adapter</VALUE></PROPERTY>
<PROPERTY NAME="VendorName"><VALUE>Broadcom</VALUE></PROPERTY>
<PROPERTY NAME="CurrentMACAddress"><VALUE>00:11:22:33:44:55</VALUE></PROPERTY>
<PROPERTY NAME="SerialNumber"><VALUE>NICSERIAL1</VALUE></PROPERTY>
<PROPERTY NAME="FamilyVersion"><VALUE>22.80.17</VALUE></PROPERTY>
<PROPERTY NAME="PCIVendorID"><VALUE>0x14e4</VALUE></PROPERTY>
<PROPERTY NAME="PCIDeviceID"><VALUE>0x16d7</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_PowerSupplyView">
<PROPERTY NAME="FQDD"><VALUE>PSU.Slot.1</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>D1400E-S0</VALUE></PROPERTY>
<PROPERTY NAME="Manufacturer"><VALUE>Dell</VALUE></PROPERTY>
<PROPERTY NAME="SerialNumber"><VALUE>PSUSERIAL1</VALUE></PROPERTY>
<PROPERTY NAME="FirmwareVersion"><VALUE>00.1A</VALUE></PROPERTY>
<PROPERTY NAME="TotalOutputPower"><VALUE>1400</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_VideoView">
<PROPERTY NAME="FQDD"><VALUE>Video.Slot.38-1</VALUE></PROPERTY>
<PROPERTY NAME="MarketingName"><VALUE>NVIDIA H100 PCIe</VALUE></PROPERTY>
<PROPERTY NAME="Description"><VALUE>GH100 [H100 PCIe]</VALUE></PROPERTY>
<PROPERTY NAME="Manufacturer"><VALUE>NVIDIA Corporation</VALUE></PROPERTY>
<PROPERTY NAME="PCIVendorID"><VALUE>10DE</VALUE></PROPERTY>
<PROPERTY NAME="PCIDeviceID"><VALUE>2331</VALUE></PROPERTY>
<PROPERTY NAME="BusNumber"><VALUE>74</VALUE></PROPERTY>
<PROPERTY NAME="DeviceNumber"><VALUE>0</VALUE></PROPERTY>
<PROPERTY NAME="FunctionNumber"><VALUE>0</VALUE></PROPERTY>
<PROPERTY NAME="SerialNumber"><VALUE>1793924039808</VALUE></PROPERTY>
<PROPERTY NAME="FirmwareVersion"><VALUE>96.00.AF.00.01</VALUE></PROPERTY>
<PROPERTY NAME="GPUGUID"><VALUE>bc681a6d4785dde08c21f49c46c05cc3</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
</SIMPLEREQ></MESSAGE></CIM>`

	// Firmware inventory entry matching the NIC above.
	const swXML = `<CIM><MESSAGE><SIMPLEREQ>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SoftwareIdentity">
<PROPERTY NAME="ElementName"><VALUE>NIC.Slot.1-1-1</VALUE></PROPERTY>
<PROPERTY NAME="VersionString"><VALUE>22.80.17</VALUE></PROPERTY>
<PROPERTY NAME="ComponentType"><VALUE>Network</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
</SIMPLEREQ></MESSAGE></CIM>`

	// One lifecycle-controller log entry with Warning severity.
	const eventsXML = `<Log>
<Event AgentID="Lifecycle Controller" Category="System Health" Severity="Warning" Timestamp="2024-11-19T14:39:01-0800">
<MessageID>SYS1001</MessageID>
<Message>Link is down</Message>
<FQDD>NIC.Slot.1-1-1</FQDD>
</Event>
</Log>`

	// Sensor readings: a GPU sensor keyed by the video FQDD and one generic
	// numeric sensor.
	const cimSensorXML = `<CIM><MESSAGE><SIMPLEREQ>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_GPUSensor">
<PROPERTY NAME="DeviceID"><VALUE>Video.Slot.38-1</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryGPUTemperature"><VALUE>290</VALUE></PROPERTY>
<PROPERTY NAME="MemoryTemperature"><VALUE>440</VALUE></PROPERTY>
<PROPERTY NAME="PowerConsumption"><VALUE>295</VALUE></PROPERTY>
<PROPERTY NAME="ThermalAlertStatus"><VALUE>5</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="CIM_NumericSensor">
<PROPERTY NAME="ElementName"><VALUE>PS1 Voltage 1</VALUE></PROPERTY>
<PROPERTY NAME="CurrentReading"><VALUE>224.0</VALUE></PROPERTY>
<PROPERTY NAME="BaseUnits"><VALUE>5</VALUE></PROPERTY>
<PROPERTY NAME="UnitModifier"><VALUE>0</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>5</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
</SIMPLEREQ></MESSAGE></CIM>`

	inner := makeZipArchive(t, map[string][]byte{
		"tsr/metadata.json": []byte(`{
"Make":"Dell Inc.",
"Model":"PowerEdge R750",
"ServiceTag":"G37Q064",
"FirmwareVersion":"7.00.30.00",
"CollectionDateTime":"2024-11-19 14:39:01.000-0800"
}`),
		"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_View.xml":             []byte(viewXML),
		"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_SoftwareIdentity.xml": []byte(swXML),
		"tsr/hardware/sysinfo/inventory/sysinfo_CIM_Sensor.xml":            []byte(cimSensorXML),
		"tsr/hardware/sysinfo/lcfiles/curr_lclog.xml":                      []byte(eventsXML),
	})

	p := &Parser{}
	result, err := p.Parse([]parser.ExtractedFile{
		{Path: "signature", Content: []byte("ok")},
		{Path: "TSR20241119143901_G37Q064.pl.zip", Content: inner},
	})
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if result.Hardware == nil {
		t.Fatalf("expected hardware section")
	}

	// Board identity comes from DCIM_SystemView.
	if got := result.Hardware.BoardInfo.Manufacturer; got != "Dell Inc." {
		t.Fatalf("unexpected board manufacturer: %q", got)
	}
	if got := result.Hardware.BoardInfo.ProductName; got != "PowerEdge R750" {
		t.Fatalf("unexpected board product: %q", got)
	}
	if got := result.Hardware.BoardInfo.SerialNumber; got != "G37Q064" {
		t.Fatalf("unexpected service tag: %q", got)
	}

	if len(result.Hardware.CPUs) != 1 {
		t.Fatalf("expected 1 cpu, got %d", len(result.Hardware.CPUs))
	}
	if got := result.Hardware.CPUs[0].Model; got != "Intel(R) Xeon(R) Gold 6330" {
		t.Fatalf("unexpected cpu model: %q", got)
	}

	if len(result.Hardware.NetworkAdapters) != 1 {
		t.Fatalf("expected 1 network adapter, got %d", len(result.Hardware.NetworkAdapters))
	}
	adapter := result.Hardware.NetworkAdapters[0]
	if adapter.Vendor != "Broadcom" {
		t.Fatalf("unexpected nic vendor: %q", adapter.Vendor)
	}
	if adapter.Firmware != "22.80.17" {
		t.Fatalf("unexpected nic firmware: %q", adapter.Firmware)
	}
	if adapter.SerialNumber != "NICSERIAL1" {
		t.Fatalf("unexpected nic serial: %q", adapter.SerialNumber)
	}

	if len(result.Hardware.PowerSupply) != 1 {
		t.Fatalf("expected 1 psu, got %d", len(result.Hardware.PowerSupply))
	}
	psu := result.Hardware.PowerSupply[0]
	if psu.Model != "D1400E-S0" {
		t.Fatalf("unexpected psu model: %q", psu.Model)
	}
	if psu.Firmware != "00.1A" {
		t.Fatalf("unexpected psu firmware: %q", psu.Firmware)
	}

	if len(result.Hardware.Firmware) == 0 {
		t.Fatalf("expected firmware entries")
	}
	if len(result.Hardware.GPUs) != 1 {
		t.Fatalf("expected 1 gpu, got %d", len(result.Hardware.GPUs))
	}
	if got := result.Hardware.GPUs[0].Model; got != "NVIDIA H100 PCIe" {
		t.Fatalf("unexpected gpu model: %q", got)
	}
	if got := result.Hardware.GPUs[0].SerialNumber; got != "1793924039808" {
		t.Fatalf("unexpected gpu serial: %q", got)
	}
	// PrimaryGPUTemperature 290 is expected to surface as 29 — the parser
	// presumably scales tenths of a degree; confirm against the parser.
	if got := result.Hardware.GPUs[0].Temperature; got != 29 {
		t.Fatalf("unexpected gpu temperature: %d", got)
	}
	if len(result.Sensors) == 0 {
		t.Fatalf("expected sensors from CIM_Sensor")
	}
	if len(result.Events) != 1 {
		t.Fatalf("expected one lifecycle event, got %d", len(result.Events))
	}
	if got := string(result.Events[0].Severity); got != "warning" {
		t.Fatalf("unexpected event severity: %q", got)
	}
}
|
||||
|
||||
// TestParseDellPhysicalDiskEndurance verifies that RemainingRatedWriteEndurance from
// DCIM_PhysicalDiskView is parsed into Storage.RemainingEndurancePct.
func TestParseDellPhysicalDiskEndurance(t *testing.T) {
	// Two disks: an SSD (bay 0) reporting RemainingRatedWriteEndurance and an
	// HDD (bay 1) without that property.
	const viewXML = `<CIM><MESSAGE><SIMPLEREQ>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SystemView">
<PROPERTY NAME="Manufacturer"><VALUE>Dell Inc.</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>PowerEdge R6625</VALUE></PROPERTY>
<PROPERTY NAME="ServiceTag"><VALUE>8VS2LG4</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_PhysicalDiskView">
<PROPERTY NAME="FQDD"><VALUE>Disk.Bay.0:Enclosure.Internal.0-1:RAID.SL.3-1</VALUE></PROPERTY>
<PROPERTY NAME="Slot"><VALUE>0</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>HFS480G3H2X069N</VALUE></PROPERTY>
<PROPERTY NAME="SerialNumber"><VALUE>ESEAN5254I030B26B</VALUE></PROPERTY>
<PROPERTY NAME="SizeInBytes"><VALUE>479559942144</VALUE></PROPERTY>
<PROPERTY NAME="MediaType"><VALUE>Solid State Drive</VALUE></PROPERTY>
<PROPERTY NAME="BusProtocol"><VALUE>SATA</VALUE></PROPERTY>
<PROPERTY NAME="Revision"><VALUE>DZ03</VALUE></PROPERTY>
<PROPERTY NAME="RemainingRatedWriteEndurance"><VALUE>100</VALUE><DisplayValue>100 %</DisplayValue></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>1</VALUE><DisplayValue>OK</DisplayValue></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_PhysicalDiskView">
<PROPERTY NAME="FQDD"><VALUE>Disk.Bay.1:Enclosure.Internal.0-1:RAID.SL.3-1</VALUE></PROPERTY>
<PROPERTY NAME="Slot"><VALUE>1</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>TOSHIBA MG08ADA800E</VALUE></PROPERTY>
<PROPERTY NAME="SerialNumber"><VALUE>X1G0A0YXFVVG</VALUE></PROPERTY>
<PROPERTY NAME="SizeInBytes"><VALUE>8001563222016</VALUE></PROPERTY>
<PROPERTY NAME="MediaType"><VALUE>Hard Disk Drive</VALUE></PROPERTY>
<PROPERTY NAME="BusProtocol"><VALUE>SAS</VALUE></PROPERTY>
<PROPERTY NAME="Revision"><VALUE>0104</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
</SIMPLEREQ></MESSAGE></CIM>`

	inner := makeZipArchive(t, map[string][]byte{
		"tsr/metadata.json": []byte(`{"Make":"Dell Inc.","Model":"PowerEdge R6625","ServiceTag":"8VS2LG4"}`),
		"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_View.xml": []byte(viewXML),
	})

	p := &Parser{}
	result, err := p.Parse([]parser.ExtractedFile{
		{Path: "signature", Content: []byte("ok")},
		{Path: "TSR20260306141852_8VS2LG4.pl.zip", Content: inner},
	})
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if len(result.Hardware.Storage) != 2 {
		t.Fatalf("expected 2 storage devices, got %d", len(result.Hardware.Storage))
	}

	// The SSD must carry the parsed endurance percentage.
	ssd := result.Hardware.Storage[0]
	if ssd.RemainingEndurancePct == nil {
		t.Fatalf("SSD slot 0: expected RemainingEndurancePct to be set")
	}
	if *ssd.RemainingEndurancePct != 100 {
		t.Errorf("SSD slot 0: expected RemainingEndurancePct=100, got %d", *ssd.RemainingEndurancePct)
	}

	// The HDD has no endurance property, so the field must remain nil.
	hdd := result.Hardware.Storage[1]
	if hdd.RemainingEndurancePct != nil {
		t.Errorf("HDD slot 1: expected RemainingEndurancePct absent, got %d", *hdd.RemainingEndurancePct)
	}
}
|
||||
|
||||
// TestParseDellInfiniBandView verifies that DCIM_InfiniBandView entries are parsed as
|
||||
// NetworkAdapters (not PCIe devices) and that the corresponding SoftwareIdentity firmware
|
||||
// entry with FQDD "InfiniBand.Slot.*" does not leak into hardware.firmware.
|
||||
//
|
||||
// Regression guard: PowerEdge R6625 (8VS2LG4) — "Mellanox Network Adapter" version
|
||||
// "20.39.35.60" appeared in hardware.firmware because DCIM_InfiniBandView was ignored
|
||||
// (device ended up only in PCIeDevices with model "16x or x16") and SoftwareIdentity
|
||||
// FQDD "InfiniBand.Slot.1-1" was not filtered. (2026-03-15)
|
||||
func TestParseDellInfiniBandView(t *testing.T) {
|
||||
const viewXML = `<CIM><MESSAGE><SIMPLEREQ>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SystemView">
|
||||
<PROPERTY NAME="Manufacturer"><VALUE>Dell Inc.</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="Model"><VALUE>PowerEdge R6625</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="ServiceTag"><VALUE>8VS2LG4</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_InfiniBandView">
|
||||
<PROPERTY NAME="FQDD"><VALUE>InfiniBand.Slot.1-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="DeviceDescription"><VALUE>InfiniBand in Slot 1 Port 1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="CurrentMACAddress"><VALUE>00:1C:FD:D7:5A:E6</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="FamilyVersion"><VALUE>20.39.35.60</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="EFIVersion"><VALUE>14.32.17</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PCIVendorID"><VALUE>15B3</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PCIDeviceID"><VALUE>101B</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_PCIDeviceView">
|
||||
<PROPERTY NAME="FQDD"><VALUE>InfiniBand.Slot.1-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="Description"><VALUE>MT28908 Family [ConnectX-6]</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="DeviceDescription"><VALUE>InfiniBand in Slot 1 Port 1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="Manufacturer"><VALUE>Mellanox Technologies</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PCIVendorID"><VALUE>15B3</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PCIDeviceID"><VALUE>101B</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="DataBusWidth"><DisplayValue>16x or x16</DisplayValue></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_ControllerView">
|
||||
<PROPERTY NAME="FQDD"><VALUE>RAID.SL.3-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="ProductName"><VALUE>PERC H755 Front</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="ControllerFirmwareVersion"><VALUE>52.30.0-6115</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
</SIMPLEREQ></MESSAGE></CIM>`
|
||||
|
||||
const swXML = `<CIM><MESSAGE><SIMPLEREQ>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SoftwareIdentity">
|
||||
<PROPERTY NAME="ElementName"><VALUE>Mellanox Network Adapter - 00:1C:FD:D7:5A:E6</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="FQDD"><VALUE>InfiniBand.Slot.1-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="VersionString"><VALUE>20.39.35.60</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SoftwareIdentity">
|
||||
<PROPERTY NAME="ElementName"><VALUE>PERC H755 Front</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="FQDD"><VALUE>RAID.SL.3-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="VersionString"><VALUE>52.30.0-6115</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SoftwareIdentity">
|
||||
<PROPERTY NAME="ElementName"><VALUE>BIOS</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="FQDD"><VALUE>BIOS.Setup.1-1</VALUE></PROPERTY>
|
||||
<PROPERTY NAME="VersionString"><VALUE>1.15.3</VALUE></PROPERTY>
|
||||
</INSTANCE></VALUE.NAMEDINSTANCE>
|
||||
</SIMPLEREQ></MESSAGE></CIM>`
|
||||
|
||||
inner := makeZipArchive(t, map[string][]byte{
|
||||
"tsr/metadata.json": []byte(`{"Make":"Dell Inc.","Model":"PowerEdge R6625","ServiceTag":"8VS2LG4"}`),
|
||||
"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_View.xml": []byte(viewXML),
|
||||
"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_SoftwareIdentity.xml": []byte(swXML),
|
||||
})
|
||||
|
||||
p := &Parser{}
|
||||
result, err := p.Parse([]parser.ExtractedFile{
|
||||
{Path: "signature", Content: []byte("ok")},
|
||||
{Path: "TSR20260306141852_8VS2LG4.pl.zip", Content: inner},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
// InfiniBand adapter must appear as a NetworkAdapter, not a PCIe device.
|
||||
if len(result.Hardware.NetworkAdapters) != 1 {
|
||||
t.Fatalf("expected 1 network adapter, got %d", len(result.Hardware.NetworkAdapters))
|
||||
}
|
||||
nic := result.Hardware.NetworkAdapters[0]
|
||||
if nic.Slot != "InfiniBand.Slot.1-1" {
|
||||
t.Errorf("unexpected NIC slot: %q", nic.Slot)
|
||||
}
|
||||
if nic.Firmware != "20.39.35.60" {
|
||||
t.Errorf("unexpected NIC firmware: %q", nic.Firmware)
|
||||
}
|
||||
if len(nic.MACAddresses) == 0 || nic.MACAddresses[0] != "00:1C:FD:D7:5A:E6" {
|
||||
t.Errorf("unexpected NIC MAC: %v", nic.MACAddresses)
|
||||
}
|
||||
// pci.ids enrichment: VendorID=0x15B3, DeviceID=0x101B → chip model + vendor name.
|
||||
if nic.Model != "MT28908 Family [ConnectX-6]" {
|
||||
t.Errorf("NIC model = %q, want MT28908 Family [ConnectX-6] (from pci.ids)", nic.Model)
|
||||
}
|
||||
if nic.Vendor != "Mellanox Technologies" {
|
||||
t.Errorf("NIC vendor = %q, want Mellanox Technologies (from pci.ids)", nic.Vendor)
|
||||
}
|
||||
|
||||
// InfiniBand FQDD must NOT appear in PCIe devices.
|
||||
for _, pcie := range result.Hardware.PCIeDevices {
|
||||
if pcie.Slot == "InfiniBand.Slot.1-1" {
|
||||
t.Errorf("InfiniBand.Slot.1-1 must not appear in PCIeDevices")
|
||||
}
|
||||
}
|
||||
|
||||
// Firmware entries from SoftwareIdentity and parseControllerView must carry the FQDD
|
||||
// as their Description so the exporter's isDeviceBoundFirmwareFQDD filter can remove them.
|
||||
fqddByName := make(map[string]string)
|
||||
for _, fw := range result.Hardware.Firmware {
|
||||
fqddByName[fw.DeviceName] = fw.Description
|
||||
}
|
||||
if desc := fqddByName["Mellanox Network Adapter"]; desc != "InfiniBand.Slot.1-1" {
|
||||
t.Errorf("Mellanox firmware Description = %q, want InfiniBand.Slot.1-1 for FQDD filter", desc)
|
||||
}
|
||||
if desc := fqddByName["PERC H755 Front"]; desc != "RAID.SL.3-1" {
|
||||
t.Errorf("PERC H755 Front firmware Description = %q, want RAID.SL.3-1 for FQDD filter", desc)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseDellCPUAffinity verifies that CPUAffinity is parsed into NUMANode for
// NIC, PCIe, and controller views. "Not Applicable" must result in NUMANode=0.
func TestParseDellCPUAffinity(t *testing.T) {
	// Minimal DCIM inventory XML: one system view plus one instance of each
	// view class that carries a CPUAffinity property.
	const viewXML = `<CIM><MESSAGE><SIMPLEREQ>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_SystemView">
<PROPERTY NAME="Manufacturer"><VALUE>Dell Inc.</VALUE></PROPERTY>
<PROPERTY NAME="Model"><VALUE>PowerEdge R750</VALUE></PROPERTY>
<PROPERTY NAME="ServiceTag"><VALUE>TESTST1</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_NICView">
<PROPERTY NAME="FQDD"><VALUE>NIC.Slot.2-1-1</VALUE></PROPERTY>
<PROPERTY NAME="ProductName"><VALUE>Some NIC</VALUE></PROPERTY>
<PROPERTY NAME="CPUAffinity"><VALUE>1</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_InfiniBandView">
<PROPERTY NAME="FQDD"><VALUE>InfiniBand.Slot.1-1</VALUE></PROPERTY>
<PROPERTY NAME="DeviceDescription"><VALUE>InfiniBand in Slot 1</VALUE></PROPERTY>
<PROPERTY NAME="CPUAffinity"><VALUE>2</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_ControllerView">
<PROPERTY NAME="FQDD"><VALUE>RAID.Slot.1-1</VALUE></PROPERTY>
<PROPERTY NAME="ProductName"><VALUE>PERC H755</VALUE></PROPERTY>
<PROPERTY NAME="CPUAffinity"><VALUE>Not Applicable</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
<VALUE.NAMEDINSTANCE><INSTANCE CLASSNAME="DCIM_PCIDeviceView">
<PROPERTY NAME="FQDD"><VALUE>Slot.7-1</VALUE></PROPERTY>
<PROPERTY NAME="Description"><VALUE>Some PCIe Card</VALUE></PROPERTY>
<PROPERTY NAME="CPUAffinity"><VALUE>2</VALUE></PROPERTY>
<PROPERTY NAME="PrimaryStatus"><VALUE>0</VALUE></PROPERTY>
</INSTANCE></VALUE.NAMEDINSTANCE>
</SIMPLEREQ></MESSAGE></CIM>`

	// Build the inner TSR zip layout the Dell parser expects.
	inner := makeZipArchive(t, map[string][]byte{
		"tsr/metadata.json": []byte(`{"Make":"Dell Inc.","Model":"PowerEdge R750","ServiceTag":"TESTST1"}`),
		"tsr/hardware/sysinfo/inventory/sysinfo_DCIM_View.xml": []byte(viewXML),
	})

	p := &Parser{}
	result, err := p.Parse([]parser.ExtractedFile{
		{Path: "signature", Content: []byte("ok")},
		{Path: "TSR_TESTST1.pl.zip", Content: inner},
	})
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}

	// NIC CPUAffinity=1 → NUMANode=1; InfiniBand CPUAffinity=2 → NUMANode=2.
	nicBySlot := make(map[string]int)
	for _, nic := range result.Hardware.NetworkAdapters {
		nicBySlot[nic.Slot] = nic.NUMANode
	}
	if nicBySlot["NIC.Slot.2-1-1"] != 1 {
		t.Errorf("NIC.Slot.2-1-1 NUMANode = %d, want 1", nicBySlot["NIC.Slot.2-1-1"])
	}
	if nicBySlot["InfiniBand.Slot.1-1"] != 2 {
		t.Errorf("InfiniBand.Slot.1-1 NUMANode = %d, want 2", nicBySlot["InfiniBand.Slot.1-1"])
	}

	// PCIe device CPUAffinity=2 → NUMANode=2; controller CPUAffinity="Not Applicable" → NUMANode=0
	pcieBySlot := make(map[string]int)
	for _, pcie := range result.Hardware.PCIeDevices {
		pcieBySlot[pcie.Slot] = pcie.NUMANode
	}
	if pcieBySlot["Slot.7-1"] != 2 {
		t.Errorf("Slot.7-1 NUMANode = %d, want 2", pcieBySlot["Slot.7-1"])
	}
	if pcieBySlot["RAID.Slot.1-1"] != 0 {
		t.Errorf("RAID.Slot.1-1 NUMANode = %d, want 0 (Not Applicable)", pcieBySlot["RAID.Slot.1-1"])
	}
}
|
||||
|
||||
// makeZipArchive builds an in-memory zip archive containing the given
// name→content entries and returns its raw bytes. Any failure aborts the
// calling test immediately.
func makeZipArchive(t *testing.T, files map[string][]byte) []byte {
	t.Helper()

	var out bytes.Buffer
	archive := zip.NewWriter(&out)
	for entryName, data := range files {
		entry, err := archive.Create(entryName)
		if err != nil {
			t.Fatalf("create zip entry %s: %v", entryName, err)
		}
		if _, err = entry.Write(data); err != nil {
			t.Fatalf("write zip entry %s: %v", entryName, err)
		}
	}
	// Close flushes the central directory; without it the archive is invalid.
	if err := archive.Close(); err != nil {
		t.Fatalf("close zip: %v", err)
	}
	return out.Bytes()
}
|
||||
147
internal/parser/vendors/generic/parser.go
vendored
Normal file
147
internal/parser/vendors/generic/parser.go
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
// Package generic provides a fallback parser for unrecognized text files
|
||||
package generic
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// parserVersion is the version of this parser module, reported via Version().
const parserVersion = "1.1"
|
||||
|
||||
// init registers the generic fallback parser with the global parser registry
// so it participates in vendor detection.
func init() {
	parser.Register(&Parser{})
}
|
||||
|
||||
// Parser implements VendorParser for generic text files. It is stateless and
// acts as a low-confidence fallback when no vendor-specific parser matches.
type Parser struct{}
|
||||
|
||||
// Name returns the human-readable parser name.
func (p *Parser) Name() string {
	return "Generic Text File Parser"
}
|
||||
|
||||
// Vendor returns the vendor identifier used for parser routing.
func (p *Parser) Vendor() string {
	return "generic"
}
|
||||
|
||||
// Version returns the parser module version.
func (p *Parser) Version() string {
	return parserVersion
}
|
||||
|
||||
// Detect checks if this is a text file (fallback with low confidence)
|
||||
// Returns confidence 0-100
|
||||
func (p *Parser) Detect(files []parser.ExtractedFile) int {
|
||||
// Only detect if there's exactly one file (plain .gz or single file)
|
||||
if len(files) != 1 {
|
||||
return 0
|
||||
}
|
||||
|
||||
file := files[0]
|
||||
|
||||
// Check if content looks like text (not binary)
|
||||
if !isLikelyText(file.Content) {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Return low confidence so other parsers have priority
|
||||
return 15
|
||||
}
|
||||
|
||||
// isLikelyText reports whether content looks like text rather than binary
// data, by sampling up to the first 512 bytes.
//
// Scoring: every control character other than '\n', '\r' and '\t' counts as
// one binary point, and a NUL byte — a strong binary indicator — adds ten
// more. The content is considered text when binary points stay under 5% of
// the sample. Empty content is not considered text.
func isLikelyText(content []byte) bool {
	// Check at most the first 512 bytes for binary data.
	sample := content
	if len(sample) > 512 {
		sample = sample[:512]
	}

	binaryCount := 0
	for _, b := range sample {
		// Non-printable characters, excluding common whitespace.
		if b < 32 && b != '\n' && b != '\r' && b != '\t' {
			binaryCount++
		}
		if b == 0 { // NUL byte is a strong indicator of binary
			binaryCount += 10
		}
	}

	// Compare via multiplication so short samples are handled correctly: the
	// previous form `binaryCount < len(sample)/20` rounded the threshold down
	// to 0 for any sample under 20 bytes, misclassifying short plain-text
	// files (e.g. "hello\n") as binary.
	return binaryCount*20 < len(sample)
}
|
||||
|
||||
// Parse parses generic text file
|
||||
func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) {
|
||||
result := &models.AnalysisResult{
|
||||
Events: make([]models.Event, 0),
|
||||
FRU: make([]models.FRUInfo, 0),
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
}
|
||||
|
||||
// Initialize hardware config
|
||||
result.Hardware = &models.HardwareConfig{}
|
||||
|
||||
if len(files) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
file := files[0]
|
||||
content := string(file.Content)
|
||||
|
||||
// Create a single event with file info
|
||||
result.Events = append(result.Events, models.Event{
|
||||
Timestamp: time.Now(),
|
||||
Source: "File",
|
||||
EventType: "Text File",
|
||||
Description: "Generic text file loaded",
|
||||
Severity: models.SeverityInfo,
|
||||
RawData: "Filename: " + file.Path,
|
||||
})
|
||||
|
||||
// Try to extract some basic info from common file types
|
||||
if strings.Contains(strings.ToLower(file.Path), "nvidia-bug-report") {
|
||||
parseNvidiaBugReport(content, result)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseNvidiaBugReport extracts info from nvidia-bug-report files
|
||||
func parseNvidiaBugReport(content string, result *models.AnalysisResult) {
|
||||
lines := strings.Split(content, "\n")
|
||||
|
||||
// Look for GPU information
|
||||
for i, line := range lines {
|
||||
// Find NVIDIA driver version
|
||||
if strings.Contains(line, "NVRM version:") || strings.Contains(line, "nvidia-smi") {
|
||||
if i+5 < len(lines) {
|
||||
result.Events = append(result.Events, models.Event{
|
||||
Timestamp: time.Now(),
|
||||
Source: "NVIDIA Driver",
|
||||
EventType: "Driver Info",
|
||||
Description: "NVIDIA driver information found",
|
||||
Severity: models.SeverityInfo,
|
||||
RawData: strings.TrimSpace(line),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Find GPU devices
|
||||
if strings.Contains(line, "/proc/driver/nvidia/gpus/") && strings.Contains(line, "***") {
|
||||
result.Events = append(result.Events, models.Event{
|
||||
Timestamp: time.Now(),
|
||||
Source: "GPU",
|
||||
EventType: "GPU Device",
|
||||
Description: "GPU device detected",
|
||||
Severity: models.SeverityInfo,
|
||||
RawData: strings.TrimSpace(line),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
3535
internal/parser/vendors/h3c/parser.go
vendored
Normal file
3535
internal/parser/vendors/h3c/parser.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
962
internal/parser/vendors/h3c/parser_test.go
vendored
Normal file
962
internal/parser/vendors/h3c/parser_test.go
vendored
Normal file
@@ -0,0 +1,962 @@
|
||||
package h3c
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// TestDetectH3C_GenerationRouting checks that generation-specific bundle
// layouts route to the right parser: the INI/CSV layout must score higher on
// G5 than on G6, and the JSON/XML layout must score higher on G6 than on G5.
func TestDetectH3C_GenerationRouting(t *testing.T) {
	g5 := &G5Parser{}
	g6 := &G6Parser{}

	// Classic G5 layout: INI firmware/hardware files plus CSV event logs.
	g5Files := []parser.ExtractedFile{
		{Path: "bmc/pack.info", Content: []byte("STARTTIME:0")},
		{Path: "static/FRUInfo.ini", Content: []byte("[Baseboard]\nBoard Manufacturer=H3C\n")},
		{Path: "static/hardware_info.ini", Content: []byte("[Processors: Processor 1]\nModel: Intel Xeon\n")},
		{Path: "static/hardware.info", Content: []byte("[Disk_0_Front_NA]\nSerialNumber=DISK-0\n")},
		{Path: "static/firmware_version.ini", Content: []byte("[System board]\nBIOS Version: 5.59\n")},
		{Path: "user/test1.csv", Content: []byte("Record Time Stamp,DescInfo\n2025-01-01 00:00:00,foo\n")},
	}
	if gotG5, gotG6 := g5.Detect(g5Files), g6.Detect(g5Files); gotG5 <= gotG6 {
		t.Fatalf("expected G5 confidence > G6 for G5 sample, got g5=%d g6=%d", gotG5, gotG6)
	}

	// G6 layout: JSON firmware/SEL files plus XML CPU/memory detail files.
	g6Files := []parser.ExtractedFile{
		{Path: "bmc/pack.info", Content: []byte("STARTTIME:0")},
		{Path: "static/FRUInfo.ini", Content: []byte("[Baseboard]\nBoard Manufacturer=H3C\n")},
		{Path: "static/board_info.ini", Content: []byte("[System board]\nBoardMfr=H3C\n")},
		{Path: "static/firmware_version.json", Content: []byte(`{"BIOS":{"Firmware Name":"BIOS","Firmware Version":"6.10"}}`)},
		{Path: "static/CPUDetailInfo.xml", Content: []byte("<Root><CPU1><Model>X</Model></CPU1></Root>")},
		{Path: "static/MemoryDetailInfo.xml", Content: []byte("<Root><DIMM1><Name>A0</Name></DIMM1></Root>")},
		{Path: "user/Sel.json", Content: []byte(`{"Id":1}`)},
	}
	if gotG5, gotG6 := g5.Detect(g6Files), g6.Detect(g6Files); gotG6 <= gotG5 {
		t.Fatalf("expected G6 confidence > G5 for G6 sample, got g5=%d g6=%d", gotG5, gotG6)
	}
}
|
||||
|
||||
// TestParseH3CG6_RaidAndNVMeEnrichment verifies that disks listed in
// storage_disk.ini are merged by serial number with the richer data from the
// Storage_*.txt controller dump, that raid.json yields logical volumes, and
// that NVMe_info.txt disks are added as nvme-type storage devices.
func TestParseH3CG6_RaidAndNVMeEnrichment(t *testing.T) {
	p := &G6Parser{}
	files := []parser.ExtractedFile{
		{
			Path: "static/storage_disk.ini",
			Content: []byte(`[Disk_000]
DiskSlotDesc=Front0
Present=YES
SerialNumber=SER-0
`),
		},
		{
			Path: "static/raid.json",
			Content: []byte(`{
"RaidConfig": {
"CtrlInfo": [
{
"CtrlSlot": 1,
"CtrlName": "RAID-LSI-9560",
"LDInfo": [
{
"LDID": "0",
"LDName": "VD0",
"RAIDLevel": "1",
"CapacityBytes": 1000000000,
"Status": "Optimal"
}
]
}
]
}
}`),
		},
		{
			Path: "static/Storage_RAID-LSI-9560-LP-8i-4GB[1].txt",
			Content: []byte(`Controller Information
------------------------------------------------------------------------
AssetTag : RAID-LSI-9560

Logical Device Information
------------------------------------------------------------------------
LDID : 0
Name : VD0
RAID Level : 1
CapacityBytes : 1000000000
Status : Optimal

Physical Device Information
------------------------------------------------------------------------
ConnectionID : 0
Position : Front0
StatusIndicator : OK
Protocol : SATA
MediaType : SSD
Manufacturer : Samsung
Model : PM893
Revision : GDC1
SerialNumber : SER-0
CapacityBytes : 480000000000

ConnectionID : 1
Position : Front1
StatusIndicator : OK
Protocol : SATA
MediaType : SSD
Manufacturer : Samsung
Model : PM893
Revision : GDC1
SerialNumber : SER-1
CapacityBytes : 480000000000
`),
		},
		{
			Path: "static/NVMe_info.txt",
			Content: []byte(`[NVMe_0]
Present=YES
DiskSlotDesc=Front2
Model=INTEL SSDPE2KX010T8
SerialNumber=NVME-1
Firmware=V100
CapacityBytes=1000204886016
Interface=NVMe
Status=OK
`),
		},
	}

	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if result.Hardware == nil {
		t.Fatalf("expected hardware section")
	}

	// One logical volume must come from raid.json.
	if len(result.Hardware.Volumes) != 1 {
		t.Fatalf("expected 1 volume, got %d", len(result.Hardware.Volumes))
	}
	vol := result.Hardware.Volumes[0]
	if vol.RAIDLevel != "RAID1" {
		t.Fatalf("expected RAID1 level, got %q", vol.RAIDLevel)
	}
	if vol.SizeGB != 1 {
		t.Fatalf("expected 1GB logical volume, got %d", vol.SizeGB)
	}

	// SER-0 appears in both storage_disk.ini and the controller dump, so it
	// must merge into one entry: 2 SATA disks + 1 NVMe = 3 unique devices.
	if len(result.Hardware.Storage) != 3 {
		t.Fatalf("expected 3 unique storage devices, got %d", len(result.Hardware.Storage))
	}

	var front0 *models.Storage
	var nvme *models.Storage
	for i := range result.Hardware.Storage {
		s := &result.Hardware.Storage[i]
		if strings.EqualFold(s.SerialNumber, "SER-0") {
			front0 = s
		}
		if strings.EqualFold(s.SerialNumber, "NVME-1") {
			nvme = s
		}
	}
	if front0 == nil {
		t.Fatalf("expected merged Front0 disk by serial SER-0")
	}
	if front0.Model != "PM893" {
		t.Fatalf("expected Front0 model PM893, got %q", front0.Model)
	}
	if front0.SizeGB != 480 {
		t.Fatalf("expected Front0 size 480GB, got %d", front0.SizeGB)
	}
	if nvme == nil {
		t.Fatalf("expected NVMe disk by serial NVME-1")
	}
	if nvme.Type != "nvme" {
		t.Fatalf("expected nvme type, got %q", nvme.Type)
	}
}
|
||||
|
||||
// TestParseH3CG6 exercises the full G6 parse path end-to-end: FRU/board info,
// firmware JSON, CPU/DIMM XML detail files, storage, BMC network config,
// PSUs, host NICs with PCIe-card enrichment, sensors, and SEL events.
func TestParseH3CG6(t *testing.T) {
	p := &G6Parser{}

	files := []parser.ExtractedFile{
		{
			Path: "static/FRUInfo.ini",
			Content: []byte(`[Baseboard]
Board Manufacturer=H3C
Board Product Name=RS36M2C6SB
Product Product Name=H3C UniServer R4700 G6
Product Serial Number=210235A4FYH257000010
Product Part Number=0235A4FY
`),
		},
		{
			Path: "static/firmware_version.json",
			Content: []byte(`{
"BMCP": {"Firmware Name":"HDM","Firmware Version":"1.83","Location":"bmc card","Part Model":"-"},
"BIOS": {"Firmware Name":"BIOS","Firmware Version":"6.10.53","Location":"system board","Part Model":"-"}
}`),
		},
		{
			// Numeric CPU fields are hex-encoded (e.g. 0x18 cores = 24).
			Path: "static/CPUDetailInfo.xml",
			Content: []byte(`<Root>
<CPU1>
<Status>Presence</Status>
<Model>INTEL(R) XEON(R) GOLD 6542Y</Model>
<ProcessorSpeed>0xb54</ProcessorSpeed>
<ProcessorMaxSpeed>0x1004</ProcessorMaxSpeed>
<TotalCores>0x18</TotalCores>
<TotalThreads>0x30</TotalThreads>
<SerialNumber>68-5C-81-C1-0E-A3-4E-40</SerialNumber>
<PPIN>68-5C-81-C1-0E-A3-4E-40</PPIN>
</CPU1>
</Root>`),
		},
		{
			// DIMMSize is hex MB (0x10000 = 65536 MB).
			Path: "static/MemoryDetailInfo.xml",
			Content: []byte(`<Root>
<DIMM1>
<Status>Presence</Status>
<Name>CPU1_CH1_D0 (A0)</Name>
<PartNumber>M321R8GA0PB0-CWMXJ</PartNumber>
<DIMMTech>RDIMM</DIMMTech>
<SerialNumber>80CE032519135C82ED</SerialNumber>
<DIMMRanks>0x2</DIMMRanks>
<DIMMSize>0x10000</DIMMSize>
<CurFreq>0x1130</CurFreq>
<MaxFreq>0x15e0</MaxFreq>
<DIMMSilk>A0</DIMMSilk>
</DIMM1>
</Root>`),
		},
		{
			Path: "static/storage_disk.ini",
			Content: []byte(`[Disk_000]
SerialNumber=S6KLNN0Y516813
DiskSlotDesc=Front0
Present=YES
`),
		},
		{
			// BMC ifconfig-style dump; must not be confused with host NICs.
			Path: "static/net_cfg.ini",
			Content: []byte(`[Network Configuration]
eth0 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F6
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

eth0.2 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F6
inet6 addr: fe80::32c6:d7ff:fe94:54f6/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1496 Metric:1

eth1 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F5
inet addr:10.201.129.0 Bcast:10.201.143.255 Mask:255.255.240.0
inet6 addr: fe80::32c6:d7ff:fe94:54f5/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:65536 Metric:1
`),
		},
		{
			// Psu1 intentionally lists keys in a different order than Psu0.
			Path: "static/psu_cfg.ini",
			Content: []byte(`[Psu0]
SN=210231AGUNH257001569
Max_Power(W)=1600
Manufacturer=Great Wall
Power Status=Input Normal, Output Normal
Present_Status=Present
Power_ID=1
Model=GW-CRPS1600D2
Version=03.02.00

[Psu1]
Manufacturer=Great Wall
Power_ID=2
Version=03.02.00
Power Status=Input Normal, Output Normal
SN=210231AGUNH257001570
Model=GW-CRPS1600D2
Present_Status=Present
Max_Power(W)=1600
`),
		},
		{
			// Two ports of the same physical adapter plus its PCIe card entry.
			Path: "static/hardware_info.ini",
			Content: []byte(`[Ethernet adapters: Port 1]
Device Type : NIC
Network Port : Port 1
Location : PCIE-[1]
MAC Address : E4:3D:1A:6F:B0:30
Speed : 8.0GT/s
Product Name : NIC-BCM957414-F-B-25Gb-2P
[Ethernet adapters: Port 2]
Device Type : NIC
Network Port : Port 2
Location : PCIE-[1]
MAC Address : E4:3D:1A:6F:B0:31
Speed : 8.0GT/s
Product Name : NIC-BCM957414-F-B-25Gb-2P

[PCIe Card: PCIe 1]
Location : 1
Product Name : NIC-BCM957414-F-B-25Gb-2P
Status : Normal
Vendor ID : 0x14E4
Device ID : 0x16D7
Serial Number : NICSN-G6-001
Part Number : NICPN-G6-001
Firmware Version : 22.35.1010
`),
		},
		{
			Path: "static/sensor_info.ini",
			Content: []byte(`Sensor Name | Reading | Unit | Status| Crit low
Inlet_Temp | 20.000 | degrees C | ok | na
CPU1_Status | 0x0 | discrete | 0x8080| na
`),
		},
		{
			Path: "user/Sel.json",
			Content: []byte(`
{
"Created": "2025-07-14 03:34:18 UTC+08:00",
"Severity": "Info",
"EntryCode": "Asserted",
"EntryType": "Event",
"Id": 1,
"Level": "Info",
"Message": "Processor Presence detected",
"SensorName": "CPU1_Status",
"SensorType": "Processor"
},
{
"Created": "2025-07-14 20:56:45 UTC+08:00",
"Severity": "Critical",
"EntryCode": "Asserted",
"EntryType": "Event",
"Id": 2,
"Level": "Critical",
"Message": "Power Supply AC lost",
"SensorName": "PSU1_Status",
"SensorType": "Power Supply"
}
`),
		},
	}

	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}

	// Board identity from FRUInfo.ini.
	if result.Hardware == nil {
		t.Fatalf("expected hardware section")
	}
	if result.Hardware.BoardInfo.Manufacturer != "H3C" {
		t.Fatalf("unexpected board manufacturer: %q", result.Hardware.BoardInfo.Manufacturer)
	}
	if result.Hardware.BoardInfo.ProductName != "H3C UniServer R4700 G6" {
		t.Fatalf("unexpected board product: %q", result.Hardware.BoardInfo.ProductName)
	}
	if result.Hardware.BoardInfo.SerialNumber != "210235A4FYH257000010" {
		t.Fatalf("unexpected board serial: %q", result.Hardware.BoardInfo.SerialNumber)
	}

	// Firmware (HDM + BIOS), CPU (hex 0x18 = 24 cores), DIMM (0x10000 = 64GB).
	if len(result.Hardware.Firmware) < 2 {
		t.Fatalf("expected firmware entries, got %d", len(result.Hardware.Firmware))
	}
	if len(result.Hardware.CPUs) != 1 {
		t.Fatalf("expected 1 cpu, got %d", len(result.Hardware.CPUs))
	}
	if result.Hardware.CPUs[0].Cores != 24 {
		t.Fatalf("expected 24 cores, got %d", result.Hardware.CPUs[0].Cores)
	}

	if len(result.Hardware.Memory) != 1 {
		t.Fatalf("expected 1 dimm, got %d", len(result.Hardware.Memory))
	}
	if result.Hardware.Memory[0].SizeMB != 65536 {
		t.Fatalf("expected 65536MB, got %d", result.Hardware.Memory[0].SizeMB)
	}

	if len(result.Hardware.Storage) != 1 {
		t.Fatalf("expected 1 disk, got %d", len(result.Hardware.Storage))
	}
	if result.Hardware.Storage[0].SerialNumber != "S6KLNN0Y516813" {
		t.Fatalf("unexpected disk serial: %q", result.Hardware.Storage[0].SerialNumber)
	}
	if len(result.Hardware.PowerSupply) != 2 {
		t.Fatalf("expected 2 PSUs from psu_cfg.ini, got %d", len(result.Hardware.PowerSupply))
	}
	if result.Hardware.PowerSupply[0].WattageW == 0 {
		t.Fatalf("expected PSU wattage parsed, got 0")
	}

	// Both ports must collapse into one host adapter carrying both MACs,
	// enriched with serial/part/firmware from the PCIe card section.
	if len(result.Hardware.NetworkAdapters) != 1 {
		t.Fatalf("expected 1 host network adapter from hardware_info.ini, got %d", len(result.Hardware.NetworkAdapters))
	}
	macs := make(map[string]struct{})
	var hostNIC models.NetworkAdapter
	var hostNICFound bool
	for _, nic := range result.Hardware.NetworkAdapters {
		if len(nic.MACAddresses) == 0 {
			t.Fatalf("expected MAC on network adapter %+v", nic)
		}
		for _, mac := range nic.MACAddresses {
			macs[strings.ToLower(mac)] = struct{}{}
		}
		if strings.EqualFold(nic.Slot, "PCIe 1") && strings.Contains(strings.ToLower(nic.Model), "bcm957414") {
			hostNIC = nic
			hostNICFound = true
		}
	}
	if !hostNICFound {
		t.Fatalf("expected host NIC from hardware_info.ini, got %+v", result.Hardware.NetworkAdapters)
	}
	if _, ok := macs["e4:3d:1a:6f:b0:30"]; !ok {
		t.Fatalf("expected host NIC MAC e4:3d:1a:6f:b0:30 in adapters, got %+v", result.Hardware.NetworkAdapters)
	}
	if _, ok := macs["e4:3d:1a:6f:b0:31"]; !ok {
		t.Fatalf("expected host NIC MAC e4:3d:1a:6f:b0:31 in adapters, got %+v", result.Hardware.NetworkAdapters)
	}
	if !strings.Contains(strings.ToLower(hostNIC.Vendor), "broadcom") {
		t.Fatalf("expected host NIC vendor enrichment from Vendor ID, got %q", hostNIC.Vendor)
	}
	if hostNIC.SerialNumber != "NICSN-G6-001" {
		t.Fatalf("expected host NIC serial from PCIe card section, got %q", hostNIC.SerialNumber)
	}
	if hostNIC.PartNumber != "NICPN-G6-001" {
		t.Fatalf("expected host NIC part number from PCIe card section, got %q", hostNIC.PartNumber)
	}
	if hostNIC.Firmware != "22.35.1010" {
		t.Fatalf("expected host NIC firmware from PCIe card section, got %q", hostNIC.Firmware)
	}

	// Sensor table and SEL events.
	if len(result.Sensors) != 2 {
		t.Fatalf("expected 2 sensors, got %d", len(result.Sensors))
	}
	if result.Sensors[0].Name != "Inlet_Temp" {
		t.Fatalf("unexpected first sensor: %q", result.Sensors[0].Name)
	}

	if len(result.Events) != 2 {
		t.Fatalf("expected 2 events, got %d", len(result.Events))
	}
	if result.Events[0].Timestamp.Year() != 2025 || result.Events[0].Timestamp.Month() != 7 {
		t.Fatalf("expected SEL timestamp from payload, got %s", result.Events[0].Timestamp)
	}
	if result.Events[1].Severity != models.SeverityCritical {
		t.Fatalf("expected critical severity for AC lost event, got %q", result.Events[1].Severity)
	}
}
|
||||
|
||||
// TestParseH3CG5_PCIeArgumentsEnrichesNonNVMeStorage verifies that model and
// vendor data from PCIe_arguments_table.xml enriches both SATA and NVMe
// disks (vendor_id 0x144D must resolve to Samsung).
func TestParseH3CG5_PCIeArgumentsEnrichesNonNVMeStorage(t *testing.T) {
	p := &G5Parser{}
	files := []parser.ExtractedFile{
		{
			Path: "static/storage_disk.ini",
			Content: []byte(`[Disk_000]
DiskSlotDesc=Front slot 3
Present=YES
SerialNumber=SAT-03
`),
		},
		{
			Path: "static/NVMe_info.txt",
			Content: []byte(`[NVMe_0]
Present=YES
DiskSlotDesc=Front slot 108
SerialNumber=NVME-108
`),
		},
		{
			// One SATA SSD entry and one NVMe SSD entry keyed by PCIe slot.
			Path: "static/PCIe_arguments_table.xml",
			Content: []byte(`<root>
<PCIE100>
<base_args>
<type>SSD</type>
<name>SSD-SATA-960G</name>
</base_args>
<type_get_args>
<bios_args>
<vendor_id>0x144D</vendor_id>
</bios_args>
</type_get_args>
</PCIE100>
<PCIE200>
<base_args>
<type>SSD</type>
<name>SSD-3.84T-NVMe-SFF</name>
</base_args>
<type_get_args>
<bios_args>
<vendor_id>0x144D</vendor_id>
</bios_args>
</type_get_args>
</PCIE200>
</root>`),
		},
	}

	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}
	if result.Hardware == nil {
		t.Fatalf("expected hardware section")
	}

	if len(result.Hardware.Storage) != 2 {
		t.Fatalf("expected 2 storage devices, got %d", len(result.Hardware.Storage))
	}

	// Locate both disks by serial number.
	var sata *models.Storage
	var nvme *models.Storage
	for i := range result.Hardware.Storage {
		s := &result.Hardware.Storage[i]
		switch s.SerialNumber {
		case "SAT-03":
			sata = s
		case "NVME-108":
			nvme = s
		}
	}

	if sata == nil {
		t.Fatalf("expected SATA storage SAT-03")
	}
	if sata.Model != "SSD-SATA-960G" {
		t.Fatalf("expected SATA model enrichment from PCIe table, got %q", sata.Model)
	}
	if !strings.Contains(strings.ToLower(sata.Manufacturer), "samsung") {
		t.Fatalf("expected SATA vendor enrichment to Samsung, got %q", sata.Manufacturer)
	}

	if nvme == nil {
		t.Fatalf("expected NVMe storage NVME-108")
	}
	if nvme.Model != "SSD-3.84T-NVMe-SFF" {
		t.Fatalf("expected NVMe model enrichment from PCIe table, got %q", nvme.Model)
	}
	if !strings.Contains(strings.ToLower(nvme.Manufacturer), "samsung") {
		t.Fatalf("expected NVMe vendor enrichment to Samsung, got %q", nvme.Manufacturer)
	}
}
|
||||
|
||||
func TestParseH3CG5_VariantLayout(t *testing.T) {
|
||||
p := &G5Parser{}
|
||||
|
||||
files := []parser.ExtractedFile{
|
||||
{
|
||||
Path: "static/FRUInfo.ini",
|
||||
Content: []byte(`[Baseboard]
|
||||
Board Manufacturer=H3C
|
||||
Product Product Name=H3C UniServer R4900 G5
|
||||
Product Serial Number=02A6AX5231C003VM
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/firmware_version.ini",
|
||||
Content: []byte(`[System board]
|
||||
BIOS Version : 5.59 V100R001B05D078
|
||||
ME Version : 4.4.4.202
|
||||
HDM Version : 3.34.01 HDM V100R001B05D078SP01
|
||||
CPLD Version : V00C
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/board_cfg.ini",
|
||||
Content: []byte(`[Board Type]
|
||||
Board Type : R4900 G5
|
||||
|
||||
[Board Version]
|
||||
Board Version : VER.D
|
||||
|
||||
[Customer ID]
|
||||
CustomerID : 255
|
||||
|
||||
[OEM ID]
|
||||
OEM Flag : 1
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/hardware_info.ini",
|
||||
Content: []byte(`[Processors: Processor 1]
|
||||
Model : Intel(R) Xeon(R) Gold 6342 CPU @ 2.80GHz
|
||||
Status : Normal
|
||||
Frequency : 2800 MHz
|
||||
Cores : 24
|
||||
Threads : 48
|
||||
L1 Cache : 1920 KB
|
||||
L2 Cache : 30720 KB
|
||||
L3 Cache : 36864 KB
|
||||
CPU PPIN : 49-A9-50-C0-15-9F-2D-DC
|
||||
|
||||
[Processors: Processor 2]
|
||||
Model : Intel(R) Xeon(R) Gold 6342 CPU @ 2.80GHz
|
||||
Status : Normal
|
||||
Frequency : 2800 MHz
|
||||
Cores : 24
|
||||
Threads : 48
|
||||
CPU PPIN : 49-AC-3D-BF-85-7F-17-58
|
||||
|
||||
[Memory Details: Dimm Index 0]
|
||||
Location : Processor 1
|
||||
Channel : 1
|
||||
Socket ID : A0
|
||||
Status : Normal
|
||||
Size : 65536 MB
|
||||
Maximum Frequency : 3200 MHz
|
||||
Type : DDR4
|
||||
Ranks : 2R DIMM
|
||||
Technology : RDIMM
|
||||
Part Number : M393A8G40AB2-CWE
|
||||
Manufacture : Samsung
|
||||
Serial Number : S02K0D0243351D7079
|
||||
|
||||
[Memory Details: Dimm Index 16]
|
||||
Location : Processor 2
|
||||
Channel : 1
|
||||
Socket ID : A0
|
||||
Status : Normal
|
||||
Size : 65536 MB
|
||||
Maximum Frequency : 3200 MHz
|
||||
Type : DDR4
|
||||
Ranks : 2R DIMM
|
||||
Technology : RDIMM
|
||||
Part Number : M393A8G40AB2-CWE
|
||||
Manufacture : Samsung
|
||||
Serial Number : S02K0D0243351D73F0
|
||||
|
||||
[Ethernet adapters: Port 1]
|
||||
Device Type : NIC
|
||||
Network Port : Port 1
|
||||
Location : PCIE-[1]
|
||||
MAC Address : E4:3D:1A:6F:B0:30
|
||||
Speed : 8.0GT/s
|
||||
Product Name : NIC-BCM957414-F-B-25Gb-2P
|
||||
[Ethernet adapters: Port 2]
|
||||
Device Type : NIC
|
||||
Network Port : Port 2
|
||||
Location : PCIE-[1]
|
||||
MAC Address : E4:3D:1A:6F:B0:31
|
||||
Speed : 8.0GT/s
|
||||
Product Name : NIC-BCM957414-F-B-25Gb-2P
|
||||
|
||||
[Ethernet adapters: Port 1]
|
||||
Device Type : NIC
|
||||
Network Port : Port 1
|
||||
Location : PCIE-[4]
|
||||
MAC Address : E8:EB:D3:4F:2E:90
|
||||
Speed : 8.0GT/s
|
||||
Product Name : NIC-MCX512A-ACAT-2*25Gb-F
|
||||
[Ethernet adapters: Port 2]
|
||||
Device Type : NIC
|
||||
Network Port : Port 2
|
||||
Location : PCIE-[4]
|
||||
MAC Address : E8:EB:D3:4F:2E:91
|
||||
Speed : 8.0GT/s
|
||||
Product Name : NIC-MCX512A-ACAT-2*25Gb-F
|
||||
|
||||
[PCIe Card: PCIe 1]
|
||||
Location : 1
|
||||
Product Name : NIC-BCM957414-F-B-25Gb-2P
|
||||
Status : Normal
|
||||
Vendor ID : 0x14E4
|
||||
Device ID : 0x16D7
|
||||
Serial Number : NICSN-G5-001
|
||||
Part Number : NICPN-G5-001
|
||||
Firmware Version : 21.80.1
|
||||
|
||||
[PCIe Card: PCIe 4]
|
||||
Location : 4
|
||||
Product Name : NIC-MCX512A-ACAT-2*25Gb-F
|
||||
Status : Normal
|
||||
Vendor ID : 0x15B3
|
||||
Device ID : 0x1017
|
||||
Serial Number : NICSN-G5-004
|
||||
Part Number : NICPN-G5-004
|
||||
Firmware Version : 28.33.15
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/hardware.info",
|
||||
Content: []byte(`[Disk_0_Front_NA]
|
||||
Present=YES
|
||||
SlotNum=0
|
||||
FrontOrRear=Front
|
||||
SerialNumber=22443C4EE184
|
||||
|
||||
[Nvme_Front slot 21]
|
||||
Present=YES
|
||||
NvmePhySlot=Front slot 21
|
||||
SlotNum=121
|
||||
SerialNumber=NVME-21
|
||||
|
||||
[Nvme_255_121]
|
||||
Present=YES
|
||||
SlotNum=121
|
||||
SerialNumber=NVME-21
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/raid.json",
|
||||
Content: []byte(`{
|
||||
"RAIDCONFIG": {
|
||||
"Ctrl info": [
|
||||
{
|
||||
"CtrlDevice Slot": 3,
|
||||
"CtrlDevice Name": "AVAGO MegaRAID SAS 9460-8i",
|
||||
"LDInfo": [
|
||||
{
|
||||
"LD ID": 0,
|
||||
"LD_name": "SystemRAID",
|
||||
"RAID_level(RAID 0,RAID 1,RAID 5,RAID 6,RAID 00,RAID 10,RAID 50,RAID 60)": "RAID1",
|
||||
"Logical_capicity(per 512byte)": 936640512
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"CtrlDevice Slot": 6,
|
||||
"CtrlDevice Name": "MegaRAID 9560-16i 8GB",
|
||||
"LDInfo": [
|
||||
{
|
||||
"LD ID": 0,
|
||||
"LD_name": "DataRAID",
|
||||
"RAID_level(RAID 0,RAID 1,RAID 5,RAID 6,RAID 00,RAID 10,RAID 50,RAID 60)": "RAID50",
|
||||
"Logical_capicity(per 512byte)": 90004783104
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`),
|
||||
},
|
||||
{
|
||||
Path: "static/Raid_BP_Conf_Info.ini",
|
||||
Content: []byte(`[BP Information]
|
||||
Description | BP TYPE | I2cPort | BpConnectorNum | FrontOrRear | Node Num | DiskSlotRange |
|
||||
8SFF SAS/SATA | BP_G5_8SFF | AUX_1 | ~ | ~ | ~ | ~ |
|
||||
8SFF SAS/SATA | BP_G5_8SFF | AUX_2 | ~ | ~ | ~ | ~ |
|
||||
8SFF SAS/SATA | BP_G5_8SFF | AUX_3 | ~ | ~ | ~ | ~ |
|
||||
|
||||
[RAID Information]
|
||||
PCIE SLOT | RAID SAS_NUM |
|
||||
3 | 2 |
|
||||
6 | 4 |
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/PCIe_arguments_table.xml",
|
||||
Content: []byte(`<root>
|
||||
<PCIE100>
|
||||
<base_args>
|
||||
<type>SSD</type>
|
||||
<name>SSD-1.92T/3.84T-NVMe-EV-SFF-sa</name>
|
||||
</base_args>
|
||||
<type_get_args>
|
||||
<bios_args>
|
||||
<vendor_id>0x144D</vendor_id>
|
||||
</bios_args>
|
||||
</type_get_args>
|
||||
</PCIE100>
|
||||
</root>`),
|
||||
},
|
||||
{
|
||||
Path: "static/psu_cfg.ini",
|
||||
Content: []byte(`[Active / Standby configuration]
|
||||
Power ID : 1
|
||||
Present Status : Present
|
||||
Cold Status : Active Power
|
||||
Model : DPS-1300AB-6 R
|
||||
SN : 210231ACT9H232000080
|
||||
Max Power(W) : 1300
|
||||
|
||||
Power ID : 2
|
||||
Present Status : Present
|
||||
Cold Status : Active Power
|
||||
Model : DPS-1300AB-6 R
|
||||
SN : 210231ACT9H232000079
|
||||
Max Power(W) : 1300
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/net_cfg.ini",
|
||||
Content: []byte(`[Network Configuration]
|
||||
eth0 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F6
|
||||
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
|
||||
|
||||
eth0.2 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F6
|
||||
inet6 addr: fe80::32c6:d7ff:fe94:54f6/64 Scope:Link
|
||||
UP BROADCAST RUNNING MULTICAST MTU:1496 Metric:1
|
||||
|
||||
eth1 Link encap:Ethernet HWaddr 30:C6:D7:94:54:F5
|
||||
inet addr:10.201.129.0 Bcast:10.201.143.255 Mask:255.255.240.0
|
||||
inet6 addr: fe80::32c6:d7ff:fe94:54f5/64 Scope:Link
|
||||
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
|
||||
|
||||
lo Link encap:Local Loopback
|
||||
inet addr:127.0.0.1 Mask:255.0.0.0
|
||||
UP LOOPBACK RUNNING MTU:65536 Metric:1
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "static/smartdata/Front0/first_date_analysis.txt",
|
||||
Content: []byte(`The Current System Time Is 2023_09_22_14_19_39
|
||||
Model Info: ATA Micron_5300_MTFD
|
||||
Serial Number: 22443C4EE184
|
||||
`),
|
||||
},
|
||||
{
|
||||
Path: "user/test1.csv",
|
||||
Content: []byte(`Record Time Stamp,Severity Level,Severity Level ID,SensorTypeStr,SensorName,Event Dir,Event Occurred Time,DescInfo,Explanation,Suggestion
|
||||
2025-04-01 08:50:13,Minor,0x1,NA,NA,NA,2025-04-01 08:50:13,"SSH login failed from IP: 10.200.10.121 user: admin"," "," "
|
||||
Pre-Init,Info,0x0,Management Subsystem Health,Health,Assertion event,Pre-Init,"Management controller off-line"," "," "
|
||||
2025-04-01 08:51:10,Major,0x2,Power Supply,PSU1_Status,Assertion event,2025-04-01 08:51:10,"Power Supply AC lost"," "," "
|
||||
`),
|
||||
},
|
||||
}
|
||||
|
||||
result, err := p.Parse(files)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
if result.Hardware == nil {
|
||||
t.Fatalf("expected hardware section")
|
||||
}
|
||||
|
||||
if len(result.Hardware.CPUs) != 2 {
|
||||
t.Fatalf("expected 2 CPUs from hardware_info.ini, got %d", len(result.Hardware.CPUs))
|
||||
}
|
||||
if result.Hardware.CPUs[0].FrequencyMHz != 2800 {
|
||||
t.Fatalf("expected CPU frequency 2800MHz, got %d", result.Hardware.CPUs[0].FrequencyMHz)
|
||||
}
|
||||
|
||||
if len(result.Hardware.Memory) != 2 {
|
||||
t.Fatalf("expected 2 DIMMs from hardware_info.ini, got %d", len(result.Hardware.Memory))
|
||||
}
|
||||
if result.Hardware.Memory[0].SizeMB != 65536 {
|
||||
t.Fatalf("expected DIMM size 65536MB, got %d", result.Hardware.Memory[0].SizeMB)
|
||||
}
|
||||
|
||||
if len(result.Hardware.Firmware) < 4 {
|
||||
t.Fatalf("expected firmware entries from firmware_version.ini, got %d", len(result.Hardware.Firmware))
|
||||
}
|
||||
if result.Hardware.BoardInfo.Version == "" {
|
||||
t.Fatalf("expected board version from board_cfg.ini")
|
||||
}
|
||||
if !strings.Contains(result.Hardware.BoardInfo.Description, "CustomerID: 255") {
|
||||
t.Fatalf("expected board description enrichment from board_cfg.ini, got %q", result.Hardware.BoardInfo.Description)
|
||||
}
|
||||
|
||||
if len(result.Hardware.Storage) != 2 {
|
||||
t.Fatalf("expected 2 unique storage devices from hardware.info, got %d", len(result.Hardware.Storage))
|
||||
}
|
||||
var nvmeFound bool
|
||||
var diskModelEnriched bool
|
||||
for _, s := range result.Hardware.Storage {
|
||||
if s.SerialNumber == "NVME-21" {
|
||||
nvmeFound = true
|
||||
if s.Type != "nvme" {
|
||||
t.Fatalf("expected NVME-21 type nvme, got %q", s.Type)
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(s.Manufacturer), "samsung") {
|
||||
t.Fatalf("expected NVME vendor enrichment to Samsung, got %q", s.Manufacturer)
|
||||
}
|
||||
if s.Model != "SSD-1.92T/3.84T-NVMe-EV-SFF-sa" {
|
||||
t.Fatalf("expected NVME model enrichment from PCIe table, got %q", s.Model)
|
||||
}
|
||||
}
|
||||
if s.SerialNumber == "22443C4EE184" && strings.Contains(s.Model, "Micron") {
|
||||
diskModelEnriched = true
|
||||
}
|
||||
}
|
||||
if !nvmeFound {
|
||||
t.Fatalf("expected deduped NVME storage by serial NVME-21")
|
||||
}
|
||||
if !diskModelEnriched {
|
||||
t.Fatalf("expected disk model enrichment from smartdata by serial")
|
||||
}
|
||||
|
||||
if len(result.Hardware.PowerSupply) != 2 {
|
||||
t.Fatalf("expected 2 PSUs from psu_cfg.ini, got %d", len(result.Hardware.PowerSupply))
|
||||
}
|
||||
if result.Hardware.PowerSupply[0].WattageW == 0 {
|
||||
t.Fatalf("expected PSU wattage parsed, got 0")
|
||||
}
|
||||
if len(result.Hardware.NetworkAdapters) != 2 {
|
||||
t.Fatalf("expected 2 host network adapters from hardware_info.ini, got %d", len(result.Hardware.NetworkAdapters))
|
||||
}
|
||||
if len(result.Hardware.NetworkCards) != 2 {
|
||||
t.Fatalf("expected 2 network cards synthesized from adapters, got %d", len(result.Hardware.NetworkCards))
|
||||
}
|
||||
var g5NIC models.NetworkAdapter
|
||||
var g5NICFound bool
|
||||
for _, nic := range result.Hardware.NetworkAdapters {
|
||||
if strings.EqualFold(nic.Slot, "PCIe 1") && strings.Contains(strings.ToLower(nic.Model), "bcm957414") {
|
||||
g5NIC = nic
|
||||
g5NICFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !g5NICFound {
|
||||
t.Fatalf("expected host NIC PCIe 1 from hardware_info.ini, got %+v", result.Hardware.NetworkAdapters)
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(g5NIC.Vendor), "broadcom") {
|
||||
t.Fatalf("expected G5 NIC vendor from Vendor ID, got %q", g5NIC.Vendor)
|
||||
}
|
||||
if g5NIC.SerialNumber != "NICSN-G5-001" {
|
||||
t.Fatalf("expected G5 NIC serial from PCIe card section, got %q", g5NIC.SerialNumber)
|
||||
}
|
||||
if g5NIC.PartNumber != "NICPN-G5-001" {
|
||||
t.Fatalf("expected G5 NIC part number from PCIe card section, got %q", g5NIC.PartNumber)
|
||||
}
|
||||
if g5NIC.Firmware != "21.80.1" {
|
||||
t.Fatalf("expected G5 NIC firmware from PCIe card section, got %q", g5NIC.Firmware)
|
||||
}
|
||||
|
||||
if len(result.Hardware.Devices) != 5 {
|
||||
t.Fatalf("expected 5 topology devices from Raid_BP_Conf_Info.ini (3 BP + 2 RAID), got %d", len(result.Hardware.Devices))
|
||||
}
|
||||
var bpFound bool
|
||||
var raidFound bool
|
||||
for _, d := range result.Hardware.Devices {
|
||||
if strings.Contains(d.ID, "h3c-bp-") && strings.Contains(d.Model, "BP_G5_8SFF") {
|
||||
bpFound = true
|
||||
}
|
||||
desc, _ := d.Details["description"].(string)
|
||||
if strings.Contains(d.ID, "h3c-raid-slot-3") && strings.Contains(desc, "SAS ports: 2") {
|
||||
raidFound = true
|
||||
}
|
||||
}
|
||||
if !bpFound || !raidFound {
|
||||
t.Fatalf("expected parsed backplane and RAID topology devices, got %+v", result.Hardware.Devices)
|
||||
}
|
||||
|
||||
if len(result.Hardware.Volumes) != 2 {
|
||||
t.Fatalf("expected 2 RAID volumes (same LD ID on different controllers), got %d", len(result.Hardware.Volumes))
|
||||
}
|
||||
var raid1Found bool
|
||||
var raid50Found bool
|
||||
for _, v := range result.Hardware.Volumes {
|
||||
if strings.Contains(v.Controller, "slot 3") {
|
||||
raid1Found = v.RAIDLevel == "RAID1" && v.CapacityBytes > 0
|
||||
}
|
||||
if strings.Contains(v.Controller, "slot 6") {
|
||||
raid50Found = v.RAIDLevel == "RAID50" && v.CapacityBytes > 0
|
||||
}
|
||||
}
|
||||
if !raid1Found || !raid50Found {
|
||||
t.Fatalf("expected RAID1 and RAID50 volumes with parsed capacities, got %+v", result.Hardware.Volumes)
|
||||
}
|
||||
|
||||
if len(result.Events) != 2 {
|
||||
t.Fatalf("expected 2 CSV events (Pre-Init skipped), got %d", len(result.Events))
|
||||
}
|
||||
if result.Events[0].Severity != models.SeverityWarning {
|
||||
t.Fatalf("expected Minor CSV severity mapped to warning, got %q", result.Events[0].Severity)
|
||||
}
|
||||
if result.Events[1].Severity != models.SeverityCritical {
|
||||
t.Fatalf("expected Major CSV severity mapped to critical, got %q", result.Events[1].Severity)
|
||||
}
|
||||
}
|
||||
151
internal/parser/vendors/inspur/asset.go
vendored
151
internal/parser/vendors/inspur/asset.go
vendored
@@ -3,12 +3,15 @@ package inspur
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser/vendors/pciids"
|
||||
)
|
||||
|
||||
var rawHexPCIDeviceRegex = regexp.MustCompile(`(?i)^0x[0-9a-f]+$`)
|
||||
|
||||
// AssetJSON represents the structure of Inspur asset.json file
|
||||
type AssetJSON struct {
|
||||
VersionInfo []struct {
|
||||
@@ -55,6 +58,7 @@ type AssetJSON struct {
|
||||
} `json:"MemInfo"`
|
||||
|
||||
HddInfo []struct {
|
||||
PresentBitmap []int `json:"PresentBitmap"`
|
||||
SerialNumber string `json:"SerialNumber"`
|
||||
Manufacturer string `json:"Manufacturer"`
|
||||
ModelName string `json:"ModelName"`
|
||||
@@ -90,8 +94,12 @@ type AssetJSON struct {
|
||||
} `json:"PcieInfo"`
|
||||
}
|
||||
|
||||
// ParseAssetJSON parses Inspur asset.json content
|
||||
func ParseAssetJSON(content []byte) (*models.HardwareConfig, error) {
|
||||
// ParseAssetJSON parses Inspur asset.json content.
|
||||
// - pcieSlotDeviceNames: optional map from integer PCIe slot ID to device name string,
|
||||
// sourced from devicefrusdr.log PCIe REST section. Fills missing NVMe model names.
|
||||
// - pcieSlotSerials: optional map from integer PCIe slot ID to serial number string,
|
||||
// sourced from audit.log SN-changed events. Fills missing NVMe serial numbers.
|
||||
func ParseAssetJSON(content []byte, pcieSlotDeviceNames map[int]string, pcieSlotSerials map[int]string) (*models.HardwareConfig, error) {
|
||||
var asset AssetJSON
|
||||
if err := json.Unmarshal(content, &asset); err != nil {
|
||||
return nil, err
|
||||
@@ -158,8 +166,36 @@ func ParseAssetJSON(content []byte) (*models.HardwareConfig, error) {
|
||||
}
|
||||
|
||||
// Parse storage info
|
||||
seenHDDFW := make(map[string]bool)
|
||||
for _, hdd := range asset.HddInfo {
|
||||
slot := normalizeAssetHDDSlot(hdd.LocationString, hdd.Location, hdd.DiskInterfaceType)
|
||||
modelName := strings.TrimSpace(hdd.ModelName)
|
||||
serial := normalizeRedisValue(hdd.SerialNumber)
|
||||
present := bitmapHasAnyValue(hdd.PresentBitmap)
|
||||
if !present && (slot != "" || modelName != "" || serial != "" || hdd.Capacity > 0) {
|
||||
present = true
|
||||
}
|
||||
|
||||
if !present && slot == "" && modelName == "" && serial == "" && hdd.Capacity == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Enrich model name from PCIe device name (supplied from devicefrusdr.log).
|
||||
// BMC does not populate HddInfo.ModelName for NVMe drives, but the PCIe REST
|
||||
// section in devicefrusdr.log carries the drive model as device_name.
|
||||
if modelName == "" && hdd.PcieSlot > 0 && len(pcieSlotDeviceNames) > 0 {
|
||||
if devName, ok := pcieSlotDeviceNames[hdd.PcieSlot]; ok && devName != "" {
|
||||
modelName = devName
|
||||
}
|
||||
}
|
||||
|
||||
// Enrich serial number from audit.log SN-changed events (supplied via pcieSlotSerials).
|
||||
// BMC asset.json does not carry NVMe serial numbers; audit.log logs every SN change.
|
||||
if serial == "" && hdd.PcieSlot > 0 && len(pcieSlotSerials) > 0 {
|
||||
if sn, ok := pcieSlotSerials[hdd.PcieSlot]; ok && sn != "" {
|
||||
serial = sn
|
||||
}
|
||||
}
|
||||
|
||||
storageType := "HDD"
|
||||
if hdd.DiskInterfaceType == 5 {
|
||||
storageType = "NVMe"
|
||||
@@ -168,35 +204,21 @@ func ParseAssetJSON(content []byte) (*models.HardwareConfig, error) {
|
||||
}
|
||||
|
||||
// Resolve manufacturer: try vendor ID first, then model name extraction
|
||||
modelName := strings.TrimSpace(hdd.ModelName)
|
||||
manufacturer := resolveManufacturer(hdd.Manufacturer, modelName)
|
||||
|
||||
config.Storage = append(config.Storage, models.Storage{
|
||||
Slot: hdd.LocationString,
|
||||
Slot: slot,
|
||||
Type: storageType,
|
||||
Model: modelName,
|
||||
SizeGB: hdd.Capacity,
|
||||
SerialNumber: hdd.SerialNumber,
|
||||
SerialNumber: serial,
|
||||
Manufacturer: manufacturer,
|
||||
Firmware: hdd.FirmwareVersion,
|
||||
Interface: diskInterfaceToString(hdd.DiskInterfaceType),
|
||||
Present: present,
|
||||
})
|
||||
|
||||
// Add HDD firmware to firmware list (deduplicated by model+version)
|
||||
if hdd.FirmwareVersion != "" {
|
||||
fwKey := modelName + ":" + hdd.FirmwareVersion
|
||||
if !seenHDDFW[fwKey] {
|
||||
slot := hdd.LocationString
|
||||
if slot == "" {
|
||||
slot = fmt.Sprintf("%s %dGB", storageType, hdd.Capacity)
|
||||
}
|
||||
config.Firmware = append(config.Firmware, models.FirmwareInfo{
|
||||
DeviceName: fmt.Sprintf("%s (%s)", modelName, slot),
|
||||
Version: hdd.FirmwareVersion,
|
||||
})
|
||||
seenHDDFW[fwKey] = true
|
||||
}
|
||||
}
|
||||
// Disk firmware is already stored in Storage.Firmware — do not duplicate in Hardware.Firmware.
|
||||
}
|
||||
|
||||
// Parse PCIe info
|
||||
@@ -225,25 +247,24 @@ func ParseAssetJSON(content []byte) (*models.HardwareConfig, error) {
|
||||
}
|
||||
// Use device name from PCI IDs database if available
|
||||
if deviceName != "" {
|
||||
device.DeviceClass = deviceName
|
||||
device.DeviceClass = normalizeModelLabel(deviceName)
|
||||
}
|
||||
config.PCIeDevices = append(config.PCIeDevices, device)
|
||||
|
||||
// Extract GPUs (class 3 = display controller)
|
||||
if pcie.ClassCode == 3 {
|
||||
gpuModel := deviceName
|
||||
if gpuModel == "" {
|
||||
gpuModel = pcieClassToString(pcie.ClassCode, pcie.SubClassCode)
|
||||
}
|
||||
gpuModel := normalizeGPUModel(pcie.VendorId, pcie.DeviceId, deviceName, pcie.ClassCode, pcie.SubClassCode)
|
||||
gpu := models.GPU{
|
||||
Slot: pcie.LocString,
|
||||
Model: gpuModel,
|
||||
Manufacturer: vendor,
|
||||
VendorID: pcie.VendorId,
|
||||
DeviceID: pcie.DeviceId,
|
||||
BDF: formatBDF(pcie.BusNumber, pcie.DeviceNumber, pcie.FunctionNumber),
|
||||
LinkWidth: pcie.NegotiatedLinkWidth,
|
||||
LinkSpeed: pcieLinkSpeedToString(pcie.CurrentLinkSpeed),
|
||||
Slot: pcie.LocString,
|
||||
Model: gpuModel,
|
||||
Manufacturer: vendor,
|
||||
VendorID: pcie.VendorId,
|
||||
DeviceID: pcie.DeviceId,
|
||||
BDF: formatBDF(pcie.BusNumber, pcie.DeviceNumber, pcie.FunctionNumber),
|
||||
CurrentLinkWidth: pcie.NegotiatedLinkWidth,
|
||||
CurrentLinkSpeed: pcieLinkSpeedToString(pcie.CurrentLinkSpeed),
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: pcieLinkSpeedToString(pcie.MaxLinkSpeed),
|
||||
}
|
||||
if pcie.PartNumber != nil {
|
||||
gpu.PartNumber = strings.TrimSpace(*pcie.PartNumber)
|
||||
@@ -258,6 +279,45 @@ func ParseAssetJSON(content []byte) (*models.HardwareConfig, error) {
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func normalizeModelLabel(v string) string {
|
||||
v = strings.TrimSpace(v)
|
||||
if v == "" {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(strings.Fields(v), " ")
|
||||
}
|
||||
|
||||
func normalizeGPUModel(vendorID, deviceID int, model string, classCode, subClass int) string {
|
||||
model = normalizeModelLabel(model)
|
||||
|
||||
if model == "" || rawHexPCIDeviceRegex.MatchString(model) || isGenericGPUModelLabel(model) {
|
||||
if pciModel := normalizeModelLabel(pciids.DeviceName(vendorID, deviceID)); pciModel != "" {
|
||||
model = pciModel
|
||||
}
|
||||
}
|
||||
|
||||
if model == "" || isGenericGPUModelLabel(model) {
|
||||
model = pcieClassToString(classCode, subClass)
|
||||
}
|
||||
|
||||
// Last fallback for unknown NVIDIA display devices: expose PCI DeviceID
|
||||
// instead of generic "3D Controller".
|
||||
if (model == "" || strings.EqualFold(model, "3D Controller")) && vendorID == 0x10de && deviceID > 0 {
|
||||
return fmt.Sprintf("0x%04X", deviceID)
|
||||
}
|
||||
|
||||
return model
|
||||
}
|
||||
|
||||
func isGenericGPUModelLabel(model string) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(model)) {
|
||||
case "", "gpu", "display", "display controller", "vga", "3d controller", "other", "unknown":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func memoryTypeToString(memType int) string {
|
||||
switch memType {
|
||||
case 26:
|
||||
@@ -282,6 +342,29 @@ func diskInterfaceToString(ifType int) string {
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeAssetHDDSlot(locationString string, location int, diskInterfaceType int) string {
|
||||
slot := strings.TrimSpace(locationString)
|
||||
if slot != "" {
|
||||
return slot
|
||||
}
|
||||
if location < 0 {
|
||||
return ""
|
||||
}
|
||||
if diskInterfaceType == 5 {
|
||||
return fmt.Sprintf("OB%02d", location+1)
|
||||
}
|
||||
return fmt.Sprintf("%d", location)
|
||||
}
|
||||
|
||||
func bitmapHasAnyValue(values []int) bool {
|
||||
for _, v := range values {
|
||||
if v != 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func pcieLinkSpeedToString(speed int) string {
|
||||
switch speed {
|
||||
case 1:
|
||||
|
||||
48
internal/parser/vendors/inspur/asset_gpu_model_test.go
vendored
Normal file
48
internal/parser/vendors/inspur/asset_gpu_model_test.go
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
package inspur
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseAssetJSON_NVIDIAGPUModelFromPCIIDs(t *testing.T) {
|
||||
raw := []byte(`{
|
||||
"VersionInfo": [],
|
||||
"CpuInfo": [],
|
||||
"MemInfo": {"MemCommonInfo": [], "DimmInfo": []},
|
||||
"HddInfo": [],
|
||||
"PcieInfo": [{
|
||||
"VendorId": 4318,
|
||||
"DeviceId": 9019,
|
||||
"BusNumber": 12,
|
||||
"DeviceNumber": 0,
|
||||
"FunctionNumber": 0,
|
||||
"MaxLinkWidth": 16,
|
||||
"MaxLinkSpeed": 5,
|
||||
"NegotiatedLinkWidth": 16,
|
||||
"CurrentLinkSpeed": 5,
|
||||
"ClassCode": 3,
|
||||
"SubClassCode": 2,
|
||||
"PcieSlot": 11,
|
||||
"LocString": "#CPU0_PCIE2",
|
||||
"PartNumber": null,
|
||||
"SerialNumber": null,
|
||||
"Mac": []
|
||||
}]
|
||||
}`)
|
||||
|
||||
hw, err := ParseAssetJSON(raw, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseAssetJSON failed: %v", err)
|
||||
}
|
||||
if len(hw.GPUs) != 1 {
|
||||
t.Fatalf("expected 1 GPU, got %d", len(hw.GPUs))
|
||||
}
|
||||
if hw.GPUs[0].Model != "GH100 [H200 NVL]" {
|
||||
t.Fatalf("expected model GH100 [H200 NVL], got %q", hw.GPUs[0].Model)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeGPUModel_FallbackToDeviceIDForUnknownNVIDIA(t *testing.T) {
|
||||
got := normalizeGPUModel(0x10de, 0xbeef, "0xBEEF\t", 3, 2)
|
||||
if got != "0xBEEF" {
|
||||
t.Fatalf("expected 0xBEEF, got %q", got)
|
||||
}
|
||||
}
|
||||
94
internal/parser/vendors/inspur/audit.go
vendored
Normal file
94
internal/parser/vendors/inspur/audit.go
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// auditSNChangedNVMeRegex matches:
|
||||
// "Front Back Plane N NVMe DiskM SN changed from X to Y"
|
||||
// Captures: disk_num, new_serial
|
||||
var auditSNChangedNVMeRegex = regexp.MustCompile(`NVMe Disk(\d+)\s+SN changed from \S+\s+to\s+(\S+)`)
|
||||
|
||||
// auditSNChangedRAIDRegex matches:
|
||||
// "Raid(Pcie Slot:N) HDD(enclosure id:E slot:S) SN changed from X to Y"
|
||||
// Captures: pcie_slot, enclosure_id, slot_num, new_serial
|
||||
var auditSNChangedRAIDRegex = regexp.MustCompile(`Raid\(Pcie Slot:(\d+)\) HDD\(enclosure id:(\d+) slot:(\d+)\)\s+SN changed from \S+\s+to\s+(\S+)`)
|
||||
|
||||
// ParseAuditLogNVMeSerials parses audit.log and returns the final (latest) serial number
|
||||
// per NVMe disk number. The disk number matches the numeric suffix in PCIe location
|
||||
// strings like "#NVME0", "#NVME2", etc. from devicefrusdr.log.
|
||||
// Entries where the serial changed to "NULL" are excluded.
|
||||
func ParseAuditLogNVMeSerials(content []byte) map[int]string {
|
||||
serials := make(map[int]string)
|
||||
|
||||
for _, line := range strings.Split(string(content), "\n") {
|
||||
m := auditSNChangedNVMeRegex.FindStringSubmatch(line)
|
||||
if m == nil {
|
||||
continue
|
||||
}
|
||||
diskNum, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
serial := strings.TrimSpace(m[2])
|
||||
if strings.EqualFold(serial, "NULL") || serial == "" {
|
||||
delete(serials, diskNum)
|
||||
} else {
|
||||
serials[diskNum] = serial
|
||||
}
|
||||
}
|
||||
if len(serials) == 0 {
|
||||
return nil
|
||||
}
|
||||
return serials
|
||||
}
|
||||
|
||||
// ParseAuditLogRAIDSerials parses audit.log and returns the final (latest) serial number
|
||||
// per RAID backplane disk. Key format is "BP{enclosure_id-1}:{slot_num}" (e.g. "BP0:0").
|
||||
//
|
||||
// Each disk slot is claimed by a specific RAID controller (Pcie Slot:N). NULL events from
|
||||
// an old controller do not clear serials assigned by a newer controller, preventing stale
|
||||
// deletions when disks are migrated between RAID arrays.
|
||||
func ParseAuditLogRAIDSerials(content []byte) map[string]string {
|
||||
// owner tracks which PCIe RAID controller slot last assigned a serial to a disk key.
|
||||
serials := make(map[string]string)
|
||||
owner := make(map[string]int)
|
||||
|
||||
for _, line := range strings.Split(string(content), "\n") {
|
||||
m := auditSNChangedRAIDRegex.FindStringSubmatch(line)
|
||||
if m == nil {
|
||||
continue
|
||||
}
|
||||
pcieSlot, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
enclosureID, err := strconv.Atoi(m[2])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
slotNum, err := strconv.Atoi(m[3])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
serial := strings.TrimSpace(m[4])
|
||||
key := fmt.Sprintf("BP%d:%d", enclosureID-1, slotNum)
|
||||
if strings.EqualFold(serial, "NULL") || serial == "" {
|
||||
// Only clear if this controller was the last to set the serial.
|
||||
if owner[key] == pcieSlot {
|
||||
delete(serials, key)
|
||||
delete(owner, key)
|
||||
}
|
||||
} else {
|
||||
serials[key] = serial
|
||||
owner[key] = pcieSlot
|
||||
}
|
||||
}
|
||||
if len(serials) == 0 {
|
||||
return nil
|
||||
}
|
||||
return serials
|
||||
}
|
||||
832
internal/parser/vendors/inspur/component.go
vendored
832
internal/parser/vendors/inspur/component.go
vendored
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser/vendors/pciids"
|
||||
)
|
||||
|
||||
// ParseComponentLog parses component.log file and extracts detailed hardware info
|
||||
@@ -27,6 +28,9 @@ func ParseComponentLog(content []byte, hw *models.HardwareConfig) {
|
||||
// Parse RESTful HDD info
|
||||
parseHDDInfo(text, hw)
|
||||
|
||||
// Parse RESTful diskbackplane info
|
||||
parseDiskBackplaneInfo(text, hw)
|
||||
|
||||
// Parse RESTful Network Adapter info
|
||||
parseNetworkAdapterInfo(text, hw)
|
||||
|
||||
@@ -42,26 +46,38 @@ func ParseComponentLogEvents(content []byte) []models.Event {
|
||||
// Parse RESTful Memory info for Warning/Error status
|
||||
memEvents := parseMemoryEvents(text)
|
||||
events = append(events, memEvents...)
|
||||
events = append(events, parseFanEvents(text)...)
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
// ParseComponentLogSensors extracts sensor readings from component.log JSON sections.
|
||||
func ParseComponentLogSensors(content []byte) []models.SensorReading {
|
||||
text := string(content)
|
||||
var out []models.SensorReading
|
||||
out = append(out, parseFanSensors(text)...)
|
||||
out = append(out, parseDiskBackplaneSensors(text)...)
|
||||
out = append(out, parsePSUSummarySensors(text)...)
|
||||
return out
|
||||
}
|
||||
|
||||
// MemoryRESTInfo represents the RESTful Memory info structure
|
||||
type MemoryRESTInfo struct {
|
||||
MemModules []struct {
|
||||
MemModID int `json:"mem_mod_id"`
|
||||
ConfigStatus int `json:"config_status"`
|
||||
MemModSlot string `json:"mem_mod_slot"`
|
||||
MemModSize int `json:"mem_mod_size"`
|
||||
MemModType string `json:"mem_mod_type"`
|
||||
MemModTechnology string `json:"mem_mod_technology"`
|
||||
MemModFrequency int `json:"mem_mod_frequency"`
|
||||
MemModCurrentFreq int `json:"mem_mod_current_frequency"`
|
||||
MemModVendor string `json:"mem_mod_vendor"`
|
||||
MemModPartNum string `json:"mem_mod_part_num"`
|
||||
MemModSerial string `json:"mem_mod_serial_num"`
|
||||
MemModRanks int `json:"mem_mod_ranks"`
|
||||
Status string `json:"status"`
|
||||
MemModID int `json:"mem_mod_id"`
|
||||
ConfigStatus int `json:"config_status"`
|
||||
MemModSlot string `json:"mem_mod_slot"`
|
||||
MemModStatus int `json:"mem_mod_status"`
|
||||
MemModSize int `json:"mem_mod_size"`
|
||||
MemModType string `json:"mem_mod_type"`
|
||||
MemModTechnology string `json:"mem_mod_technology"`
|
||||
MemModFrequency int `json:"mem_mod_frequency"`
|
||||
MemModCurrentFreq int `json:"mem_mod_current_frequency"`
|
||||
MemModVendor string `json:"mem_mod_vendor"`
|
||||
MemModPartNum string `json:"mem_mod_part_num"`
|
||||
MemModSerial string `json:"mem_mod_serial_num"`
|
||||
MemModRanks int `json:"mem_mod_ranks"`
|
||||
Status string `json:"status"`
|
||||
} `json:"mem_modules"`
|
||||
TotalMemoryCount int `json:"total_memory_count"`
|
||||
PresentMemoryCount int `json:"present_memory_count"`
|
||||
@@ -84,13 +100,21 @@ func parseMemoryInfo(text string, hw *models.HardwareConfig) {
|
||||
return
|
||||
}
|
||||
|
||||
// Replace memory data with detailed info from component.log
|
||||
hw.Memory = nil
|
||||
var merged []models.MemoryDIMM
|
||||
seen := make(map[string]int)
|
||||
for _, existing := range hw.Memory {
|
||||
key := inspurMemoryKey(existing)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
seen[key] = len(merged)
|
||||
merged = append(merged, existing)
|
||||
}
|
||||
for _, mem := range memInfo.MemModules {
|
||||
hw.Memory = append(hw.Memory, models.MemoryDIMM{
|
||||
item := models.MemoryDIMM{
|
||||
Slot: mem.MemModSlot,
|
||||
Location: mem.MemModSlot,
|
||||
Present: mem.ConfigStatus == 1,
|
||||
Present: mem.MemModStatus == 1 && mem.MemModSize > 0,
|
||||
SizeMB: mem.MemModSize * 1024, // Convert GB to MB
|
||||
Type: mem.MemModType,
|
||||
Technology: strings.TrimSpace(mem.MemModTechnology),
|
||||
@@ -101,28 +125,38 @@ func parseMemoryInfo(text string, hw *models.HardwareConfig) {
|
||||
PartNumber: strings.TrimSpace(mem.MemModPartNum),
|
||||
Status: mem.Status,
|
||||
Ranks: mem.MemModRanks,
|
||||
})
|
||||
}
|
||||
key := inspurMemoryKey(item)
|
||||
if idx, ok := seen[key]; ok {
|
||||
mergeInspurMemoryDIMM(&merged[idx], item)
|
||||
continue
|
||||
}
|
||||
if key != "" {
|
||||
seen[key] = len(merged)
|
||||
}
|
||||
merged = append(merged, item)
|
||||
}
|
||||
hw.Memory = merged
|
||||
}
|
||||
|
||||
// PSURESTInfo represents the RESTful PSU info structure
|
||||
type PSURESTInfo struct {
|
||||
PowerSupplies []struct {
|
||||
ID int `json:"id"`
|
||||
Present int `json:"present"`
|
||||
VendorID string `json:"vendor_id"`
|
||||
Model string `json:"model"`
|
||||
SerialNum string `json:"serial_num"`
|
||||
PartNum string `json:"part_num"`
|
||||
FwVer string `json:"fw_ver"`
|
||||
InputType string `json:"input_type"`
|
||||
Status string `json:"status"`
|
||||
RatedPower int `json:"rated_power"`
|
||||
PSInPower int `json:"ps_in_power"`
|
||||
PSOutPower int `json:"ps_out_power"`
|
||||
PSInVolt float64 `json:"ps_in_volt"`
|
||||
PSOutVolt float64 `json:"ps_out_volt"`
|
||||
PSUMaxTemp int `json:"psu_max_temperature"`
|
||||
ID int `json:"id"`
|
||||
Present int `json:"present"`
|
||||
VendorID string `json:"vendor_id"`
|
||||
Model string `json:"model"`
|
||||
SerialNum string `json:"serial_num"`
|
||||
PartNum string `json:"part_num"`
|
||||
FwVer string `json:"fw_ver"`
|
||||
InputType string `json:"input_type"`
|
||||
Status string `json:"status"`
|
||||
RatedPower int `json:"rated_power"`
|
||||
PSInPower int `json:"ps_in_power"`
|
||||
PSOutPower int `json:"ps_out_power"`
|
||||
PSInVolt float64 `json:"ps_in_volt"`
|
||||
PSOutVolt float64 `json:"ps_out_volt"`
|
||||
PSUMaxTemp int `json:"psu_max_temperature"`
|
||||
} `json:"power_supplies"`
|
||||
PresentPowerReading int `json:"present_power_reading"`
|
||||
}
|
||||
@@ -143,10 +177,18 @@ func parsePSUInfo(text string, hw *models.HardwareConfig) {
|
||||
return
|
||||
}
|
||||
|
||||
// Clear existing PSU data and populate with RESTful data
|
||||
hw.PowerSupply = nil
|
||||
var merged []models.PSU
|
||||
seen := make(map[string]int)
|
||||
for _, existing := range hw.PowerSupply {
|
||||
key := inspurPSUKey(existing)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
seen[key] = len(merged)
|
||||
merged = append(merged, existing)
|
||||
}
|
||||
for _, psu := range psuInfo.PowerSupplies {
|
||||
hw.PowerSupply = append(hw.PowerSupply, models.PSU{
|
||||
item := models.PSU{
|
||||
Slot: fmt.Sprintf("PSU%d", psu.ID),
|
||||
Present: psu.Present == 1,
|
||||
Model: strings.TrimSpace(psu.Model),
|
||||
@@ -162,8 +204,18 @@ func parsePSUInfo(text string, hw *models.HardwareConfig) {
|
||||
InputVoltage: psu.PSInVolt,
|
||||
OutputVoltage: psu.PSOutVolt,
|
||||
TemperatureC: psu.PSUMaxTemp,
|
||||
})
|
||||
}
|
||||
key := inspurPSUKey(item)
|
||||
if idx, ok := seen[key]; ok {
|
||||
mergeInspurPSU(&merged[idx], item)
|
||||
continue
|
||||
}
|
||||
if key != "" {
|
||||
seen[key] = len(merged)
|
||||
}
|
||||
merged = append(merged, item)
|
||||
}
|
||||
hw.PowerSupply = merged
|
||||
}
|
||||
|
||||
// HDDRESTInfo represents the RESTful HDD info structure
|
||||
@@ -205,20 +257,49 @@ func parseHDDInfo(text string, hw *models.HardwareConfig) {
|
||||
})
|
||||
for _, hdd := range hddInfo {
|
||||
if hdd.Present == 1 {
|
||||
hddMap[hdd.LocationString] = struct {
|
||||
slot := strings.TrimSpace(hdd.LocationString)
|
||||
if slot == "" {
|
||||
slot = fmt.Sprintf("HDD%d", hdd.ID)
|
||||
}
|
||||
hddMap[slot] = struct {
|
||||
SN string
|
||||
Model string
|
||||
Firmware string
|
||||
Mfr string
|
||||
}{
|
||||
SN: strings.TrimSpace(hdd.SN),
|
||||
SN: normalizeRedisValue(hdd.SN),
|
||||
Model: strings.TrimSpace(hdd.Model),
|
||||
Firmware: strings.TrimSpace(hdd.Firmware),
|
||||
Firmware: normalizeRedisValue(hdd.Firmware),
|
||||
Mfr: strings.TrimSpace(hdd.Manufacture),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge into existing inventory first (asset/other sections).
|
||||
for i := range hw.Storage {
|
||||
slot := strings.TrimSpace(hw.Storage[i].Slot)
|
||||
if slot == "" {
|
||||
continue
|
||||
}
|
||||
detail, ok := hddMap[slot]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if normalizeRedisValue(hw.Storage[i].SerialNumber) == "" {
|
||||
hw.Storage[i].SerialNumber = detail.SN
|
||||
}
|
||||
if hw.Storage[i].Model == "" {
|
||||
hw.Storage[i].Model = detail.Model
|
||||
}
|
||||
if normalizeRedisValue(hw.Storage[i].Firmware) == "" {
|
||||
hw.Storage[i].Firmware = detail.Firmware
|
||||
}
|
||||
if hw.Storage[i].Manufacturer == "" {
|
||||
hw.Storage[i].Manufacturer = detail.Mfr
|
||||
}
|
||||
hw.Storage[i].Present = true
|
||||
}
|
||||
|
||||
// If storage is empty, populate from HDD info
|
||||
if len(hw.Storage) == 0 {
|
||||
for _, hdd := range hddInfo {
|
||||
@@ -235,21 +316,42 @@ func parseHDDInfo(text string, hw *models.HardwareConfig) {
|
||||
if hdd.CapableSpeed == 12 {
|
||||
iface = "SAS"
|
||||
}
|
||||
slot := strings.TrimSpace(hdd.LocationString)
|
||||
if slot == "" {
|
||||
slot = fmt.Sprintf("HDD%d", hdd.ID)
|
||||
}
|
||||
|
||||
hw.Storage = append(hw.Storage, models.Storage{
|
||||
Slot: hdd.LocationString,
|
||||
Slot: slot,
|
||||
Type: storType,
|
||||
Model: model,
|
||||
SizeGB: hdd.Capacity,
|
||||
SerialNumber: strings.TrimSpace(hdd.SN),
|
||||
SerialNumber: normalizeRedisValue(hdd.SN),
|
||||
Manufacturer: extractStorageManufacturer(model),
|
||||
Firmware: strings.TrimSpace(hdd.Firmware),
|
||||
Firmware: normalizeRedisValue(hdd.Firmware),
|
||||
Interface: iface,
|
||||
Present: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FanRESTInfo represents the RESTful fan info structure.
|
||||
type FanRESTInfo struct {
|
||||
Fans []struct {
|
||||
ID int `json:"id"`
|
||||
FanName string `json:"fan_name"`
|
||||
Present string `json:"present"`
|
||||
Status string `json:"status"`
|
||||
StatusStr string `json:"status_str"`
|
||||
SpeedRPM int `json:"speed_rpm"`
|
||||
SpeedPercent int `json:"speed_percent"`
|
||||
MaxSpeedRPM int `json:"max_speed_rpm"`
|
||||
FanModel string `json:"fan_model"`
|
||||
} `json:"fans"`
|
||||
FansPower int `json:"fans_power"`
|
||||
}
|
||||
|
||||
// NetworkAdapterRESTInfo represents the RESTful Network Adapter info structure
|
||||
type NetworkAdapterRESTInfo struct {
|
||||
SysAdapters []struct {
|
||||
@@ -291,7 +393,16 @@ func parseNetworkAdapterInfo(text string, hw *models.HardwareConfig) {
|
||||
return
|
||||
}
|
||||
|
||||
hw.NetworkAdapters = nil
|
||||
var merged []models.NetworkAdapter
|
||||
seen := make(map[string]int)
|
||||
for _, existing := range hw.NetworkAdapters {
|
||||
key := inspurNICKey(existing)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
seen[key] = len(merged)
|
||||
merged = append(merged, existing)
|
||||
}
|
||||
for _, adapter := range netInfo.SysAdapters {
|
||||
var macs []string
|
||||
for _, port := range adapter.Ports {
|
||||
@@ -300,23 +411,474 @@ func parseNetworkAdapterInfo(text string, hw *models.HardwareConfig) {
|
||||
}
|
||||
}
|
||||
|
||||
hw.NetworkAdapters = append(hw.NetworkAdapters, models.NetworkAdapter{
|
||||
model := normalizeModelLabel(adapter.Model)
|
||||
if model == "" || looksLikeRawDeviceID(model) {
|
||||
if resolved := normalizeModelLabel(pciids.DeviceName(adapter.VendorID, adapter.DeviceID)); resolved != "" {
|
||||
model = resolved
|
||||
}
|
||||
}
|
||||
vendor := normalizeModelLabel(adapter.Vendor)
|
||||
if vendor == "" {
|
||||
vendor = normalizeModelLabel(pciids.VendorName(adapter.VendorID))
|
||||
}
|
||||
|
||||
item := models.NetworkAdapter{
|
||||
Slot: fmt.Sprintf("Slot %d", adapter.Slot),
|
||||
Location: adapter.Location,
|
||||
Present: adapter.Present == 1,
|
||||
Model: strings.TrimSpace(adapter.Model),
|
||||
Vendor: strings.TrimSpace(adapter.Vendor),
|
||||
Model: model,
|
||||
Vendor: vendor,
|
||||
VendorID: adapter.VendorID,
|
||||
DeviceID: adapter.DeviceID,
|
||||
SerialNumber: strings.TrimSpace(adapter.SN),
|
||||
PartNumber: strings.TrimSpace(adapter.PN),
|
||||
Firmware: adapter.FwVer,
|
||||
SerialNumber: normalizeRedisValue(adapter.SN),
|
||||
PartNumber: normalizeRedisValue(adapter.PN),
|
||||
Firmware: normalizeRedisValue(adapter.FwVer),
|
||||
PortCount: adapter.PortNum,
|
||||
PortType: adapter.PortType,
|
||||
MACAddresses: macs,
|
||||
Status: adapter.Status,
|
||||
}
|
||||
key := inspurNICKey(item)
|
||||
if idx, ok := seen[key]; ok {
|
||||
mergeInspurNIC(&merged[idx], item)
|
||||
continue
|
||||
}
|
||||
if slotIdx := inspurFindNICBySlot(merged, item.Slot); slotIdx >= 0 {
|
||||
mergeInspurNIC(&merged[slotIdx], item)
|
||||
if key != "" {
|
||||
seen[key] = slotIdx
|
||||
}
|
||||
continue
|
||||
}
|
||||
if key != "" {
|
||||
seen[key] = len(merged)
|
||||
}
|
||||
merged = append(merged, item)
|
||||
}
|
||||
hw.NetworkAdapters = merged
|
||||
}
|
||||
|
||||
func inspurMemoryKey(item models.MemoryDIMM) string {
|
||||
return strings.ToLower(strings.TrimSpace(inspurFirstNonEmpty(item.SerialNumber, item.Slot, item.Location)))
|
||||
}
|
||||
|
||||
func mergeInspurMemoryDIMM(dst *models.MemoryDIMM, src models.MemoryDIMM) {
|
||||
if dst == nil {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(dst.Slot) == "" {
|
||||
dst.Slot = src.Slot
|
||||
}
|
||||
if strings.TrimSpace(dst.Location) == "" {
|
||||
dst.Location = src.Location
|
||||
}
|
||||
dst.Present = dst.Present || src.Present
|
||||
if dst.SizeMB == 0 {
|
||||
dst.SizeMB = src.SizeMB
|
||||
}
|
||||
if strings.TrimSpace(dst.Type) == "" {
|
||||
dst.Type = src.Type
|
||||
}
|
||||
if strings.TrimSpace(dst.Technology) == "" {
|
||||
dst.Technology = src.Technology
|
||||
}
|
||||
if dst.MaxSpeedMHz == 0 {
|
||||
dst.MaxSpeedMHz = src.MaxSpeedMHz
|
||||
}
|
||||
if dst.CurrentSpeedMHz == 0 {
|
||||
dst.CurrentSpeedMHz = src.CurrentSpeedMHz
|
||||
}
|
||||
if strings.TrimSpace(dst.Manufacturer) == "" {
|
||||
dst.Manufacturer = src.Manufacturer
|
||||
}
|
||||
if strings.TrimSpace(dst.SerialNumber) == "" {
|
||||
dst.SerialNumber = src.SerialNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.PartNumber) == "" {
|
||||
dst.PartNumber = src.PartNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.Status) == "" {
|
||||
dst.Status = src.Status
|
||||
}
|
||||
if dst.Ranks == 0 {
|
||||
dst.Ranks = src.Ranks
|
||||
}
|
||||
}
|
||||
|
||||
func inspurPSUKey(item models.PSU) string {
|
||||
return strings.ToLower(strings.TrimSpace(inspurFirstNonEmpty(item.SerialNumber, item.Slot, item.Model)))
|
||||
}
|
||||
|
||||
func mergeInspurPSU(dst *models.PSU, src models.PSU) {
|
||||
if dst == nil {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(dst.Slot) == "" {
|
||||
dst.Slot = src.Slot
|
||||
}
|
||||
dst.Present = dst.Present || src.Present
|
||||
if strings.TrimSpace(dst.Model) == "" {
|
||||
dst.Model = src.Model
|
||||
}
|
||||
if strings.TrimSpace(dst.Vendor) == "" {
|
||||
dst.Vendor = src.Vendor
|
||||
}
|
||||
if dst.WattageW == 0 {
|
||||
dst.WattageW = src.WattageW
|
||||
}
|
||||
if strings.TrimSpace(dst.SerialNumber) == "" {
|
||||
dst.SerialNumber = src.SerialNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.PartNumber) == "" {
|
||||
dst.PartNumber = src.PartNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.Firmware) == "" {
|
||||
dst.Firmware = src.Firmware
|
||||
}
|
||||
if strings.TrimSpace(dst.Status) == "" {
|
||||
dst.Status = src.Status
|
||||
}
|
||||
if strings.TrimSpace(dst.InputType) == "" {
|
||||
dst.InputType = src.InputType
|
||||
}
|
||||
if dst.InputPowerW == 0 {
|
||||
dst.InputPowerW = src.InputPowerW
|
||||
}
|
||||
if dst.OutputPowerW == 0 {
|
||||
dst.OutputPowerW = src.OutputPowerW
|
||||
}
|
||||
if dst.InputVoltage == 0 {
|
||||
dst.InputVoltage = src.InputVoltage
|
||||
}
|
||||
if dst.OutputVoltage == 0 {
|
||||
dst.OutputVoltage = src.OutputVoltage
|
||||
}
|
||||
if dst.TemperatureC == 0 {
|
||||
dst.TemperatureC = src.TemperatureC
|
||||
}
|
||||
}
|
||||
|
||||
func inspurNICKey(item models.NetworkAdapter) string {
|
||||
return strings.ToLower(strings.TrimSpace(inspurFirstNonEmpty(item.SerialNumber, strings.Join(item.MACAddresses, ","), item.Slot, item.Location)))
|
||||
}
|
||||
|
||||
func mergeInspurNIC(dst *models.NetworkAdapter, src models.NetworkAdapter) {
|
||||
if dst == nil {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(dst.Slot) == "" {
|
||||
dst.Slot = src.Slot
|
||||
}
|
||||
if strings.TrimSpace(dst.Location) == "" {
|
||||
dst.Location = src.Location
|
||||
}
|
||||
dst.Present = dst.Present || src.Present
|
||||
if strings.TrimSpace(dst.BDF) == "" {
|
||||
dst.BDF = src.BDF
|
||||
}
|
||||
if strings.TrimSpace(dst.Model) == "" {
|
||||
dst.Model = src.Model
|
||||
}
|
||||
if strings.TrimSpace(dst.Description) == "" {
|
||||
dst.Description = src.Description
|
||||
}
|
||||
if strings.TrimSpace(dst.Vendor) == "" {
|
||||
dst.Vendor = src.Vendor
|
||||
}
|
||||
if dst.VendorID == 0 {
|
||||
dst.VendorID = src.VendorID
|
||||
}
|
||||
if dst.DeviceID == 0 {
|
||||
dst.DeviceID = src.DeviceID
|
||||
}
|
||||
if strings.TrimSpace(dst.SerialNumber) == "" {
|
||||
dst.SerialNumber = src.SerialNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.PartNumber) == "" {
|
||||
dst.PartNumber = src.PartNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.Firmware) == "" {
|
||||
dst.Firmware = src.Firmware
|
||||
}
|
||||
if dst.PortCount == 0 {
|
||||
dst.PortCount = src.PortCount
|
||||
}
|
||||
if strings.TrimSpace(dst.PortType) == "" {
|
||||
dst.PortType = src.PortType
|
||||
}
|
||||
if dst.LinkWidth == 0 {
|
||||
dst.LinkWidth = src.LinkWidth
|
||||
}
|
||||
if strings.TrimSpace(dst.LinkSpeed) == "" {
|
||||
dst.LinkSpeed = src.LinkSpeed
|
||||
}
|
||||
if dst.MaxLinkWidth == 0 {
|
||||
dst.MaxLinkWidth = src.MaxLinkWidth
|
||||
}
|
||||
if strings.TrimSpace(dst.MaxLinkSpeed) == "" {
|
||||
dst.MaxLinkSpeed = src.MaxLinkSpeed
|
||||
}
|
||||
if dst.NUMANode == 0 {
|
||||
dst.NUMANode = src.NUMANode
|
||||
}
|
||||
if strings.TrimSpace(dst.Status) == "" {
|
||||
dst.Status = src.Status
|
||||
}
|
||||
for _, mac := range src.MACAddresses {
|
||||
mac = strings.TrimSpace(mac)
|
||||
if mac == "" {
|
||||
continue
|
||||
}
|
||||
found := false
|
||||
for _, existing := range dst.MACAddresses {
|
||||
if strings.EqualFold(strings.TrimSpace(existing), mac) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
dst.MACAddresses = append(dst.MACAddresses, mac)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func inspurFindNICBySlot(items []models.NetworkAdapter, slot string) int {
|
||||
slot = strings.ToLower(strings.TrimSpace(slot))
|
||||
if slot == "" {
|
||||
return -1
|
||||
}
|
||||
for i := range items {
|
||||
if strings.ToLower(strings.TrimSpace(items[i].Slot)) == slot {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func inspurFirstNonEmpty(values ...string) string {
|
||||
for _, value := range values {
|
||||
if strings.TrimSpace(value) != "" {
|
||||
return strings.TrimSpace(value)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func parseFanSensors(text string) []models.SensorReading {
|
||||
re := regexp.MustCompile(`RESTful fan info:\s*(\{[\s\S]*?\})\s*RESTful diskbackplane`)
|
||||
match := re.FindStringSubmatch(text)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
jsonStr := strings.ReplaceAll(match[1], "\n", "")
|
||||
var fanInfo FanRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonStr), &fanInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]models.SensorReading, 0, len(fanInfo.Fans)+1)
|
||||
for _, fan := range fanInfo.Fans {
|
||||
name := strings.TrimSpace(fan.FanName)
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("FAN%d", fan.ID)
|
||||
}
|
||||
status := normalizeComponentStatus(fan.StatusStr, fan.Status, fan.Present)
|
||||
raw := fmt.Sprintf("rpm=%d pct=%d model=%s max_rpm=%d", fan.SpeedRPM, fan.SpeedPercent, fan.FanModel, fan.MaxSpeedRPM)
|
||||
out = append(out, models.SensorReading{
|
||||
Name: name,
|
||||
Type: "fan_speed",
|
||||
Value: float64(fan.SpeedRPM),
|
||||
Unit: "RPM",
|
||||
RawValue: raw,
|
||||
Status: status,
|
||||
})
|
||||
}
|
||||
|
||||
if fanInfo.FansPower > 0 {
|
||||
out = append(out, models.SensorReading{
|
||||
Name: "Fans_Power",
|
||||
Type: "power",
|
||||
Value: float64(fanInfo.FansPower),
|
||||
Unit: "W",
|
||||
RawValue: fmt.Sprintf("%d", fanInfo.FansPower),
|
||||
Status: "OK",
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func parseFanEvents(text string) []models.Event {
|
||||
re := regexp.MustCompile(`RESTful fan info:\s*(\{[\s\S]*?\})\s*RESTful diskbackplane`)
|
||||
match := re.FindStringSubmatch(text)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
jsonStr := strings.ReplaceAll(match[1], "\n", "")
|
||||
var fanInfo FanRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonStr), &fanInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var events []models.Event
|
||||
for _, fan := range fanInfo.Fans {
|
||||
status := normalizeComponentStatus(fan.StatusStr, fan.Status, fan.Present)
|
||||
if isHealthyComponentStatus(status) {
|
||||
continue
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(fan.FanName)
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("FAN%d", fan.ID)
|
||||
}
|
||||
|
||||
severity := models.SeverityWarning
|
||||
lowStatus := strings.ToLower(status)
|
||||
if strings.Contains(lowStatus, "critical") || strings.Contains(lowStatus, "fail") || strings.Contains(lowStatus, "error") {
|
||||
severity = models.SeverityCritical
|
||||
}
|
||||
|
||||
events = append(events, models.Event{
|
||||
ID: fmt.Sprintf("fan_%d_status", fan.ID),
|
||||
Timestamp: time.Now(),
|
||||
Source: "Fan",
|
||||
SensorType: "fan",
|
||||
SensorName: name,
|
||||
EventType: "Fan Status",
|
||||
Severity: severity,
|
||||
Description: fmt.Sprintf("%s reports %s", name, status),
|
||||
RawData: fmt.Sprintf("rpm=%d pct=%d model=%s", fan.SpeedRPM, fan.SpeedPercent, fan.FanModel),
|
||||
})
|
||||
}
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
func parseDiskBackplaneSensors(text string) []models.SensorReading {
|
||||
re := regexp.MustCompile(`RESTful diskbackplane info:\s*(\[[\s\S]*?\])\s*BMC`)
|
||||
match := re.FindStringSubmatch(text)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
jsonStr := strings.ReplaceAll(match[1], "\n", "")
|
||||
var backplaneInfo DiskBackplaneRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonStr), &backplaneInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]models.SensorReading, 0, len(backplaneInfo))
|
||||
for _, bp := range backplaneInfo {
|
||||
if bp.Present != 1 {
|
||||
continue
|
||||
}
|
||||
name := fmt.Sprintf("Backplane%d_Temp", bp.BackplaneIndex)
|
||||
status := "OK"
|
||||
if bp.Temperature <= 0 {
|
||||
status = "unknown"
|
||||
}
|
||||
raw := fmt.Sprintf("front=%d ports=%d drives=%d cpld=%s", bp.Front, bp.PortCount, bp.DriverCount, bp.CPLDVersion)
|
||||
out = append(out, models.SensorReading{
|
||||
Name: name,
|
||||
Type: "temperature",
|
||||
Value: float64(bp.Temperature),
|
||||
Unit: "C",
|
||||
RawValue: raw,
|
||||
Status: status,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func parsePSUSummarySensors(text string) []models.SensorReading {
|
||||
re := regexp.MustCompile(`RESTful PSU info:\s*(\{[\s\S]*?\})\s*RESTful Network`)
|
||||
match := re.FindStringSubmatch(text)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
jsonStr := strings.ReplaceAll(match[1], "\n", "")
|
||||
var psuInfo PSURESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonStr), &psuInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]models.SensorReading, 0, len(psuInfo.PowerSupplies)*3+1)
|
||||
if psuInfo.PresentPowerReading > 0 {
|
||||
out = append(out, models.SensorReading{
|
||||
Name: "PSU_Present_Power_Reading",
|
||||
Type: "power",
|
||||
Value: float64(psuInfo.PresentPowerReading),
|
||||
Unit: "W",
|
||||
RawValue: fmt.Sprintf("%d", psuInfo.PresentPowerReading),
|
||||
Status: "OK",
|
||||
})
|
||||
}
|
||||
|
||||
for _, psu := range psuInfo.PowerSupplies {
|
||||
if psu.Present != 1 {
|
||||
continue
|
||||
}
|
||||
status := normalizeComponentStatus(psu.Status)
|
||||
out = append(out, models.SensorReading{
|
||||
Name: fmt.Sprintf("PSU%d_InputPower", psu.ID),
|
||||
Type: "power",
|
||||
Value: float64(psu.PSInPower),
|
||||
Unit: "W",
|
||||
RawValue: fmt.Sprintf("%d", psu.PSInPower),
|
||||
Status: status,
|
||||
})
|
||||
out = append(out, models.SensorReading{
|
||||
Name: fmt.Sprintf("PSU%d_OutputPower", psu.ID),
|
||||
Type: "power",
|
||||
Value: float64(psu.PSOutPower),
|
||||
Unit: "W",
|
||||
RawValue: fmt.Sprintf("%d", psu.PSOutPower),
|
||||
Status: status,
|
||||
})
|
||||
out = append(out, models.SensorReading{
|
||||
Name: fmt.Sprintf("PSU%d_Temp", psu.ID),
|
||||
Type: "temperature",
|
||||
Value: float64(psu.PSUMaxTemp),
|
||||
Unit: "C",
|
||||
RawValue: fmt.Sprintf("%d", psu.PSUMaxTemp),
|
||||
Status: status,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func normalizeComponentStatus(values ...string) string {
|
||||
for _, v := range values {
|
||||
s := strings.TrimSpace(v)
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
return s
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
func isHealthyComponentStatus(status string) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(status)) {
|
||||
case "", "ok", "normal", "present", "enabled":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
var rawDeviceIDLikeRegex = regexp.MustCompile(`(?i)^(?:0x)?[0-9a-f]{3,4}$`)
|
||||
|
||||
func looksLikeRawDeviceID(v string) bool {
|
||||
v = strings.TrimSpace(v)
|
||||
if v == "" {
|
||||
return true
|
||||
}
|
||||
return rawDeviceIDLikeRegex.MatchString(v)
|
||||
}
|
||||
|
||||
func parseMemoryEvents(text string) []models.Event {
|
||||
@@ -419,4 +981,174 @@ func extractComponentFirmware(text string, hw *models.HardwareConfig) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract BMC, CPLD and VR firmware from RESTful version info section.
|
||||
// The JSON is a flat array: [{"id":N,"dev_name":"...","dev_version":"..."}, ...]
|
||||
reVer := regexp.MustCompile(`RESTful version info:\s*(\[[\s\S]*?\])\s*RESTful`)
|
||||
if match := reVer.FindStringSubmatch(text); match != nil {
|
||||
type verEntry struct {
|
||||
DevName string `json:"dev_name"`
|
||||
DevVersion string `json:"dev_version"`
|
||||
}
|
||||
var entries []verEntry
|
||||
if err := json.Unmarshal([]byte(match[1]), &entries); err == nil {
|
||||
for _, e := range entries {
|
||||
name := normalizeVersionInfoName(e.DevName)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
version := strings.TrimSpace(e.DevVersion)
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
if existingFW[name] {
|
||||
continue
|
||||
}
|
||||
hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
|
||||
DeviceName: name,
|
||||
Version: version,
|
||||
})
|
||||
existingFW[name] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeVersionInfoName converts RESTful version info dev_name to a clean label.
|
||||
// Returns "" for entries that should be skipped (inactive BMC, PSU slots).
|
||||
func normalizeVersionInfoName(name string) string {
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
// Skip PSU_N entries — firmware already extracted from PSU info section.
|
||||
if regexp.MustCompile(`(?i)^PSU_\d+$`).MatchString(name) {
|
||||
return ""
|
||||
}
|
||||
// Skip the inactive BMC partition.
|
||||
if strings.HasPrefix(strings.ToLower(name), "inactivate(") {
|
||||
return ""
|
||||
}
|
||||
// Active BMC: "Activate(BMC1)" → "BMC"
|
||||
if strings.HasPrefix(strings.ToLower(name), "activate(") {
|
||||
return "BMC"
|
||||
}
|
||||
// Strip trailing "Version" suffix (case-insensitive), e.g. "MainBoard0CPLDVersion" → "MainBoard0CPLD"
|
||||
if strings.HasSuffix(strings.ToLower(name), "version") {
|
||||
name = name[:len(name)-len("version")]
|
||||
}
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
||||
// DiskBackplaneRESTInfo represents the RESTful diskbackplane info structure
|
||||
type DiskBackplaneRESTInfo []struct {
|
||||
PortCount int `json:"port_count"`
|
||||
DriverCount int `json:"driver_count"`
|
||||
Front int `json:"front"`
|
||||
BackplaneIndex int `json:"backplane_index"`
|
||||
Present int `json:"present"`
|
||||
CPLDVersion string `json:"cpld_version"`
|
||||
Temperature int `json:"temperature"`
|
||||
}
|
||||
|
||||
func parseDiskBackplaneInfo(text string, hw *models.HardwareConfig) {
|
||||
// Find RESTful diskbackplane info section
|
||||
re := regexp.MustCompile(`RESTful diskbackplane info:\s*(\[[\s\S]*?\])\s*BMC`)
|
||||
match := re.FindStringSubmatch(text)
|
||||
if match == nil {
|
||||
return
|
||||
}
|
||||
|
||||
jsonStr := match[1]
|
||||
jsonStr = strings.ReplaceAll(jsonStr, "\n", "")
|
||||
|
||||
var backplaneInfo DiskBackplaneRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonStr), &backplaneInfo); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
presentByBackplane := make(map[int]int)
|
||||
totalPresent := 0
|
||||
for _, bp := range backplaneInfo {
|
||||
if bp.Present != 1 {
|
||||
continue
|
||||
}
|
||||
if bp.DriverCount <= 0 {
|
||||
continue
|
||||
}
|
||||
limit := bp.DriverCount
|
||||
if bp.PortCount > 0 && limit > bp.PortCount {
|
||||
limit = bp.PortCount
|
||||
}
|
||||
presentByBackplane[bp.BackplaneIndex] = limit
|
||||
totalPresent += limit
|
||||
}
|
||||
|
||||
if totalPresent == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
existingPresent := countPresentStorage(hw.Storage)
|
||||
remaining := totalPresent - existingPresent
|
||||
if remaining <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for _, bp := range backplaneInfo {
|
||||
if bp.Present != 1 || remaining <= 0 {
|
||||
continue
|
||||
}
|
||||
driveCount := presentByBackplane[bp.BackplaneIndex]
|
||||
if driveCount <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
location := "Rear"
|
||||
if bp.Front == 1 {
|
||||
location = "Front"
|
||||
}
|
||||
|
||||
for i := 0; i < driveCount && remaining > 0; i++ {
|
||||
slot := fmt.Sprintf("BP%d:%d", bp.BackplaneIndex, i)
|
||||
if hasStorageSlot(hw.Storage, slot) {
|
||||
continue
|
||||
}
|
||||
|
||||
hw.Storage = append(hw.Storage, models.Storage{
|
||||
Slot: slot,
|
||||
Present: true,
|
||||
Location: location,
|
||||
BackplaneID: bp.BackplaneIndex,
|
||||
Type: "HDD",
|
||||
})
|
||||
remaining--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func countPresentStorage(storage []models.Storage) int {
|
||||
count := 0
|
||||
for _, dev := range storage {
|
||||
if dev.Present {
|
||||
count++
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(dev.Slot) != "" && (normalizeRedisValue(dev.Model) != "" || normalizeRedisValue(dev.SerialNumber) != "" || dev.SizeGB > 0) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func hasStorageSlot(storage []models.Storage, slot string) bool {
|
||||
slot = strings.ToLower(strings.TrimSpace(slot))
|
||||
if slot == "" {
|
||||
return false
|
||||
}
|
||||
for _, dev := range storage {
|
||||
if strings.ToLower(strings.TrimSpace(dev.Slot)) == slot {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
224
internal/parser/vendors/inspur/component_test.go
vendored
Normal file
224
internal/parser/vendors/inspur/component_test.go
vendored
Normal file
@@ -0,0 +1,224 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestParseNetworkAdapterInfo_ResolvesModelFromPCIIDsForRawHexModel(t *testing.T) {
|
||||
text := `RESTful Network Adapter info:
|
||||
{
|
||||
"sys_adapters": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "NIC1",
|
||||
"Location": "#CPU0_PCIE4",
|
||||
"present": 1,
|
||||
"slot": 4,
|
||||
"vendor_id": 32902,
|
||||
"device_id": 5409,
|
||||
"vendor": "",
|
||||
"model": "0x1521",
|
||||
"fw_ver": "",
|
||||
"status": "OK",
|
||||
"sn": "",
|
||||
"pn": "",
|
||||
"port_num": 4,
|
||||
"port_type": "Base-T",
|
||||
"ports": []
|
||||
}
|
||||
]
|
||||
}
|
||||
RESTful fan`
|
||||
|
||||
hw := &models.HardwareConfig{}
|
||||
parseNetworkAdapterInfo(text, hw)
|
||||
|
||||
if len(hw.NetworkAdapters) != 1 {
|
||||
t.Fatalf("expected 1 network adapter, got %d", len(hw.NetworkAdapters))
|
||||
}
|
||||
got := hw.NetworkAdapters[0]
|
||||
if got.Model == "" {
|
||||
t.Fatalf("expected NIC model resolved from pci.ids, got empty")
|
||||
}
|
||||
if !strings.Contains(strings.ToUpper(got.Model), "I350") {
|
||||
t.Fatalf("expected I350 in model, got %q", got.Model)
|
||||
}
|
||||
if got.Vendor == "" {
|
||||
t.Fatalf("expected NIC vendor resolved from pci.ids")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNetworkAdapterInfo_MergesIntoExistingInventory(t *testing.T) {
|
||||
text := `RESTful Network Adapter info:
|
||||
{
|
||||
"sys_adapters": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "NIC1",
|
||||
"Location": "#CPU0_PCIE4",
|
||||
"present": 1,
|
||||
"slot": 4,
|
||||
"vendor_id": 32902,
|
||||
"device_id": 5409,
|
||||
"vendor": "Mellanox",
|
||||
"model": "ConnectX-6",
|
||||
"fw_ver": "22.1.0",
|
||||
"status": "OK",
|
||||
"sn": "",
|
||||
"pn": "",
|
||||
"port_num": 2,
|
||||
"port_type": "QSFP",
|
||||
"ports": [
|
||||
{ "id": 1, "mac_addr": "00:11:22:33:44:55" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
RESTful fan`
|
||||
|
||||
hw := &models.HardwareConfig{
|
||||
NetworkAdapters: []models.NetworkAdapter{
|
||||
{
|
||||
Slot: "Slot 4",
|
||||
BDF: "0000:17:00.0",
|
||||
SerialNumber: "NIC-SN-1",
|
||||
Present: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
parseNetworkAdapterInfo(text, hw)
|
||||
|
||||
if len(hw.NetworkAdapters) != 1 {
|
||||
t.Fatalf("expected merged single adapter, got %d", len(hw.NetworkAdapters))
|
||||
}
|
||||
got := hw.NetworkAdapters[0]
|
||||
if got.BDF != "0000:17:00.0" {
|
||||
t.Fatalf("expected existing BDF to survive merge, got %q", got.BDF)
|
||||
}
|
||||
if got.Model != "ConnectX-6" {
|
||||
t.Fatalf("expected model from component log, got %q", got.Model)
|
||||
}
|
||||
if got.SerialNumber != "NIC-SN-1" {
|
||||
t.Fatalf("expected serial from existing inventory to survive merge, got %q", got.SerialNumber)
|
||||
}
|
||||
if len(got.MACAddresses) != 1 || got.MACAddresses[0] != "00:11:22:33:44:55" {
|
||||
t.Fatalf("expected MAC addresses from component log, got %#v", got.MACAddresses)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseComponentLogSensors_ExtractsFanBackplaneAndPSUSummary(t *testing.T) {
|
||||
text := `RESTful PSU info:
|
||||
{
|
||||
"power_supplies": [
|
||||
{ "id": 0, "present": 1, "status": "OK", "ps_in_power": 123, "ps_out_power": 110, "psu_max_temperature": 41 }
|
||||
],
|
||||
"present_power_reading": 999
|
||||
}
|
||||
RESTful Network Adapter info:
|
||||
{ "sys_adapters": [] }
|
||||
RESTful fan info:
|
||||
{
|
||||
"fans": [
|
||||
{ "id": 1, "fan_name": "FAN0_F_Speed", "present": "OK", "status": "OK", "status_str": "OK", "speed_rpm": 9200, "speed_percent": 35, "max_speed_rpm": 20000, "fan_model": "6056" }
|
||||
],
|
||||
"fans_power": 33
|
||||
}
|
||||
RESTful diskbackplane info:
|
||||
[
|
||||
{ "port_count": 8, "driver_count": 4, "front": 1, "backplane_index": 0, "present": 1, "cpld_version": "3.1", "temperature": 18 }
|
||||
]
|
||||
BMC`
|
||||
|
||||
sensors := ParseComponentLogSensors([]byte(text))
|
||||
if len(sensors) == 0 {
|
||||
t.Fatalf("expected sensors from component.log, got none")
|
||||
}
|
||||
|
||||
has := func(name string) bool {
|
||||
for _, s := range sensors {
|
||||
if s.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
if !has("FAN0_F_Speed") {
|
||||
t.Fatalf("expected FAN0_F_Speed sensor in parsed output")
|
||||
}
|
||||
if !has("Backplane0_Temp") {
|
||||
t.Fatalf("expected Backplane0_Temp sensor in parsed output")
|
||||
}
|
||||
if !has("PSU_Present_Power_Reading") {
|
||||
t.Fatalf("expected PSU_Present_Power_Reading sensor in parsed output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseComponentLogEvents_FanCriticalStatus(t *testing.T) {
|
||||
text := `RESTful fan info:
|
||||
{
|
||||
"fans": [
|
||||
{ "id": 7, "fan_name": "FAN3_R_Speed", "present": "OK", "status": "Critical", "status_str": "Critical", "speed_rpm": 0, "speed_percent": 0, "max_speed_rpm": 20000, "fan_model": "6056" }
|
||||
],
|
||||
"fans_power": 0
|
||||
}
|
||||
RESTful diskbackplane info:
|
||||
[]
|
||||
BMC`
|
||||
|
||||
events := ParseComponentLogEvents([]byte(text))
|
||||
if len(events) != 1 {
|
||||
t.Fatalf("expected 1 fan event, got %d", len(events))
|
||||
}
|
||||
if events[0].EventType != "Fan Status" {
|
||||
t.Fatalf("expected Fan Status event type, got %q", events[0].EventType)
|
||||
}
|
||||
if events[0].Severity != models.SeverityCritical {
|
||||
t.Fatalf("expected critical severity, got %q", events[0].Severity)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHDDInfo_MergesIntoExistingStorage(t *testing.T) {
|
||||
text := `RESTful HDD info:
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"present": 1,
|
||||
"enable": 1,
|
||||
"SN": "SER123",
|
||||
"model": "Sample SSD",
|
||||
"capacity": 1024,
|
||||
"manufacture": "ACME",
|
||||
"firmware": "1.0.0",
|
||||
"locationstring": "OB01",
|
||||
"capablespeed": 6
|
||||
}
|
||||
]
|
||||
RESTful PSU`
|
||||
|
||||
hw := &models.HardwareConfig{
|
||||
Storage: []models.Storage{
|
||||
{
|
||||
Slot: "OB01",
|
||||
Type: "SSD",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
parseHDDInfo(text, hw)
|
||||
if len(hw.Storage) != 1 {
|
||||
t.Fatalf("expected 1 storage item, got %d", len(hw.Storage))
|
||||
}
|
||||
if hw.Storage[0].SerialNumber != "SER123" {
|
||||
t.Fatalf("expected serial from HDD section, got %q", hw.Storage[0].SerialNumber)
|
||||
}
|
||||
if hw.Storage[0].Model != "Sample SSD" {
|
||||
t.Fatalf("expected model from HDD section, got %q", hw.Storage[0].Model)
|
||||
}
|
||||
if hw.Storage[0].Firmware != "1.0.0" {
|
||||
t.Fatalf("expected firmware from HDD section, got %q", hw.Storage[0].Firmware)
|
||||
}
|
||||
}
|
||||
33
internal/parser/vendors/inspur/event_logs_test.go
vendored
Normal file
33
internal/parser/vendors/inspur/event_logs_test.go
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
package inspur
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseIDLLog_UsesBMCSourceForEventLogs(t *testing.T) {
|
||||
content := []byte(`|2025-12-02T17:54:27+08:00|MEMORY|Assert|Warning|0C180401|CPU1_C4D0 Memory Device Disabled - Assert|`)
|
||||
|
||||
events := ParseIDLLog(content)
|
||||
if len(events) != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", len(events))
|
||||
}
|
||||
if events[0].Source != "BMC" {
|
||||
t.Fatalf("expected IDL events to use BMC source, got %#v", events[0])
|
||||
}
|
||||
if events[0].SensorName != "CPU1_C4D0" {
|
||||
t.Fatalf("expected extracted DIMM component ref, got %#v", events[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSyslog_UsesHostSourceAndProcessAsSensorName(t *testing.T) {
|
||||
content := []byte(`<13>2026-03-15T14:03:11+00:00 host123 systemd[1]: Started Example Service`)
|
||||
|
||||
events := ParseSyslog(content, "syslog/info")
|
||||
if len(events) != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", len(events))
|
||||
}
|
||||
if events[0].Source != "syslog" {
|
||||
t.Fatalf("expected syslog source, got %#v", events[0])
|
||||
}
|
||||
if events[0].SensorName != "systemd[1]" {
|
||||
t.Fatalf("expected process name in sensor/component slot, got %#v", events[0])
|
||||
}
|
||||
}
|
||||
67
internal/parser/vendors/inspur/fru.go
vendored
67
internal/parser/vendors/inspur/fru.go
vendored
@@ -9,8 +9,9 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
fruDeviceRegex = regexp.MustCompile(`^FRU Device Description\s*:\s*(.+)$`)
|
||||
fruFieldRegex = regexp.MustCompile(`^\s+(.+?)\s*:\s*(.*)$`)
|
||||
fruDeviceRegex = regexp.MustCompile(`^FRU Device Description\s*:\s*(.+)$`)
|
||||
fruFieldRegex = regexp.MustCompile(`^\s+(.+?)\s*:\s*(.*)$`)
|
||||
platformIdRegex = regexp.MustCompile(`(?i)PlatformId\s*=\s*(\S+)`)
|
||||
)
|
||||
|
||||
// ParseFRU parses BMC FRU (Field Replaceable Unit) output
|
||||
@@ -95,3 +96,65 @@ func ParseFRU(content []byte) []models.FRUInfo {
|
||||
|
||||
return fruList
|
||||
}
|
||||
|
||||
// extractBoardInfo extracts main board/chassis information from FRU data
|
||||
func extractBoardInfo(fruList []models.FRUInfo, hw *models.HardwareConfig) {
|
||||
if hw == nil || len(fruList) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Look for the main board/chassis FRU entry.
|
||||
// Keep the first non-empty serial as the server serial and avoid overwriting it
|
||||
// with module-specific serials (e.g., SCM_FRU).
|
||||
for _, fru := range fruList {
|
||||
// Skip empty entries
|
||||
if fru.ProductName == "" && fru.SerialNumber == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Prioritize entries that look like main board info
|
||||
desc := strings.ToLower(fru.Description)
|
||||
isMainBoard := strings.Contains(desc, "builtin") ||
|
||||
strings.Contains(desc, "fru device") ||
|
||||
strings.Contains(desc, "chassis") ||
|
||||
strings.Contains(desc, "board")
|
||||
|
||||
if fru.SerialNumber != "" && hw.BoardInfo.SerialNumber == "" {
|
||||
hw.BoardInfo.SerialNumber = fru.SerialNumber
|
||||
}
|
||||
if fru.ProductName != "" && (hw.BoardInfo.ProductName == "" || isMainBoard) {
|
||||
hw.BoardInfo.ProductName = fru.ProductName
|
||||
}
|
||||
// Manufacturer from non-main FRU entries (e.g. PSU vendor) should not become server vendor.
|
||||
if fru.Manufacturer != "" && isMainBoard && hw.BoardInfo.Manufacturer == "" {
|
||||
hw.BoardInfo.Manufacturer = fru.Manufacturer
|
||||
}
|
||||
if fru.PartNumber != "" && (hw.BoardInfo.PartNumber == "" || isMainBoard) {
|
||||
hw.BoardInfo.PartNumber = fru.PartNumber
|
||||
}
|
||||
|
||||
// Main board entry with complete data is good enough to stop.
|
||||
if isMainBoard && hw.BoardInfo.ProductName != "" && hw.BoardInfo.SerialNumber != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractPlatformId extracts server model from ThermalConfig (PlatformId)
|
||||
func extractPlatformId(content []byte, hw *models.HardwareConfig) {
|
||||
if hw == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if match := platformIdRegex.FindSubmatch(content); match != nil {
|
||||
platformId := strings.TrimSpace(string(match[1]))
|
||||
if platformId != "" {
|
||||
// Set as ProductName (server model) - this takes priority over FRU data
|
||||
hw.BoardInfo.ProductName = platformId
|
||||
// Also set manufacturer as Inspur if not already set
|
||||
if hw.BoardInfo.Manufacturer == "" {
|
||||
hw.BoardInfo.Manufacturer = "Inspur"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
59
internal/parser/vendors/inspur/fru_test.go
vendored
Normal file
59
internal/parser/vendors/inspur/fru_test.go
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestExtractBoardInfo_PreservesBuiltinSerial(t *testing.T) {
|
||||
hw := &models.HardwareConfig{}
|
||||
fruList := []models.FRUInfo{
|
||||
{
|
||||
Description: "Builtin FRU Device (ID 0)",
|
||||
SerialNumber: "21D634101",
|
||||
},
|
||||
{
|
||||
Description: "SCM_FRU (ID 8)",
|
||||
SerialNumber: "CAR509K10613C10",
|
||||
ProductName: "CA",
|
||||
Manufacturer: "inagile",
|
||||
PartNumber: "YZCA-02758-105",
|
||||
},
|
||||
}
|
||||
|
||||
extractBoardInfo(fruList, hw)
|
||||
|
||||
if hw.BoardInfo.SerialNumber != "21D634101" {
|
||||
t.Fatalf("expected board serial 21D634101, got %q", hw.BoardInfo.SerialNumber)
|
||||
}
|
||||
if hw.BoardInfo.ProductName != "CA" {
|
||||
t.Fatalf("expected product name CA, got %q", hw.BoardInfo.ProductName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractBoardInfo_DoesNotUsePSUVendorAsBoardManufacturer(t *testing.T) {
|
||||
hw := &models.HardwareConfig{}
|
||||
fruList := []models.FRUInfo{
|
||||
{
|
||||
Description: "Builtin FRU Device (ID 0)",
|
||||
SerialNumber: "2KD605238",
|
||||
},
|
||||
{
|
||||
Description: "PSU0_FRU (ID 30)",
|
||||
SerialNumber: "PMR315HS10F1A",
|
||||
ProductName: "AP-CR3000F12BY",
|
||||
Manufacturer: "APLUSPOWER",
|
||||
PartNumber: "18XA1M43400C2",
|
||||
},
|
||||
}
|
||||
|
||||
extractBoardInfo(fruList, hw)
|
||||
|
||||
if hw.BoardInfo.SerialNumber != "2KD605238" {
|
||||
t.Fatalf("expected board serial 2KD605238, got %q", hw.BoardInfo.SerialNumber)
|
||||
}
|
||||
if hw.BoardInfo.Manufacturer != "" {
|
||||
t.Fatalf("expected empty board manufacturer, got %q", hw.BoardInfo.Manufacturer)
|
||||
}
|
||||
}
|
||||
117
internal/parser/vendors/inspur/gpu_status.go
vendored
Normal file
117
internal/parser/vendors/inspur/gpu_status.go
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
var reFaultGPU = regexp.MustCompile(`\bF_GPU(\d+)\b`)
|
||||
|
||||
func applyGPUStatusFromEvents(hw *models.HardwareConfig, events []models.Event) {
|
||||
if hw == nil || len(hw.GPUs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
gpuByIndex := make(map[int]*models.GPU)
|
||||
for i := range hw.GPUs {
|
||||
gpu := &hw.GPUs[i]
|
||||
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
gpuByIndex[idx] = gpu
|
||||
gpu.StatusHistory = nil
|
||||
gpu.ErrorDescription = ""
|
||||
}
|
||||
|
||||
relevantEvents := make([]models.Event, 0)
|
||||
for _, e := range events {
|
||||
if !isGPUFaultEvent(e) || len(extractFaultyGPUSet(e.Description)) == 0 {
|
||||
continue
|
||||
}
|
||||
relevantEvents = append(relevantEvents, e)
|
||||
}
|
||||
|
||||
if len(relevantEvents) == 0 {
|
||||
for _, gpu := range gpuByIndex {
|
||||
if strings.TrimSpace(gpu.Status) == "" {
|
||||
gpu.Status = "OK"
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
sort.Slice(relevantEvents, func(i, j int) bool {
|
||||
return relevantEvents[i].Timestamp.Before(relevantEvents[j].Timestamp)
|
||||
})
|
||||
|
||||
currentStatus := make(map[int]string, len(gpuByIndex))
|
||||
lastCriticalDetails := make(map[int]string, len(gpuByIndex))
|
||||
for idx := range gpuByIndex {
|
||||
currentStatus[idx] = "OK"
|
||||
}
|
||||
|
||||
for _, e := range relevantEvents {
|
||||
faultySet := extractFaultyGPUSet(e.Description)
|
||||
for idx, gpu := range gpuByIndex {
|
||||
newStatus := "OK"
|
||||
if faultySet[idx] {
|
||||
newStatus = "Critical"
|
||||
lastCriticalDetails[idx] = strings.TrimSpace(e.Description)
|
||||
}
|
||||
|
||||
if currentStatus[idx] != newStatus {
|
||||
gpu.StatusHistory = append(gpu.StatusHistory, models.StatusHistoryEntry{
|
||||
Status: newStatus,
|
||||
ChangedAt: e.Timestamp,
|
||||
Details: strings.TrimSpace(e.Description),
|
||||
})
|
||||
ts := e.Timestamp
|
||||
gpu.StatusChangedAt = &ts
|
||||
currentStatus[idx] = newStatus
|
||||
}
|
||||
|
||||
ts := e.Timestamp
|
||||
gpu.StatusCheckedAt = &ts
|
||||
}
|
||||
}
|
||||
|
||||
for idx, gpu := range gpuByIndex {
|
||||
gpu.Status = currentStatus[idx]
|
||||
if gpu.Status == "Critical" {
|
||||
gpu.ErrorDescription = lastCriticalDetails[idx]
|
||||
} else {
|
||||
gpu.ErrorDescription = ""
|
||||
}
|
||||
if gpu.StatusCheckedAt == nil && strings.TrimSpace(gpu.Status) == "" {
|
||||
gpu.Status = "OK"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func extractFaultyGPUSet(description string) map[int]bool {
|
||||
faulty := make(map[int]bool)
|
||||
matches := reFaultGPU.FindAllStringSubmatch(description, -1)
|
||||
for _, m := range matches {
|
||||
if len(m) < 2 {
|
||||
continue
|
||||
}
|
||||
idx, err := strconv.Atoi(m[1])
|
||||
if err == nil && idx >= 0 {
|
||||
faulty[idx] = true
|
||||
}
|
||||
}
|
||||
return faulty
|
||||
}
|
||||
|
||||
func isGPUFaultEvent(e models.Event) bool {
|
||||
desc := strings.ToLower(e.Description)
|
||||
if strings.Contains(desc, "bios miss f_gpu") {
|
||||
return true
|
||||
}
|
||||
return strings.EqualFold(strings.TrimSpace(e.ID), "17FFB002")
|
||||
}
|
||||
69
internal/parser/vendors/inspur/hgx_firmware_test.go
vendored
Normal file
69
internal/parser/vendors/inspur/hgx_firmware_test.go
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestAppendHGXFirmwareFromHWInfo_AppendsInventoryEntries(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
Firmware: []models.FirmwareInfo{
|
||||
{DeviceName: "BIOS", Version: "1.0.0"},
|
||||
},
|
||||
}
|
||||
|
||||
content := []byte(`
|
||||
{
|
||||
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_FW_BMC_0",
|
||||
"Id": "HGX_FW_BMC_0",
|
||||
"Oem": {
|
||||
"Nvidia": {
|
||||
"ActiveFirmwareSlot": {"Version": "25.05-A"},
|
||||
"InactiveFirmwareSlot": {"Version": "25.04-B"}
|
||||
}
|
||||
},
|
||||
"Version": "25.05-A",
|
||||
"WriteProtected": false
|
||||
}
|
||||
{
|
||||
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_FW_GPU_SXM_1",
|
||||
"Id": "HGX_FW_GPU_SXM_1",
|
||||
"Version": "97.00.C5.00.0E",
|
||||
"WriteProtected": false
|
||||
}
|
||||
{
|
||||
"@odata.id": "/redfish/v1/UpdateService/FirmwareInventory/HGX_Driver_GPU_SXM_1",
|
||||
"Id": "HGX_Driver_GPU_SXM_1",
|
||||
"Version": "",
|
||||
"WriteProtected": false
|
||||
}
|
||||
`)
|
||||
|
||||
appendHGXFirmwareFromHWInfo(content, hw)
|
||||
|
||||
if len(hw.Firmware) != 5 {
|
||||
t.Fatalf("expected 5 firmware entries after append, got %d", len(hw.Firmware))
|
||||
}
|
||||
|
||||
seen := make(map[string]string)
|
||||
for _, fw := range hw.Firmware {
|
||||
seen[fw.DeviceName] = fw.Version
|
||||
}
|
||||
|
||||
if seen["HGX_FW_BMC_0"] != "25.05-A" {
|
||||
t.Fatalf("expected HGX_FW_BMC_0 version 25.05-A, got %q", seen["HGX_FW_BMC_0"])
|
||||
}
|
||||
if seen["HGX_FW_BMC_0 Active Slot"] != "25.05-A" {
|
||||
t.Fatalf("expected active slot version, got %q", seen["HGX_FW_BMC_0 Active Slot"])
|
||||
}
|
||||
if seen["HGX_FW_BMC_0 Inactive Slot"] != "25.04-B" {
|
||||
t.Fatalf("expected inactive slot version, got %q", seen["HGX_FW_BMC_0 Inactive Slot"])
|
||||
}
|
||||
if seen["HGX_FW_GPU_SXM_1"] != "97.00.C5.00.0E" {
|
||||
t.Fatalf("expected GPU FW entry, got %q", seen["HGX_FW_GPU_SXM_1"])
|
||||
}
|
||||
if _, ok := seen["HGX_Driver_GPU_SXM_1"]; ok {
|
||||
t.Fatalf("did not expect empty version driver entry")
|
||||
}
|
||||
}
|
||||
174
internal/parser/vendors/inspur/hgx_gpu_status_test.go
vendored
Normal file
174
internal/parser/vendors/inspur/hgx_gpu_status_test.go
vendored
Normal file
@@ -0,0 +1,174 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
GPUs: []models.GPU{
|
||||
{Slot: "#GPU6"},
|
||||
{Slot: "#GPU7"},
|
||||
{Slot: "#GPU0"},
|
||||
{Slot: "#CPU0_PE1_E_BMC", Model: "AST2500 VGA"},
|
||||
},
|
||||
}
|
||||
|
||||
content := []byte(`
|
||||
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_1/Assembly
|
||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN1","SerialNumber":"SXM1SN"}
|
||||
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
|
||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
|
||||
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_5/Assembly
|
||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN5","SerialNumber":"SXM5SN"}
|
||||
{"Id":"HGX_FW_GPU_SXM_1","Version":"FW1"}
|
||||
{"Id":"HGX_FW_GPU_SXM_3","Version":"FW3"}
|
||||
{"Id":"HGX_FW_GPU_SXM_5","Version":"FW5"}
|
||||
{"Id":"HGX_InfoROM_GPU_SXM_3","Version":"IR3"}
|
||||
`)
|
||||
|
||||
enrichGPUsFromHGXHWInfo(content, hw)
|
||||
|
||||
if hw.GPUs[0].SerialNumber != "SXM3SN" {
|
||||
t.Fatalf("expected #GPU6 to map to SXM3 serial, got %q", hw.GPUs[0].SerialNumber)
|
||||
}
|
||||
if hw.GPUs[1].SerialNumber != "SXM1SN" {
|
||||
t.Fatalf("expected #GPU7 to map to SXM1 serial, got %q", hw.GPUs[1].SerialNumber)
|
||||
}
|
||||
if hw.GPUs[2].SerialNumber != "SXM5SN" {
|
||||
t.Fatalf("expected #GPU0 to map to SXM5 serial, got %q", hw.GPUs[2].SerialNumber)
|
||||
}
|
||||
if hw.GPUs[0].Firmware != "FW3" {
|
||||
t.Fatalf("expected #GPU6 firmware FW3, got %q", hw.GPUs[0].Firmware)
|
||||
}
|
||||
if hw.GPUs[0].VideoBIOS != "IR3" {
|
||||
t.Fatalf("expected #GPU6 InfoROM in VideoBIOS IR3, got %q", hw.GPUs[0].VideoBIOS)
|
||||
}
|
||||
if hw.GPUs[2].Firmware != "FW5" {
|
||||
t.Fatalf("expected #GPU0 firmware FW5, got %q", hw.GPUs[2].Firmware)
|
||||
}
|
||||
for _, g := range hw.GPUs {
|
||||
if g.Slot == "#CPU0_PE1_E_BMC" {
|
||||
t.Fatalf("expected non-HGX BMC VGA entry to be filtered out")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrichGPUsFromHGXHWInfo_AddsMissingLogicalGPU(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
GPUs: []models.GPU{
|
||||
{Slot: "#GPU0"},
|
||||
{Slot: "#GPU1"},
|
||||
{Slot: "#GPU2"},
|
||||
{Slot: "#GPU3"},
|
||||
{Slot: "#GPU4"},
|
||||
{Slot: "#GPU5"},
|
||||
{Slot: "#GPU7"},
|
||||
},
|
||||
}
|
||||
|
||||
content := []byte(`
|
||||
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
|
||||
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
|
||||
`)
|
||||
|
||||
enrichGPUsFromHGXHWInfo(content, hw)
|
||||
|
||||
found := false
|
||||
for _, g := range hw.GPUs {
|
||||
if g.Slot == "#GPU6" {
|
||||
found = true
|
||||
if g.SerialNumber != "SXM3SN" {
|
||||
t.Fatalf("expected synthesized #GPU6 serial SXM3SN, got %q", g.SerialNumber)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatalf("expected synthesized #GPU6 entry")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyGPUStatusFromEvents_MarksFaultedGPU(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
GPUs: []models.GPU{
|
||||
{Slot: "#GPU6"},
|
||||
{Slot: "#GPU5"},
|
||||
},
|
||||
}
|
||||
|
||||
events := []models.Event{
|
||||
{
|
||||
ID: "17FFB002",
|
||||
Timestamp: time.Now(),
|
||||
Description: "PCIe Present mismatch BIOS miss F_GPU6",
|
||||
},
|
||||
}
|
||||
|
||||
applyGPUStatusFromEvents(hw, events)
|
||||
|
||||
if hw.GPUs[0].Status != "Critical" {
|
||||
t.Fatalf("expected #GPU6 status Critical, got %q", hw.GPUs[0].Status)
|
||||
}
|
||||
if hw.GPUs[1].Status != "OK" {
|
||||
t.Fatalf("expected healthy GPU status OK, got %q", hw.GPUs[1].Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyGPUStatusFromEvents_UsesLatestEventAsCurrentStatusAndKeepsHistory(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
GPUs: []models.GPU{
|
||||
{Slot: "#GPU1"},
|
||||
{Slot: "#GPU3"},
|
||||
{Slot: "#GPU6"},
|
||||
},
|
||||
}
|
||||
|
||||
events := []models.Event{
|
||||
{
|
||||
ID: "17FFB002",
|
||||
Timestamp: time.Date(2026, 1, 12, 22, 51, 16, 0, time.FixedZone("UTC+8", 8*3600)),
|
||||
Description: "PCIe Present mismatch BIOS miss F_GPU1 F_GPU3 F_GPU6",
|
||||
},
|
||||
{
|
||||
ID: "17FFB002",
|
||||
Timestamp: time.Date(2026, 1, 12, 23, 5, 18, 0, time.FixedZone("UTC+8", 8*3600)),
|
||||
Description: "PCIe Present mismatch BIOS miss F_GPU6",
|
||||
},
|
||||
}
|
||||
|
||||
applyGPUStatusFromEvents(hw, events)
|
||||
|
||||
if hw.GPUs[0].Status != "OK" {
|
||||
t.Fatalf("expected #GPU1 to recover to OK on latest event, got %q", hw.GPUs[0].Status)
|
||||
}
|
||||
if hw.GPUs[1].Status != "OK" {
|
||||
t.Fatalf("expected #GPU3 to recover to OK on latest event, got %q", hw.GPUs[1].Status)
|
||||
}
|
||||
if hw.GPUs[2].Status != "Critical" {
|
||||
t.Fatalf("expected #GPU6 to remain Critical, got %q", hw.GPUs[2].Status)
|
||||
}
|
||||
if len(hw.GPUs[0].StatusHistory) == 0 {
|
||||
t.Fatalf("expected #GPU1 status history to be populated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseIDLLog_ParsesStructuredJSONLine(t *testing.T) {
|
||||
content := []byte(`{ "MESSAGE": "|2026-01-12T23:05:18+08:00|PCIE|Assert|Critical|17FFB002|PCIe Present mismatch BIOS miss F_GPU6 - Assert|" }`)
|
||||
|
||||
events := ParseIDLLog(content)
|
||||
if len(events) != 1 {
|
||||
t.Fatalf("expected 1 event from JSON line, got %d", len(events))
|
||||
}
|
||||
if events[0].ID != "17FFB002" {
|
||||
t.Fatalf("expected event ID 17FFB002, got %q", events[0].ID)
|
||||
}
|
||||
if events[0].Source != "BMC" {
|
||||
t.Fatalf("expected BMC source for IDL event, got %q", events[0].Source)
|
||||
}
|
||||
if events[0].SensorType != "pcie" {
|
||||
t.Fatalf("expected component type pcie, got %#v", events[0])
|
||||
}
|
||||
}
|
||||
360
internal/parser/vendors/inspur/hgx_hwinfo.go
vendored
Normal file
360
internal/parser/vendors/inspur/hgx_hwinfo.go
vendored
Normal file
@@ -0,0 +1,360 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
type hgxGPUAssemblyInfo struct {
|
||||
Model string
|
||||
Part string
|
||||
Serial string
|
||||
}
|
||||
|
||||
type hgxGPUFirmwareInfo struct {
|
||||
Firmware string
|
||||
InfoROM string
|
||||
}
|
||||
|
||||
type hgxFirmwareInventoryEntry struct {
|
||||
ID string
|
||||
Version string
|
||||
ActiveVersion string
|
||||
InactiveVersion string
|
||||
}
|
||||
|
||||
// Logical GPU index mapping used by HGX B200 UI ordering.
|
||||
// Example from real logs/UI:
|
||||
// GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8, GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
|
||||
var hgxLogicalToSXM = map[int]int{
|
||||
0: 5,
|
||||
1: 7,
|
||||
2: 6,
|
||||
3: 8,
|
||||
4: 2,
|
||||
5: 4,
|
||||
6: 3,
|
||||
7: 1,
|
||||
}
|
||||
|
||||
var (
|
||||
reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
|
||||
reHGXFWBlock = regexp.MustCompile(`(?s)"Id":\s*"HGX_FW_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
|
||||
reHGXInfoROM = regexp.MustCompile(`(?s)"Id":\s*"HGX_InfoROM_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
|
||||
reIDLine = regexp.MustCompile(`"Id":\s*"([^"]+)"`)
|
||||
reVersion = regexp.MustCompile(`"Version":\s*"([^"]*)"`)
|
||||
reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
|
||||
)
|
||||
|
||||
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
|
||||
if hw == nil || len(hw.GPUs) == 0 || len(content) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
bySXM := parseHGXGPUAssembly(content)
|
||||
if len(bySXM) == 0 {
|
||||
return
|
||||
}
|
||||
fwBySXM := parseHGXGPUFirmware(content)
|
||||
|
||||
normalizeHGXGPUInventory(hw, bySXM)
|
||||
|
||||
for i := range hw.GPUs {
|
||||
gpu := &hw.GPUs[i]
|
||||
logicalIdx, ok := extractLogicalGPUIndex(gpu.Slot)
|
||||
if !ok {
|
||||
// Keep existing info if slot index cannot be determined.
|
||||
continue
|
||||
}
|
||||
|
||||
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
||||
info, found := bySXM[sxm]
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.TrimSpace(gpu.SerialNumber) == "" {
|
||||
gpu.SerialNumber = info.Serial
|
||||
}
|
||||
if shouldReplaceGPUModel(gpu.Model) {
|
||||
gpu.Model = info.Model
|
||||
}
|
||||
if strings.TrimSpace(gpu.PartNumber) == "" {
|
||||
gpu.PartNumber = info.Part
|
||||
}
|
||||
if strings.TrimSpace(gpu.Manufacturer) == "" {
|
||||
gpu.Manufacturer = "NVIDIA"
|
||||
}
|
||||
if fw, ok := fwBySXM[sxm]; ok {
|
||||
if strings.TrimSpace(gpu.Firmware) == "" && strings.TrimSpace(fw.Firmware) != "" {
|
||||
gpu.Firmware = fw.Firmware
|
||||
}
|
||||
if strings.TrimSpace(gpu.VideoBIOS) == "" && strings.TrimSpace(fw.InfoROM) != "" {
|
||||
gpu.VideoBIOS = fw.InfoROM
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func appendHGXFirmwareFromHWInfo(content []byte, hw *models.HardwareConfig) {
|
||||
if hw == nil || len(content) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
entries := parseHGXFirmwareInventory(content)
|
||||
if len(entries) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
existing := make(map[string]bool, len(hw.Firmware))
|
||||
for _, fw := range hw.Firmware {
|
||||
key := strings.ToLower(strings.TrimSpace(fw.DeviceName) + "|" + strings.TrimSpace(fw.Version))
|
||||
existing[key] = true
|
||||
}
|
||||
|
||||
appendFW := func(name, version string) {
|
||||
name = strings.TrimSpace(name)
|
||||
version = strings.TrimSpace(version)
|
||||
if name == "" || version == "" {
|
||||
return
|
||||
}
|
||||
key := strings.ToLower(name + "|" + version)
|
||||
if existing[key] {
|
||||
return
|
||||
}
|
||||
existing[key] = true
|
||||
hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
|
||||
DeviceName: name,
|
||||
Version: version,
|
||||
})
|
||||
}
|
||||
|
||||
for _, e := range entries {
|
||||
appendFW(e.ID, e.Version)
|
||||
|
||||
if e.ActiveVersion != "" && e.InactiveVersion != "" && e.ActiveVersion != e.InactiveVersion {
|
||||
appendFW(e.ID+" Active Slot", e.ActiveVersion)
|
||||
appendFW(e.ID+" Inactive Slot", e.InactiveVersion)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
|
||||
result := make(map[int]hgxGPUAssemblyInfo)
|
||||
matches := reHGXGPUBlock.FindAllSubmatch(content, -1)
|
||||
for _, m := range matches {
|
||||
if len(m) != 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
sxmIdx, err := strconv.Atoi(string(m[1]))
|
||||
if err != nil || sxmIdx <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
result[sxmIdx] = hgxGPUAssemblyInfo{
|
||||
Model: strings.TrimSpace(string(m[2])),
|
||||
Part: strings.TrimSpace(string(m[3])),
|
||||
Serial: strings.TrimSpace(string(m[4])),
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func parseHGXGPUFirmware(content []byte) map[int]hgxGPUFirmwareInfo {
|
||||
result := make(map[int]hgxGPUFirmwareInfo)
|
||||
|
||||
matchesFW := reHGXFWBlock.FindAllSubmatch(content, -1)
|
||||
for _, m := range matchesFW {
|
||||
if len(m) != 3 {
|
||||
continue
|
||||
}
|
||||
sxmIdx, err := strconv.Atoi(string(m[1]))
|
||||
if err != nil || sxmIdx <= 0 {
|
||||
continue
|
||||
}
|
||||
version := strings.TrimSpace(string(m[2]))
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
current := result[sxmIdx]
|
||||
if current.Firmware == "" {
|
||||
current.Firmware = version
|
||||
}
|
||||
result[sxmIdx] = current
|
||||
}
|
||||
|
||||
matchesInfoROM := reHGXInfoROM.FindAllSubmatch(content, -1)
|
||||
for _, m := range matchesInfoROM {
|
||||
if len(m) != 3 {
|
||||
continue
|
||||
}
|
||||
sxmIdx, err := strconv.Atoi(string(m[1]))
|
||||
if err != nil || sxmIdx <= 0 {
|
||||
continue
|
||||
}
|
||||
version := strings.TrimSpace(string(m[2]))
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
current := result[sxmIdx]
|
||||
if current.InfoROM == "" {
|
||||
current.InfoROM = version
|
||||
}
|
||||
result[sxmIdx] = current
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func parseHGXFirmwareInventory(content []byte) []hgxFirmwareInventoryEntry {
|
||||
lines := strings.Split(string(content), "\n")
|
||||
result := make([]hgxFirmwareInventoryEntry, 0)
|
||||
|
||||
var current *hgxFirmwareInventoryEntry
|
||||
section := ""
|
||||
|
||||
flush := func() {
|
||||
if current == nil {
|
||||
return
|
||||
}
|
||||
if current.Version == "" && current.ActiveVersion == "" && current.InactiveVersion == "" {
|
||||
current = nil
|
||||
section = ""
|
||||
return
|
||||
}
|
||||
result = append(result, *current)
|
||||
current = nil
|
||||
section = ""
|
||||
}
|
||||
|
||||
for _, line := range lines {
|
||||
if m := reIDLine.FindStringSubmatch(line); len(m) > 1 {
|
||||
flush()
|
||||
id := strings.TrimSpace(m[1])
|
||||
if strings.HasPrefix(id, "HGX_") {
|
||||
current = &hgxFirmwareInventoryEntry{ID: id}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if current == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(line, `"ActiveFirmwareSlot"`) {
|
||||
section = "active"
|
||||
}
|
||||
if strings.Contains(line, `"InactiveFirmwareSlot"`) {
|
||||
section = "inactive"
|
||||
}
|
||||
|
||||
if m := reVersion.FindStringSubmatch(line); len(m) > 1 {
|
||||
version := strings.TrimSpace(m[1])
|
||||
if version == "" {
|
||||
section = ""
|
||||
continue
|
||||
}
|
||||
switch section {
|
||||
case "active":
|
||||
if current.ActiveVersion == "" {
|
||||
current.ActiveVersion = version
|
||||
}
|
||||
case "inactive":
|
||||
if current.InactiveVersion == "" {
|
||||
current.InactiveVersion = version
|
||||
}
|
||||
default:
|
||||
// Keep top-level version from the last seen plain "Version" in current entry.
|
||||
current.Version = version
|
||||
}
|
||||
section = ""
|
||||
}
|
||||
}
|
||||
flush()
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func extractLogicalGPUIndex(slot string) (int, bool) {
|
||||
m := reSlotGPU.FindStringSubmatch(slot)
|
||||
if len(m) < 2 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
idx, err := strconv.Atoi(m[1])
|
||||
if err != nil || idx < 0 {
|
||||
return 0, false
|
||||
}
|
||||
return idx, true
|
||||
}
|
||||
|
||||
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
|
||||
if sxm, ok := hgxLogicalToSXM[logicalIdx]; ok {
|
||||
if _, exists := bySXM[sxm]; exists {
|
||||
return sxm
|
||||
}
|
||||
}
|
||||
|
||||
identity := logicalIdx + 1
|
||||
if _, exists := bySXM[identity]; exists {
|
||||
return identity
|
||||
}
|
||||
|
||||
return identity
|
||||
}
|
||||
|
||||
func shouldReplaceGPUModel(model string) bool {
|
||||
trimmed := strings.TrimSpace(model)
|
||||
if trimmed == "" {
|
||||
return true
|
||||
}
|
||||
switch strings.ToLower(trimmed) {
|
||||
case "vga", "3d controller", "display controller", "unknown":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
|
||||
// Keep only logical HGX GPUs (#GPU0..#GPU7) and remove BMC VGA entries.
|
||||
filtered := make([]models.GPU, 0, len(hw.GPUs))
|
||||
present := make(map[int]bool)
|
||||
for _, gpu := range hw.GPUs {
|
||||
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
||||
if !ok || idx < 0 || idx > 7 {
|
||||
continue
|
||||
}
|
||||
present[idx] = true
|
||||
filtered = append(filtered, gpu)
|
||||
}
|
||||
|
||||
// If some logical GPUs are missing in asset.json, add placeholders from HGX Redfish assembly.
|
||||
for logicalIdx := 0; logicalIdx <= 7; logicalIdx++ {
|
||||
if present[logicalIdx] {
|
||||
continue
|
||||
}
|
||||
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
||||
info, ok := bySXM[sxm]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
filtered = append(filtered, models.GPU{
|
||||
Slot: fmt.Sprintf("#GPU%d", logicalIdx),
|
||||
Model: info.Model,
|
||||
Manufacturer: "NVIDIA",
|
||||
SerialNumber: info.Serial,
|
||||
PartNumber: info.Part,
|
||||
})
|
||||
}
|
||||
|
||||
hw.GPUs = filtered
|
||||
}
|
||||
12
internal/parser/vendors/inspur/idl.go
vendored
12
internal/parser/vendors/inspur/idl.go
vendored
@@ -8,8 +8,10 @@ import (
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// ParseIDLLog parses the IDL (Inspur Diagnostic Log) file for BMC alarms
|
||||
// Format: |timestamp|component|type|severity|eventID|description|
|
||||
// ParseIDLLog parses IDL-style entries for BMC alarms.
|
||||
// Works for both plain idl.log lines and JSON structured logs (idl_json/run_json)
|
||||
// where MESSAGE/LOG2_FMTMSG contains:
|
||||
// |timestamp|component|type|severity|eventID|description|
|
||||
func ParseIDLLog(content []byte) []models.Event {
|
||||
var events []models.Event
|
||||
|
||||
@@ -21,10 +23,6 @@ func ParseIDLLog(content []byte) []models.Event {
|
||||
seenEvents := make(map[string]bool) // Deduplicate events
|
||||
|
||||
for _, line := range lines {
|
||||
if !strings.Contains(line, "CommerDiagnose") {
|
||||
continue
|
||||
}
|
||||
|
||||
matches := re.FindStringSubmatch(line)
|
||||
if matches == nil {
|
||||
continue
|
||||
@@ -62,7 +60,7 @@ func ParseIDLLog(content []byte) []models.Event {
|
||||
events = append(events, models.Event{
|
||||
ID: eventID,
|
||||
Timestamp: ts,
|
||||
Source: component,
|
||||
Source: "BMC",
|
||||
SensorType: strings.ToLower(component),
|
||||
SensorName: sensorName,
|
||||
EventType: eventType,
|
||||
|
||||
258
internal/parser/vendors/inspur/parser.go
vendored
258
internal/parser/vendors/inspur/parser.go
vendored
@@ -1,14 +1,23 @@
|
||||
// Package inspur provides parser for Inspur/Kaytus BMC diagnostic archives
|
||||
// Tested with: Kaytus KR4268X2 (onekeylog format)
|
||||
// Tested with: Inspur NF5468M7 / Kaytus KR4268X2 (onekeylog format)
|
||||
//
|
||||
// IMPORTANT: Increment parserVersion when modifying parser logic!
|
||||
// This helps track which version was used to parse specific logs.
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// parserVersion - version of this parser module
|
||||
// IMPORTANT: Increment this version when making changes to parser logic!
|
||||
const parserVersion = "1.8"
|
||||
|
||||
func init() {
|
||||
parser.Register(&Parser{})
|
||||
}
|
||||
@@ -26,6 +35,12 @@ func (p *Parser) Vendor() string {
|
||||
return "inspur"
|
||||
}
|
||||
|
||||
// Version returns parser version
|
||||
// IMPORTANT: Update parserVersion constant when modifying parser logic!
|
||||
func (p *Parser) Version() string {
|
||||
return parserVersion
|
||||
}
|
||||
|
||||
// Detect checks if archive matches Inspur/Kaytus format
|
||||
// Returns confidence 0-100
|
||||
func (p *Parser) Detect(files []parser.ExtractedFile) int {
|
||||
@@ -72,42 +87,103 @@ func containsInspurMarkers(content []byte) bool {
|
||||
|
||||
// Parse parses Inspur/Kaytus archive
|
||||
func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) {
|
||||
selLocation := inferInspurArchiveLocation(files)
|
||||
|
||||
result := &models.AnalysisResult{
|
||||
Events: make([]models.Event, 0),
|
||||
FRU: make([]models.FRUInfo, 0),
|
||||
Sensors: make([]models.SensorReading, 0),
|
||||
}
|
||||
|
||||
// Parse devicefrusdr.log (contains SDR and FRU data)
|
||||
// Pre-parse enrichment maps from devicefrusdr.log for use inside ParseAssetJSON.
|
||||
// BMC does not populate HddInfo.ModelName or SerialNumber for NVMe drives.
|
||||
var pcieSlotDeviceNames map[int]string
|
||||
var nvmeLocToSlot map[int]int
|
||||
if f := parser.FindFileByName(files, "devicefrusdr.log"); f != nil {
|
||||
p.parseDeviceFruSDR(f.Content, result)
|
||||
pcieSlotDeviceNames = ParsePCIeSlotDeviceNames(f.Content)
|
||||
nvmeLocToSlot = ParsePCIeNVMeLocToSlot(f.Content)
|
||||
}
|
||||
|
||||
// Parse asset.json
|
||||
// Parse NVMe serial numbers from audit.log: every disk SN change is logged there.
|
||||
// Combine with the NVMe loc→slot mapping to build pcieSlot→serial map.
|
||||
// Also parse RAID disk serials by backplane slot key (e.g. "BP0:0").
|
||||
var pcieSlotSerials map[int]string
|
||||
var raidSlotSerials map[string]string
|
||||
if f := parser.FindFileByName(files, "audit.log"); f != nil {
|
||||
if len(nvmeLocToSlot) > 0 {
|
||||
nvmeDiskSerials := ParseAuditLogNVMeSerials(f.Content)
|
||||
if len(nvmeDiskSerials) > 0 {
|
||||
pcieSlotSerials = make(map[int]string, len(nvmeDiskSerials))
|
||||
for diskNum, serial := range nvmeDiskSerials {
|
||||
if slot, ok := nvmeLocToSlot[diskNum]; ok {
|
||||
pcieSlotSerials[slot] = serial
|
||||
}
|
||||
}
|
||||
if len(pcieSlotSerials) == 0 {
|
||||
pcieSlotSerials = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
raidSlotSerials = ParseAuditLogRAIDSerials(f.Content)
|
||||
}
|
||||
|
||||
// Parse asset.json first (base hardware info)
|
||||
if f := parser.FindFileByName(files, "asset.json"); f != nil {
|
||||
if hw, err := ParseAssetJSON(f.Content); err == nil {
|
||||
if hw, err := ParseAssetJSON(f.Content, pcieSlotDeviceNames, pcieSlotSerials); err == nil {
|
||||
result.Hardware = hw
|
||||
}
|
||||
}
|
||||
|
||||
// Extract BoardInfo from FRU data
|
||||
if result.Hardware == nil {
|
||||
result.Hardware = &models.HardwareConfig{}
|
||||
}
|
||||
|
||||
// Parse devicefrusdr.log (contains SDR, FRU, PCIe and additional data)
|
||||
if f := parser.FindFileByName(files, "devicefrusdr.log"); f != nil {
|
||||
p.parseDeviceFruSDR(f.Content, result)
|
||||
}
|
||||
|
||||
extractBoardInfo(result.FRU, result.Hardware)
|
||||
|
||||
// Extract PlatformId (server model) from ThermalConfig
|
||||
if f := parser.FindFileByName(files, "ThermalConfig_Cur.conf"); f != nil {
|
||||
extractPlatformId(f.Content, result.Hardware)
|
||||
}
|
||||
|
||||
// Parse component.log for additional data (PSU, etc.)
|
||||
if f := parser.FindFileByName(files, "component.log"); f != nil {
|
||||
if result.Hardware == nil {
|
||||
result.Hardware = &models.HardwareConfig{}
|
||||
}
|
||||
ParseComponentLog(f.Content, result.Hardware)
|
||||
|
||||
// Extract events from component.log (memory errors, etc.)
|
||||
componentEvents := ParseComponentLogEvents(f.Content)
|
||||
result.Events = append(result.Events, componentEvents...)
|
||||
|
||||
// Extract additional telemetry sensors from component.log sections
|
||||
// (fan RPM, backplane temperature, PSU summary power, etc.).
|
||||
componentSensors := ParseComponentLogSensors(f.Content)
|
||||
result.Sensors = mergeSensorReadings(result.Sensors, componentSensors)
|
||||
}
|
||||
|
||||
// Parse IDL log (BMC alarms/diagnose events)
|
||||
if f := parser.FindFileByName(files, "idl.log"); f != nil {
|
||||
// Enrich runtime component data from Redis snapshot (serials, FW, telemetry),
|
||||
// when text logs miss these fields.
|
||||
if f := parser.FindFileByName(files, "redis-dump.rdb"); f != nil && result.Hardware != nil {
|
||||
enrichFromRedisDump(f.Content, result.Hardware)
|
||||
}
|
||||
|
||||
// Parse IDL-like logs (plain and structured JSON logs with embedded IDL messages)
|
||||
idlFiles := parser.FindFileByPattern(files, "/idl.log", "idl_json.log", "run_json.log")
|
||||
for _, f := range idlFiles {
|
||||
idlEvents := ParseIDLLog(f.Content)
|
||||
result.Events = append(result.Events, idlEvents...)
|
||||
}
|
||||
|
||||
// Parse SEL list (selelist.csv)
|
||||
if f := parser.FindFileByName(files, "selelist.csv"); f != nil {
|
||||
selEvents := ParseSELListWithLocation(f.Content, selLocation)
|
||||
result.Events = append(result.Events, selEvents...)
|
||||
}
|
||||
|
||||
// Parse syslog files
|
||||
syslogFiles := parser.FindFileByPattern(files, "syslog/alert", "syslog/warning", "syslog/notice", "syslog/info")
|
||||
for _, f := range syslogFiles {
|
||||
@@ -115,9 +191,75 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
||||
result.Events = append(result.Events, events...)
|
||||
}
|
||||
|
||||
// Fallback for archives where board serial is missing in parsed FRU/asset data:
|
||||
// recover it from log content, never from archive filename.
|
||||
if strings.TrimSpace(result.Hardware.BoardInfo.SerialNumber) == "" {
|
||||
if serial := inferBoardSerialFromFallbackLogs(files); serial != "" {
|
||||
result.Hardware.BoardInfo.SerialNumber = serial
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(result.Hardware.BoardInfo.ProductName) == "" {
|
||||
if model := inferBoardModelFromFallbackLogs(files); model != "" {
|
||||
result.Hardware.BoardInfo.ProductName = model
|
||||
}
|
||||
}
|
||||
|
||||
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
|
||||
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
|
||||
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
|
||||
appendHGXFirmwareFromHWInfo(f.Content, result.Hardware)
|
||||
}
|
||||
|
||||
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
|
||||
if result.Hardware != nil {
|
||||
applyGPUStatusFromEvents(result.Hardware, result.Events)
|
||||
enrichStorageFromSerialFallbackFiles(files, result.Hardware)
|
||||
// Apply RAID disk serials from audit.log (authoritative: last non-NULL SN change).
|
||||
// These override redis/component.log serials which may be stale after disk replacement.
|
||||
applyRAIDSlotSerials(result.Hardware, raidSlotSerials)
|
||||
parser.ApplyManufacturedYearWeekFromFRU(result.FRU, result.Hardware)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func inferInspurArchiveLocation(files []parser.ExtractedFile) *time.Location {
|
||||
fallback := parser.DefaultArchiveLocation()
|
||||
f := parser.FindFileByName(files, "timezone.conf")
|
||||
if f == nil {
|
||||
return fallback
|
||||
}
|
||||
locName := parseTimezoneConfigLocation(f.Content)
|
||||
if strings.TrimSpace(locName) == "" {
|
||||
return fallback
|
||||
}
|
||||
loc, err := time.LoadLocation(locName)
|
||||
if err != nil {
|
||||
return fallback
|
||||
}
|
||||
return loc
|
||||
}
|
||||
|
||||
// parseTimezoneConfigLocation extracts the IANA timezone name from a
// timezone.conf body. It scans key=value lines, ignoring blank lines,
// section headers ("[...]") and "#"/";" comments, and returns the first
// non-empty value whose key equals "timezone" (case-insensitive).
// Returns "" when no such key is found.
func parseTimezoneConfigLocation(content []byte) string {
	for _, raw := range strings.Split(string(content), "\n") {
		line := strings.TrimSpace(raw)
		if line == "" ||
			strings.HasPrefix(line, "[") ||
			strings.HasPrefix(line, "#") ||
			strings.HasPrefix(line, ";") {
			continue
		}
		key, rest, found := strings.Cut(line, "=")
		if !found {
			continue
		}
		if !strings.EqualFold(strings.TrimSpace(key), "timezone") {
			continue
		}
		if val := strings.TrimSpace(rest); val != "" {
			return val
		}
	}
	return ""
}
|
||||
|
||||
func (p *Parser) parseDeviceFruSDR(content []byte, result *models.AnalysisResult) {
|
||||
lines := string(content)
|
||||
|
||||
@@ -140,4 +282,100 @@ func (p *Parser) parseDeviceFruSDR(content []byte, result *models.AnalysisResult
|
||||
fruContent := lines[fruStart:]
|
||||
result.FRU = ParseFRU([]byte(fruContent))
|
||||
}
|
||||
|
||||
// Parse PCIe devices from RESTful PCIE Device info
|
||||
// This supplements data from asset.json with serial numbers, firmware, etc.
|
||||
pcieDevicesFromREST := ParsePCIeDevices(content)
|
||||
|
||||
// Merge PCIe data: asset.json is the base inventory, RESTful data enriches names/links/serials.
|
||||
if result.Hardware != nil {
|
||||
result.Hardware.PCIeDevices = MergePCIeDevices(result.Hardware.PCIeDevices, pcieDevicesFromREST)
|
||||
}
|
||||
|
||||
// Parse GPU devices and add temperature data from sensors
|
||||
if len(result.Sensors) > 0 && result.Hardware != nil {
|
||||
// Use existing GPU data from asset.json and enrich with sensor data
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
|
||||
// Extract GPU number from slot name
|
||||
slotNum := extractSlotNumberFromGPU(gpu.Slot)
|
||||
|
||||
// Find temperature sensors for this GPU
|
||||
for _, sensor := range result.Sensors {
|
||||
sensorName := strings.ToUpper(sensor.Name)
|
||||
|
||||
// Match GPU temperature sensor
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_TEMP", slotNum)) && !strings.Contains(sensorName, "MEM") {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match GPU memory temperature
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_MEM_TEMP", slotNum)) {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.MemTemperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match PCIe slot temperature as fallback
|
||||
if strings.Contains(sensorName, fmt.Sprintf("PCIE%d_GPU_TLM_T", slotNum)) && gpu.Temperature == 0 {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractSlotNumberFromGPU extracts the PCIe slot number from a GPU slot
// string by looking for an underscore-separated "PCIE<n>" token.
// Returns 0 when no token yields a positive number.
func extractSlotNumberFromGPU(slot string) int {
	for _, token := range strings.Split(slot, "_") {
		if !strings.HasPrefix(token, "PCIE") {
			continue
		}
		var n int
		// Sscanf leaves n at 0 when the suffix is not numeric.
		fmt.Sscanf(token, "PCIE%d", &n)
		if n > 0 {
			return n
		}
	}
	return 0
}
|
||||
|
||||
func mergeSensorReadings(base, extra []models.SensorReading) []models.SensorReading {
|
||||
if len(extra) == 0 {
|
||||
return base
|
||||
}
|
||||
|
||||
out := append([]models.SensorReading{}, base...)
|
||||
seen := make(map[string]struct{}, len(out))
|
||||
for _, s := range out {
|
||||
if key := sensorMergeKey(s); key != "" {
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range extra {
|
||||
key := sensorMergeKey(s)
|
||||
if key != "" {
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func sensorMergeKey(s models.SensorReading) string {
|
||||
name := strings.ToLower(strings.TrimSpace(s.Name))
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
456
internal/parser/vendors/inspur/pcie.go
vendored
Normal file
456
internal/parser/vendors/inspur/pcie.go
vendored
Normal file
@@ -0,0 +1,456 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser/vendors/pciids"
|
||||
)
|
||||
|
||||
// PCIeRESTInfo represents the RESTful PCIE Device info structure:
// the JSON array of per-device records emitted by the BMC inside
// devicefrusdr.log (see parsePCIeRESTJSON for the section extraction).
type PCIeRESTInfo []struct {
	ID int `json:"id"`
	// Present is checked against 1 by ParsePCIeDevices; other values are skipped.
	Present int `json:"present"`
	Enable  int `json:"enable"`
	Status  int `json:"status"`
	// PCI identity. VendorName/DeviceName as reported by the BMC may be
	// empty, "N/A", or a raw hex string (see sanitizePCIeDeviceName).
	VendorID   int    `json:"vendor_id"`
	VendorName string `json:"vendor_name"`
	DeviceID   int    `json:"device_id"`
	DeviceName string `json:"device_name"`
	// PCI address components, combined into a canonical BDF by formatBDF.
	BusNum  int `json:"bus_num"`
	DevNum  int `json:"dev_num"`
	FuncNum int `json:"func_num"`
	// Link width (lanes) and speed (PCIe generation number, rendered as
	// "GEN<n>" by ParsePCIeDevices).
	MaxLinkWidth     int `json:"max_link_width"`
	MaxLinkSpeed     int `json:"max_link_speed"`
	CurrentLinkWidth int `json:"current_link_width"`
	CurrentLinkSpeed int `json:"current_link_speed"`
	// Slot is the integer PCIe slot ID; Location is a textual locator such
	// as "#NVME0" (see ParsePCIeNVMeLocToSlot).
	Slot          int    `json:"slot"`
	Location      string `json:"location"`
	DeviceLocator string `json:"DeviceLocator"`
	// Device type/subtype codes, mapped to class names by determineDeviceClass.
	DevType    int    `json:"dev_type"`
	DevSubtype int    `json:"dev_subtype"`
	PartNum    string `json:"part_num"`
	SerialNum  string `json:"serial_num"`
	FwVer      string `json:"fw_ver"`
}
|
||||
|
||||
// ParsePCIeSlotDeviceNames parses devicefrusdr.log and returns a map from integer PCIe slot ID
|
||||
// to device name string. Used to enrich HddInfo entries in asset.json that lack model names.
|
||||
func ParsePCIeSlotDeviceNames(content []byte) map[int]string {
|
||||
info, ok := parsePCIeRESTJSON(content)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
result := make(map[int]string, len(info))
|
||||
for _, entry := range info {
|
||||
if entry.Slot <= 0 {
|
||||
continue
|
||||
}
|
||||
name := sanitizePCIeDeviceName(entry.DeviceName)
|
||||
if name != "" {
|
||||
result[entry.Slot] = name
|
||||
}
|
||||
}
|
||||
if len(result) == 0 {
|
||||
return nil
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// parsePCIeRESTJSON parses the RESTful PCIE Device info JSON from devicefrusdr.log content.
|
||||
func parsePCIeRESTJSON(content []byte) (PCIeRESTInfo, bool) {
|
||||
text := string(content)
|
||||
startMarker := "RESTful PCIE Device info:"
|
||||
endMarker := "BMC sdr Info:"
|
||||
|
||||
startIdx := strings.Index(text, startMarker)
|
||||
if startIdx == -1 {
|
||||
return nil, false
|
||||
}
|
||||
endIdx := strings.Index(text[startIdx:], endMarker)
|
||||
if endIdx == -1 {
|
||||
endIdx = len(text) - startIdx
|
||||
}
|
||||
jsonText := strings.TrimSpace(text[startIdx+len(startMarker) : startIdx+endIdx])
|
||||
|
||||
var info PCIeRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonText), &info); err != nil {
|
||||
return nil, false
|
||||
}
|
||||
return info, true
|
||||
}
|
||||
|
||||
// ParsePCIeNVMeLocToSlot parses devicefrusdr.log and returns a map from NVMe location number
|
||||
// (the numeric suffix in "#NVME0", "#NVME2", etc.) to the integer PCIe slot ID.
|
||||
// This is used to correlate audit.log NVMe disk numbers with HddInfo PcieSlot values.
|
||||
func ParsePCIeNVMeLocToSlot(content []byte) map[int]int {
|
||||
info, ok := parsePCIeRESTJSON(content)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
nvmeLocRegex := regexp.MustCompile(`(?i)^#NVME(\d+)$`)
|
||||
result := make(map[int]int)
|
||||
for _, entry := range info {
|
||||
if entry.Slot <= 0 {
|
||||
continue
|
||||
}
|
||||
loc := strings.TrimSpace(entry.Location)
|
||||
m := nvmeLocRegex.FindStringSubmatch(loc)
|
||||
if m == nil {
|
||||
continue
|
||||
}
|
||||
locNum, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
result[locNum] = entry.Slot
|
||||
}
|
||||
if len(result) == 0 {
|
||||
return nil
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// ParsePCIeDevices parses RESTful PCIE Device info from devicefrusdr.log
|
||||
func ParsePCIeDevices(content []byte) []models.PCIeDevice {
|
||||
text := string(content)
|
||||
|
||||
// Find RESTful PCIE Device info section
|
||||
startMarker := "RESTful PCIE Device info:"
|
||||
endMarker := "BMC sdr Info:"
|
||||
|
||||
startIdx := strings.Index(text, startMarker)
|
||||
if startIdx == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
endIdx := strings.Index(text[startIdx:], endMarker)
|
||||
if endIdx == -1 {
|
||||
endIdx = len(text) - startIdx
|
||||
}
|
||||
|
||||
jsonText := text[startIdx+len(startMarker) : startIdx+endIdx]
|
||||
jsonText = strings.TrimSpace(jsonText)
|
||||
|
||||
var pcieInfo PCIeRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonText), &pcieInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var devices []models.PCIeDevice
|
||||
for _, pcie := range pcieInfo {
|
||||
if pcie.Present != 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Convert PCIe speed to GEN notation
|
||||
maxSpeed := fmt.Sprintf("GEN%d", pcie.MaxLinkSpeed)
|
||||
currentSpeed := fmt.Sprintf("GEN%d", pcie.CurrentLinkSpeed)
|
||||
|
||||
// Determine device class based on dev_type
|
||||
deviceClass := determineDeviceClass(pcie.DevType, pcie.DevSubtype, pcie.DeviceName)
|
||||
_, pciDeviceName := pciids.DeviceInfo(pcie.VendorID, pcie.DeviceID)
|
||||
|
||||
// Build BDF string in canonical form (bb:dd.f)
|
||||
bdf := formatBDF(pcie.BusNum, pcie.DevNum, pcie.FuncNum)
|
||||
|
||||
partNumber := strings.TrimSpace(pcie.PartNum)
|
||||
if partNumber == "" {
|
||||
partNumber = sanitizePCIeDeviceName(pcie.DeviceName)
|
||||
}
|
||||
if partNumber == "" {
|
||||
partNumber = normalizeModelLabel(pciDeviceName)
|
||||
}
|
||||
if isGenericPCIeClass(deviceClass) {
|
||||
if resolved := normalizeModelLabel(pciDeviceName); resolved != "" {
|
||||
deviceClass = resolved
|
||||
}
|
||||
}
|
||||
manufacturer := strings.TrimSpace(pcie.VendorName)
|
||||
if manufacturer == "" {
|
||||
manufacturer = normalizeModelLabel(pciids.VendorName(pcie.VendorID))
|
||||
}
|
||||
|
||||
device := models.PCIeDevice{
|
||||
Slot: pcie.Location,
|
||||
VendorID: pcie.VendorID,
|
||||
DeviceID: pcie.DeviceID,
|
||||
BDF: bdf,
|
||||
DeviceClass: deviceClass,
|
||||
Manufacturer: manufacturer,
|
||||
LinkWidth: pcie.CurrentLinkWidth,
|
||||
LinkSpeed: currentSpeed,
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: maxSpeed,
|
||||
PartNumber: partNumber,
|
||||
SerialNumber: strings.TrimSpace(pcie.SerialNum),
|
||||
}
|
||||
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
// rawHexDeviceNameRegex matches BMC placeholder names that are just a raw
// hex literal (e.g. "0x1af4"), which carry no human-readable information.
var rawHexDeviceNameRegex = regexp.MustCompile(`(?i)^0x[0-9a-f]+$`)

// sanitizePCIeDeviceName normalizes a BMC-reported device name: it trims
// surrounding whitespace and returns "" for useless values (empty, "N/A",
// or a bare hex number).
func sanitizePCIeDeviceName(name string) string {
	trimmed := strings.TrimSpace(name)
	switch {
	case trimmed == "":
		return ""
	case strings.EqualFold(trimmed, "N/A"):
		return ""
	case rawHexDeviceNameRegex.MatchString(trimmed):
		return ""
	}
	return trimmed
}
|
||||
|
||||
// MergePCIeDevices enriches base devices (from asset.json) with detailed RESTful PCIe data.
|
||||
// Matching is done by BDF first, then by slot fallback.
|
||||
func MergePCIeDevices(base []models.PCIeDevice, rest []models.PCIeDevice) []models.PCIeDevice {
|
||||
if len(rest) == 0 {
|
||||
return base
|
||||
}
|
||||
if len(base) == 0 {
|
||||
return append([]models.PCIeDevice(nil), rest...)
|
||||
}
|
||||
|
||||
type ref struct {
|
||||
index int
|
||||
}
|
||||
byBDF := make(map[string]ref, len(base))
|
||||
bySlot := make(map[string]ref, len(base))
|
||||
|
||||
for i := range base {
|
||||
bdf := normalizePCIeBDF(base[i].BDF)
|
||||
if bdf != "" {
|
||||
byBDF[bdf] = ref{index: i}
|
||||
}
|
||||
slot := strings.ToLower(strings.TrimSpace(base[i].Slot))
|
||||
if slot != "" {
|
||||
bySlot[slot] = ref{index: i}
|
||||
}
|
||||
}
|
||||
|
||||
for _, detailed := range rest {
|
||||
idx := -1
|
||||
if bdf := normalizePCIeBDF(detailed.BDF); bdf != "" {
|
||||
if found, ok := byBDF[bdf]; ok {
|
||||
idx = found.index
|
||||
}
|
||||
}
|
||||
if idx == -1 {
|
||||
slot := strings.ToLower(strings.TrimSpace(detailed.Slot))
|
||||
if slot != "" {
|
||||
if found, ok := bySlot[slot]; ok {
|
||||
idx = found.index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if idx == -1 {
|
||||
base = append(base, detailed)
|
||||
newIdx := len(base) - 1
|
||||
if bdf := normalizePCIeBDF(detailed.BDF); bdf != "" {
|
||||
byBDF[bdf] = ref{index: newIdx}
|
||||
}
|
||||
if slot := strings.ToLower(strings.TrimSpace(detailed.Slot)); slot != "" {
|
||||
bySlot[slot] = ref{index: newIdx}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
enrichPCIeDevice(&base[idx], detailed)
|
||||
}
|
||||
|
||||
return base
|
||||
}
|
||||
|
||||
func enrichPCIeDevice(dst *models.PCIeDevice, src models.PCIeDevice) {
|
||||
if dst == nil {
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(dst.Slot) == "" {
|
||||
dst.Slot = src.Slot
|
||||
}
|
||||
if strings.TrimSpace(dst.BDF) == "" {
|
||||
dst.BDF = src.BDF
|
||||
}
|
||||
if dst.VendorID == 0 {
|
||||
dst.VendorID = src.VendorID
|
||||
}
|
||||
if dst.DeviceID == 0 {
|
||||
dst.DeviceID = src.DeviceID
|
||||
}
|
||||
if strings.TrimSpace(dst.Manufacturer) == "" {
|
||||
dst.Manufacturer = src.Manufacturer
|
||||
}
|
||||
if strings.TrimSpace(dst.SerialNumber) == "" {
|
||||
dst.SerialNumber = src.SerialNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.PartNumber) == "" {
|
||||
dst.PartNumber = src.PartNumber
|
||||
}
|
||||
if strings.TrimSpace(dst.LinkSpeed) == "" || strings.EqualFold(strings.TrimSpace(dst.LinkSpeed), "unknown") {
|
||||
dst.LinkSpeed = src.LinkSpeed
|
||||
}
|
||||
if strings.TrimSpace(dst.MaxLinkSpeed) == "" || strings.EqualFold(strings.TrimSpace(dst.MaxLinkSpeed), "unknown") {
|
||||
dst.MaxLinkSpeed = src.MaxLinkSpeed
|
||||
}
|
||||
if dst.LinkWidth == 0 {
|
||||
dst.LinkWidth = src.LinkWidth
|
||||
}
|
||||
if dst.MaxLinkWidth == 0 {
|
||||
dst.MaxLinkWidth = src.MaxLinkWidth
|
||||
}
|
||||
if isGenericPCIeClass(dst.DeviceClass) && !isGenericPCIeClass(src.DeviceClass) {
|
||||
dst.DeviceClass = src.DeviceClass
|
||||
}
|
||||
}
|
||||
|
||||
// normalizePCIeBDF lowercases and trims a BDF string and converts the
// four-part slash form ("dddd/bb/dd/f") to canonical "bb:dd.f".
// Anything else is returned trimmed/lowercased as-is.
func normalizePCIeBDF(bdf string) string {
	normalized := strings.ToLower(strings.TrimSpace(bdf))
	if normalized == "" {
		return ""
	}

	if parts := strings.Split(normalized, "/"); len(parts) == 4 {
		return parts[1] + ":" + parts[2] + "." + parts[3]
	}
	return normalized
}
|
||||
|
||||
// isGenericPCIeClass reports whether a device class label is too generic to
// be useful (empty, "unknown", or a broad category name), after trimming and
// lowercasing. Generic classes are candidates for replacement by a specific
// pci.ids device name.
func isGenericPCIeClass(class string) bool {
	generic := []string{
		"", "unknown", "other", "bridge", "network", "storage",
		"sas", "sata", "display", "vga", "3d controller", "serial bus",
	}
	normalized := strings.ToLower(strings.TrimSpace(class))
	for _, g := range generic {
		if normalized == g {
			return true
		}
	}
	return false
}
|
||||
|
||||
// determineDeviceClass maps the BMC dev_type/dev_subtype codes to a
// human-readable class name, using the device name to distinguish GPU
// models for display controllers.
//
// dev_type mapping:
//
//	1 = Mass Storage Controller (subtype 4 = RAID)
//	2 = Network Controller
//	3 = Display Controller (GPU)
//	4 = Multimedia Controller
func determineDeviceClass(devType, devSubtype int, deviceName string) string {
	upperName := strings.ToUpper(deviceName)

	switch devType {
	case 1:
		if devSubtype == 4 {
			return "RAID Controller"
		}
		return "Storage Controller"
	case 2:
		return "Network Controller"
	case 3:
		// Distinguish known NVIDIA datacenter GPUs by name.
		switch {
		case strings.Contains(upperName, "H100"):
			return "GPU (H100)"
		case strings.Contains(upperName, "A100"):
			return "GPU (A100)"
		case strings.Contains(upperName, "NVIDIA"):
			return "GPU"
		}
		return "Display Controller"
	case 4:
		return "Multimedia Controller"
	}
	return "Unknown"
}
|
||||
|
||||
// ParseGPUs extracts GPU data from PCIe devices and sensors
|
||||
func ParseGPUs(pcieDevices []models.PCIeDevice, sensors []models.SensorReading) []models.GPU {
|
||||
var gpus []models.GPU
|
||||
|
||||
// Find GPU devices
|
||||
for _, pcie := range pcieDevices {
|
||||
if !strings.Contains(strings.ToLower(pcie.DeviceClass), "gpu") &&
|
||||
!strings.Contains(strings.ToLower(pcie.DeviceClass), "display") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip integrated graphics (ASPEED, etc.)
|
||||
if strings.Contains(pcie.Manufacturer, "ASPEED") {
|
||||
continue
|
||||
}
|
||||
|
||||
gpu := models.GPU{
|
||||
Slot: pcie.Slot,
|
||||
Location: pcie.Slot,
|
||||
Model: pcie.DeviceClass,
|
||||
Manufacturer: pcie.Manufacturer,
|
||||
SerialNumber: pcie.SerialNumber,
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
||||
CurrentLinkWidth: pcie.LinkWidth,
|
||||
CurrentLinkSpeed: pcie.LinkSpeed,
|
||||
Status: "OK",
|
||||
}
|
||||
|
||||
// Extract GPU number from slot name (e.g., "PCIE7" -> 7)
|
||||
slotNum := extractSlotNumber(pcie.Slot)
|
||||
|
||||
// Find temperature sensors for this GPU
|
||||
for _, sensor := range sensors {
|
||||
sensorName := strings.ToUpper(sensor.Name)
|
||||
|
||||
// Match GPU temperature sensor (e.g., "GPU7_Temp")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_TEMP", slotNum)) {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match GPU memory temperature (e.g., "GPU7_Mem_Temp")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_MEM_TEMP", slotNum)) {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.MemTemperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match PCIe slot temperature (e.g., "PCIE7_GPU_TLM_T")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("PCIE%d_GPU_TLM_T", slotNum)) {
|
||||
if sensor.RawValue != "" && gpu.Temperature == 0 {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gpus = append(gpus, gpu)
|
||||
}
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
// extractSlotNumber extracts the PCIe slot number from a location string,
// e.g. "CPU0_PE3_AC_PCIE7" -> 7. Returns 0 when no "PCIE<n>" (n > 0) token
// is present.
//
// A previous version also entered the loop body for tokens prefixed "#CPU",
// but Sscanf with the "PCIE%d" format can never parse those, so that branch
// was dead code and has been removed (behavior unchanged).
func extractSlotNumber(location string) int {
	for _, part := range strings.Split(location, "_") {
		if !strings.HasPrefix(part, "PCIE") {
			continue
		}
		var num int
		// Sscanf leaves num at 0 when the suffix is not a positive number.
		fmt.Sscanf(part, "PCIE%d", &num)
		if num > 0 {
			return num
		}
	}
	return 0
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user