Files
logpile/internal/parser/vendors/inspur/parser.go
Mikhail Chusavitin 27373aa104 feat: surface BMC collection errors in parse-errors panel and event log
When Inspur component.log sections return {"error":"...","code":N} instead
of hardware data, the parser now:
- stores them in AnalysisResult.CollectionErrors (new model field)
- mirrors each one into result.Events with Source="BMC/<section>"
  so the chart viewer event table shows the specific BMC module
- feeds them into /api/parse-errors as bmc_collection_error entries

UI adds a collapsible "Collection diagnostics" panel below the chart
iframe (outside /chart) that appears when /api/parse-errors returns
any items; resets on data clear.

Affected sections in this dump: HDD (1458), PCIe Devices (1458),
Network Adapters (1458), Disk Backplane.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-21 14:30:01 +03:00

402 lines
12 KiB
Go

// Package inspur provides a parser for Inspur/Kaytus BMC diagnostic archives.
// Tested with: Inspur NF5468M7 / Kaytus KR4268X2 (onekeylog format).
//
// IMPORTANT: Increment parserVersion when modifying parser logic!
// This helps track which version was used to parse specific logs.
package inspur
import (
"fmt"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// parserVersion is the version string reported by Parser.Version.
// IMPORTANT: Increment this version when making changes to parser logic!
const parserVersion = "2.0"
// init registers an Inspur/Kaytus Parser instance with the parser package
// when this package is loaded.
func init() {
	parser.Register(new(Parser))
}
// Parser implements VendorParser for Inspur/Kaytus servers.
// It carries no state; an instance is registered with the parser package in init.
type Parser struct{}
// Name returns the human-readable name of this parser.
func (p *Parser) Name() string {
	const displayName = "Inspur/Kaytus BMC Parser"
	return displayName
}
// Vendor returns the vendor identifier this parser is registered under.
func (p *Parser) Vendor() string {
	const vendorID = "inspur"
	return vendorID
}
// Version returns the parser version, i.e. the parserVersion constant.
// IMPORTANT: Update parserVersion constant when modifying parser logic!
func (p *Parser) Version() string {
return parserVersion
}
// Detect scores how strongly the extracted files resemble an Inspur/Kaytus
// onekeylog archive and returns a confidence in the range 0-100.
//
// Every file contributes independently: each matching path fragment adds its
// weight, and an asset.json carrying the Inspur markers adds 20. The running
// total is checked after each file and 100 is returned as soon as it is reached.
func (p *Parser) Detect(files []parser.ExtractedFile) int {
	// Lower-cased path fragments typical for the onekeylog layout.
	pathHints := []struct {
		fragment string
		weight   int
	}{
		{"onekeylog/", 30},
		{"devicefrusdr.log", 25},
		{"component/component.log", 15},
	}

	score := 0
	for i := range files {
		lowered := strings.ToLower(files[i].Path)

		for _, hint := range pathHints {
			if strings.Contains(lowered, hint.fragment) {
				score += hint.weight
			}
		}

		// asset.json only counts when its content has the Inspur-specific structure.
		if strings.HasSuffix(lowered, "asset.json") && containsInspurMarkers(files[i].Content) {
			score += 20
		}

		// Cap at 100.
		if score >= 100 {
			return 100
		}
	}
	return score
}
// containsInspurMarkers reports whether content contains all three section
// names expected in an Inspur asset.json: VersionInfo, CpuInfo and MemInfo.
// The match is a case-sensitive substring search.
func containsInspurMarkers(content []byte) bool {
	text := string(content)
	for _, marker := range []string{"VersionInfo", "CpuInfo", "MemInfo"} {
		if !strings.Contains(text, marker) {
			return false
		}
	}
	return true
}
// Parse builds an AnalysisResult from the files of an Inspur/Kaytus archive.
//
// Every source file is optional: a step is skipped when the lookup for its
// file returns nil. Sources are processed in a fixed order (asset.json as the
// hardware base, then devicefrusdr.log, ThermalConfig_Cur.conf, component.log,
// redis-dump.rdb, IDL logs, selelist.csv, syslog, HGX_HWInfo_FWVersion.log),
// and later steps enrich what earlier ones produced.
//
// The returned error is always nil. A failure to parse asset.json is ignored
// and result.Hardware falls back to an empty HardwareConfig.
func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) {
// Location passed to the SEL parser below; taken from timezone.conf when it
// names a loadable zone, otherwise the parser package default.
selLocation := inferInspurArchiveLocation(files)
result := &models.AnalysisResult{
Events: make([]models.Event, 0),
FRU: make([]models.FRUInfo, 0),
Sensors: make([]models.SensorReading, 0),
}
// Pre-parse enrichment maps from devicefrusdr.log for use inside ParseAssetJSON.
// BMC does not populate HddInfo.ModelName or SerialNumber for NVMe drives.
var pcieSlotDeviceNames map[int]string
var nvmeLocToSlot map[int]int
if f := parser.FindFileByName(files, "devicefrusdr.log"); f != nil {
pcieSlotDeviceNames = ParsePCIeSlotDeviceNames(f.Content)
nvmeLocToSlot = ParsePCIeNVMeLocToSlot(f.Content)
}
// Parse NVMe serial numbers from audit.log: every disk SN change is logged there.
// Combine with the NVMe loc→slot mapping to build pcieSlot→serial map.
// Also parse RAID disk serials by backplane slot key (e.g. "BP0:0").
var pcieSlotSerials map[int]string
var raidSlotSerials map[string]string
if f := parser.FindFileByName(files, "audit.log"); f != nil {
// NVMe serials are only useful when the loc→slot mapping is available.
if len(nvmeLocToSlot) > 0 {
nvmeDiskSerials := ParseAuditLogNVMeSerials(f.Content)
if len(nvmeDiskSerials) > 0 {
pcieSlotSerials = make(map[int]string, len(nvmeDiskSerials))
for diskNum, serial := range nvmeDiskSerials {
if slot, ok := nvmeLocToSlot[diskNum]; ok {
pcieSlotSerials[slot] = serial
}
}
// Hand a nil map to ParseAssetJSON when no disk number could be mapped to a slot.
if len(pcieSlotSerials) == 0 {
pcieSlotSerials = nil
}
}
}
raidSlotSerials = ParseAuditLogRAIDSerials(f.Content)
}
// Parse asset.json first (base hardware info).
// A parse error is ignored here; Hardware then stays nil until the guard below.
if f := parser.FindFileByName(files, "asset.json"); f != nil {
if hw, err := ParseAssetJSON(f.Content, pcieSlotDeviceNames, pcieSlotSerials); err == nil {
result.Hardware = hw
}
}
// Guarantee a non-nil Hardware so every later step can write into it.
if result.Hardware == nil {
result.Hardware = &models.HardwareConfig{}
}
// Parse devicefrusdr.log (contains SDR, FRU, PCIe and additional data)
if f := parser.FindFileByName(files, "devicefrusdr.log"); f != nil {
p.parseDeviceFruSDR(f.Content, result)
}
// Extract BoardInfo from the FRU data collected above.
extractBoardInfo(result.FRU, result.Hardware)
// Extract PlatformId (server model) from ThermalConfig
if f := parser.FindFileByName(files, "ThermalConfig_Cur.conf"); f != nil {
extractPlatformId(f.Content, result.Hardware)
}
// Parse component.log for additional data (PSU, etc.)
if f := parser.FindFileByName(files, "component.log"); f != nil {
ParseComponentLog(f.Content, result.Hardware)
// Extract events from component.log (memory errors, etc.)
componentEvents := ParseComponentLogEvents(f.Content)
result.Events = append(result.Events, componentEvents...)
// Extract additional telemetry sensors from component.log sections
// (fan RPM, backplane temperature, PSU summary power, etc.).
// Sensors whose name is already present in result.Sensors are not added again.
componentSensors := ParseComponentLogSensors(f.Content)
result.Sensors = mergeSensorReadings(result.Sensors, componentSensors)
// Record sections where BMC itself returned an error instead of data,
// and mirror each one into the Events stream so they appear in the log viewer.
// Source is set to "BMC/<section>" so the viewer can show the specific module.
for _, ce := range ParseComponentLogCollectionErrors(f.Content) {
result.CollectionErrors = append(result.CollectionErrors, ce)
// The numeric code is appended to the description only when it is non-zero.
desc := ce.Message
if ce.Code != 0 {
desc = fmt.Sprintf("%s (code %d)", ce.Message, ce.Code)
}
// The event ID is derived from the section name: lower-cased, spaces replaced by underscores.
result.Events = append(result.Events, models.Event{
ID: fmt.Sprintf("bmc_collection_error_%s", strings.ToLower(strings.ReplaceAll(ce.Section, " ", "_"))),
Timestamp: time.Time{}, // no timestamp available
Source: fmt.Sprintf("BMC/%s", ce.Section),
SensorType: "bmc_collection_error",
EventType: "Collection Error",
Severity: models.SeverityWarning,
Description: desc,
})
}
}
// Enrich runtime component data from Redis snapshot (serials, FW, telemetry),
// when text logs miss these fields.
// result.Hardware is already non-nil at this point; the nil check is defensive.
if f := parser.FindFileByName(files, "redis-dump.rdb"); f != nil && result.Hardware != nil {
enrichFromRedisDump(f.Content, result.Hardware)
}
// Parse IDL-like logs (plain and structured JSON logs with embedded IDL messages)
idlFiles := parser.FindFileByPattern(files, "/idl.log", "idl_json.log", "run_json.log")
for _, f := range idlFiles {
idlEvents := ParseIDLLog(f.Content)
result.Events = append(result.Events, idlEvents...)
}
// Parse SEL list (selelist.csv) using the archive location resolved at the top.
if f := parser.FindFileByName(files, "selelist.csv"); f != nil {
selEvents := ParseSELListWithLocation(f.Content, selLocation)
result.Events = append(result.Events, selEvents...)
}
// Parse syslog files
syslogFiles := parser.FindFileByPattern(files, "syslog/alert", "syslog/warning", "syslog/notice", "syslog/info")
for _, f := range syslogFiles {
events := ParseSyslog(f.Content, f.Path)
result.Events = append(result.Events, events...)
}
// Fallback for archives where board serial is missing in parsed FRU/asset data:
// recover it from log content, never from archive filename.
if strings.TrimSpace(result.Hardware.BoardInfo.SerialNumber) == "" {
if serial := inferBoardSerialFromFallbackLogs(files); serial != "" {
result.Hardware.BoardInfo.SerialNumber = serial
}
}
// Same fallback for a missing board product name (server model).
if strings.TrimSpace(result.Hardware.BoardInfo.ProductName) == "" {
if model := inferBoardModelFromFallbackLogs(files); model != "" {
result.Hardware.BoardInfo.ProductName = model
}
}
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
appendHGXFirmwareFromHWInfo(f.Content, result.Hardware)
}
// Final post-processing; runs after all events have been collected.
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
if result.Hardware != nil {
applyGPUStatusFromEvents(result.Hardware, result.Events)
enrichStorageFromSerialFallbackFiles(files, result.Hardware)
// Apply RAID disk serials from audit.log (authoritative: last non-NULL SN change).
// These override redis/component.log serials which may be stale after disk replacement.
applyRAIDSlotSerials(result.Hardware, raidSlotSerials)
parser.ApplyManufacturedYearWeekFromFRU(result.FRU, result.Hardware)
}
return result, nil
}
// inferInspurArchiveLocation resolves the time zone of the archive from its
// timezone.conf file.
//
// It returns parser.DefaultArchiveLocation() when the file is absent, names no
// zone, or names a zone that time.LoadLocation cannot load.
func inferInspurArchiveLocation(files []parser.ExtractedFile) *time.Location {
	defaultLoc := parser.DefaultArchiveLocation()

	tzFile := parser.FindFileByName(files, "timezone.conf")
	if tzFile == nil {
		return defaultLoc
	}

	zone := parseTimezoneConfigLocation(tzFile.Content)
	if strings.TrimSpace(zone) == "" {
		return defaultLoc
	}

	if loc, err := time.LoadLocation(zone); err == nil {
		return loc
	}
	return defaultLoc
}
// parseTimezoneConfigLocation extracts the value of the "timezone" key from
// INI-style content.
//
// Blank lines, section headers ("[...") and comment lines ("#" or ";") are
// skipped. The key is matched case-insensitively; the first entry with a
// non-empty value wins and is returned with surrounding whitespace removed.
// An empty string is returned when no such entry exists.
func parseTimezoneConfigLocation(content []byte) string {
	for _, raw := range strings.Split(string(content), "\n") {
		entry := strings.TrimSpace(raw)
		if entry == "" {
			continue
		}
		// Section headers and comments carry no key/value pair.
		switch entry[0] {
		case '[', '#', ';':
			continue
		}

		// Only the first '=' separates key from value; the value may contain more.
		eq := strings.IndexByte(entry, '=')
		if eq < 0 {
			continue
		}

		key := strings.ToLower(strings.TrimSpace(entry[:eq]))
		value := strings.TrimSpace(entry[eq+1:])
		if key == "timezone" && value != "" {
			return value
		}
	}
	return ""
}
// parseDeviceFruSDR extracts sensor, FRU and PCIe data from devicefrusdr.log
// content and stores it in result.
//
// The SDR section starts at "BMC sdr Info:" and ends at "BMC fru Info:" when
// that marker follows it, otherwise at the end of the content. The FRU section
// runs from "BMC fru Info:" to the end. When result.Hardware is set, the PCIe
// devices found in the content are merged into it and GPU temperatures are
// filled from the parsed sensors.
func (p *Parser) parseDeviceFruSDR(content []byte, result *models.AnalysisResult) {
	text := string(content)
	sdrAt := strings.Index(text, "BMC sdr Info:")
	fruAt := strings.Index(text, "BMC fru Info:")

	// SDR section.
	if sdrAt >= 0 {
		sdrEnd := len(text)
		if fruAt > sdrAt {
			sdrEnd = fruAt
		}
		result.Sensors = ParseSDR([]byte(text[sdrAt:sdrEnd]))
	}

	// FRU section.
	if fruAt >= 0 {
		result.FRU = ParseFRU([]byte(text[fruAt:]))
	}

	// PCIe devices from the RESTful PCIE Device info: asset.json stays the base
	// inventory, the RESTful data enriches names/links/serials.
	restDevices := ParsePCIeDevices(content)
	hw := result.Hardware
	if hw != nil {
		hw.PCIeDevices = MergePCIeDevices(hw.PCIeDevices, restDevices)
	}

	// GPU temperatures need both a hardware inventory and sensor readings.
	if hw == nil || len(result.Sensors) == 0 {
		return
	}

	for i := range hw.GPUs {
		gpu := &hw.GPUs[i]
		slotNum := extractSlotNumberFromGPU(gpu.Slot)

		// Sensor-name fragments for this GPU; they depend only on the slot number.
		coreTag := fmt.Sprintf("GPU%d_TEMP", slotNum)
		memTag := fmt.Sprintf("GPU%d_MEM_TEMP", slotNum)
		slotTag := fmt.Sprintf("PCIE%d_GPU_TLM_T", slotNum)

		for j := range result.Sensors {
			raw := result.Sensors[j].RawValue
			if raw == "" {
				// Nothing to scan for this sensor.
				continue
			}
			name := strings.ToUpper(result.Sensors[j].Name)

			// Scan failures are ignored below: the target field keeps its previous value.

			// GPU core temperature (memory sensors are handled separately).
			if strings.Contains(name, coreTag) && !strings.Contains(name, "MEM") {
				fmt.Sscanf(raw, "%d", &gpu.Temperature)
			}
			// GPU memory temperature.
			if strings.Contains(name, memTag) {
				fmt.Sscanf(raw, "%d", &gpu.MemTemperature)
			}
			// PCIe slot temperature, used only while no core temperature is set.
			if gpu.Temperature == 0 && strings.Contains(name, slotTag) {
				fmt.Sscanf(raw, "%d", &gpu.Temperature)
			}
		}
	}
}
// extractSlotNumberFromGPU returns the PCIe slot number embedded in a GPU slot
// string.
//
// The string is split on "_" and the first token of the form "PCIE<n>" with
// n > 0 wins. It returns 0 when no such token exists.
func extractSlotNumberFromGPU(slot string) int {
	for _, token := range strings.Split(slot, "_") {
		if !strings.HasPrefix(token, "PCIE") {
			continue
		}
		n := 0
		// A failed scan leaves n at 0, which makes the token be skipped.
		fmt.Sscanf(token, "PCIE%d", &n)
		if n > 0 {
			return n
		}
	}
	return 0
}
// mergeSensorReadings appends the readings from extra to base, skipping any
// reading whose merge key (see sensorMergeKey) is already present.
//
// Readings with an empty key are always appended. Entries of base are kept
// as-is and in order. When extra is empty, base itself is returned; otherwise
// a new slice is returned and base is left untouched.
func mergeSensorReadings(base, extra []models.SensorReading) []models.SensorReading {
	if len(extra) == 0 {
		return base
	}

	merged := make([]models.SensorReading, 0, len(base)+len(extra))
	merged = append(merged, base...)

	// Keys already taken by base readings.
	known := make(map[string]struct{}, len(base))
	for i := range base {
		if k := sensorMergeKey(base[i]); k != "" {
			known[k] = struct{}{}
		}
	}

	for i := range extra {
		k := sensorMergeKey(extra[i])
		if k != "" {
			if _, dup := known[k]; dup {
				continue
			}
			known[k] = struct{}{}
		}
		merged = append(merged, extra[i])
	}
	return merged
}
// sensorMergeKey returns the key used to detect duplicate sensor readings:
// the sensor name with surrounding whitespace removed and lower-cased.
// The key is empty when the name is blank.
func sensorMergeKey(s models.SensorReading) string {
	return strings.ToLower(strings.TrimSpace(s.Name))
}