Update parser and project changes

This commit is contained in:
2026-02-15 22:02:07 +03:00
parent c7b2a7ab29
commit 5e49adaf05
23 changed files with 959 additions and 292 deletions

View File

@@ -103,8 +103,9 @@ func extractBoardInfo(fruList []models.FRUInfo, hw *models.HardwareConfig) {
return
}
// Look for the main board/chassis FRU entry
// Usually it's the first entry or one with "Builtin FRU" or containing board info
// Look for the main board/chassis FRU entry.
// Keep the first non-empty serial as the server serial and avoid overwriting it
// with module-specific serials (e.g., SCM_FRU).
for _, fru := range fruList {
// Skip empty entries
if fru.ProductName == "" && fru.SerialNumber == "" {
@@ -118,25 +119,23 @@ func extractBoardInfo(fruList []models.FRUInfo, hw *models.HardwareConfig) {
strings.Contains(desc, "chassis") ||
strings.Contains(desc, "board")
// If we haven't set board info yet, or this is a main board entry
if hw.BoardInfo.ProductName == "" || isMainBoard {
if fru.ProductName != "" {
hw.BoardInfo.ProductName = fru.ProductName
}
if fru.SerialNumber != "" {
hw.BoardInfo.SerialNumber = fru.SerialNumber
}
if fru.Manufacturer != "" {
hw.BoardInfo.Manufacturer = fru.Manufacturer
}
if fru.PartNumber != "" {
hw.BoardInfo.PartNumber = fru.PartNumber
}
if fru.SerialNumber != "" && hw.BoardInfo.SerialNumber == "" {
hw.BoardInfo.SerialNumber = fru.SerialNumber
}
if fru.ProductName != "" && (hw.BoardInfo.ProductName == "" || isMainBoard) {
hw.BoardInfo.ProductName = fru.ProductName
}
// Manufacturer from non-main FRU entries (e.g. PSU vendor) should not become server vendor.
if fru.Manufacturer != "" && isMainBoard && hw.BoardInfo.Manufacturer == "" {
hw.BoardInfo.Manufacturer = fru.Manufacturer
}
if fru.PartNumber != "" && (hw.BoardInfo.PartNumber == "" || isMainBoard) {
hw.BoardInfo.PartNumber = fru.PartNumber
}
// If we found a main board entry, stop searching
if isMainBoard && fru.ProductName != "" && fru.SerialNumber != "" {
break
}
// Main board entry with complete data is good enough to stop.
if isMainBoard && hw.BoardInfo.ProductName != "" && hw.BoardInfo.SerialNumber != "" {
break
}
}
}

View File

@@ -0,0 +1,59 @@
package inspur
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
)
func TestExtractBoardInfo_PreservesBuiltinSerial(t *testing.T) {
hw := &models.HardwareConfig{}
fruList := []models.FRUInfo{
{
Description: "Builtin FRU Device (ID 0)",
SerialNumber: "21D634101",
},
{
Description: "SCM_FRU (ID 8)",
SerialNumber: "CAR509K10613C10",
ProductName: "CA",
Manufacturer: "inagile",
PartNumber: "YZCA-02758-105",
},
}
extractBoardInfo(fruList, hw)
if hw.BoardInfo.SerialNumber != "21D634101" {
t.Fatalf("expected board serial 21D634101, got %q", hw.BoardInfo.SerialNumber)
}
if hw.BoardInfo.ProductName != "CA" {
t.Fatalf("expected product name CA, got %q", hw.BoardInfo.ProductName)
}
}
func TestExtractBoardInfo_DoesNotUsePSUVendorAsBoardManufacturer(t *testing.T) {
hw := &models.HardwareConfig{}
fruList := []models.FRUInfo{
{
Description: "Builtin FRU Device (ID 0)",
SerialNumber: "2KD605238",
},
{
Description: "PSU0_FRU (ID 30)",
SerialNumber: "PMR315HS10F1A",
ProductName: "AP-CR3000F12BY",
Manufacturer: "APLUSPOWER",
PartNumber: "18XA1M43400C2",
},
}
extractBoardInfo(fruList, hw)
if hw.BoardInfo.SerialNumber != "2KD605238" {
t.Fatalf("expected board serial 2KD605238, got %q", hw.BoardInfo.SerialNumber)
}
if hw.BoardInfo.Manufacturer != "" {
t.Fatalf("expected empty board manufacturer, got %q", hw.BoardInfo.Manufacturer)
}
}

View File

@@ -0,0 +1,56 @@
package inspur
import (
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
)
var reFaultGPU = regexp.MustCompile(`\bF_GPU(\d+)\b`)
func applyGPUStatusFromEvents(hw *models.HardwareConfig, events []models.Event) {
if hw == nil || len(hw.GPUs) == 0 {
return
}
faulty := make(map[int]bool)
for _, e := range events {
if !isGPUFaultEvent(e) {
continue
}
matches := reFaultGPU.FindAllStringSubmatch(e.Description, -1)
for _, m := range matches {
if len(m) < 2 {
continue
}
idx, err := strconv.Atoi(m[1])
if err == nil && idx >= 0 {
faulty[idx] = true
}
}
}
for i := range hw.GPUs {
gpu := &hw.GPUs[i]
idx, ok := extractLogicalGPUIndex(gpu.Slot)
if ok && faulty[idx] {
gpu.Status = "Critical"
continue
}
if strings.TrimSpace(gpu.Status) == "" {
gpu.Status = "OK"
}
}
}
func isGPUFaultEvent(e models.Event) bool {
desc := strings.ToLower(e.Description)
if strings.Contains(desc, "bios miss f_gpu") {
return true
}
return strings.EqualFold(strings.TrimSpace(e.ID), "17FFB002")
}

View File

@@ -0,0 +1,120 @@
package inspur
import (
"testing"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
func TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping(t *testing.T) {
hw := &models.HardwareConfig{
GPUs: []models.GPU{
{Slot: "#GPU6"},
{Slot: "#GPU7"},
{Slot: "#GPU0"},
{Slot: "#CPU0_PE1_E_BMC", Model: "AST2500 VGA"},
},
}
content := []byte(`
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_1/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN1","SerialNumber":"SXM1SN"}
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_5/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN5","SerialNumber":"SXM5SN"}
`)
enrichGPUsFromHGXHWInfo(content, hw)
if hw.GPUs[0].SerialNumber != "SXM3SN" {
t.Fatalf("expected #GPU6 to map to SXM3 serial, got %q", hw.GPUs[0].SerialNumber)
}
if hw.GPUs[1].SerialNumber != "SXM1SN" {
t.Fatalf("expected #GPU7 to map to SXM1 serial, got %q", hw.GPUs[1].SerialNumber)
}
if hw.GPUs[2].SerialNumber != "SXM5SN" {
t.Fatalf("expected #GPU0 to map to SXM5 serial, got %q", hw.GPUs[2].SerialNumber)
}
for _, g := range hw.GPUs {
if g.Slot == "#CPU0_PE1_E_BMC" {
t.Fatalf("expected non-HGX BMC VGA entry to be filtered out")
}
}
}
func TestEnrichGPUsFromHGXHWInfo_AddsMissingLogicalGPU(t *testing.T) {
hw := &models.HardwareConfig{
GPUs: []models.GPU{
{Slot: "#GPU0"},
{Slot: "#GPU1"},
{Slot: "#GPU2"},
{Slot: "#GPU3"},
{Slot: "#GPU4"},
{Slot: "#GPU5"},
{Slot: "#GPU7"},
},
}
content := []byte(`
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
`)
enrichGPUsFromHGXHWInfo(content, hw)
found := false
for _, g := range hw.GPUs {
if g.Slot == "#GPU6" {
found = true
if g.SerialNumber != "SXM3SN" {
t.Fatalf("expected synthesized #GPU6 serial SXM3SN, got %q", g.SerialNumber)
}
}
}
if !found {
t.Fatalf("expected synthesized #GPU6 entry")
}
}
func TestApplyGPUStatusFromEvents_MarksFaultedGPU(t *testing.T) {
hw := &models.HardwareConfig{
GPUs: []models.GPU{
{Slot: "#GPU6"},
{Slot: "#GPU5"},
},
}
events := []models.Event{
{
ID: "17FFB002",
Timestamp: time.Now(),
Description: "PCIe Present mismatch BIOS miss F_GPU6",
},
}
applyGPUStatusFromEvents(hw, events)
if hw.GPUs[0].Status != "Critical" {
t.Fatalf("expected #GPU6 status Critical, got %q", hw.GPUs[0].Status)
}
if hw.GPUs[1].Status != "OK" {
t.Fatalf("expected healthy GPU status OK, got %q", hw.GPUs[1].Status)
}
}
func TestParseIDLLog_ParsesStructuredJSONLine(t *testing.T) {
content := []byte(`{ "MESSAGE": "|2026-01-12T23:05:18+08:00|PCIE|Assert|Critical|17FFB002|PCIe Present mismatch BIOS miss F_GPU6 - Assert|" }`)
events := ParseIDLLog(content)
if len(events) != 1 {
t.Fatalf("expected 1 event from JSON line, got %d", len(events))
}
if events[0].ID != "17FFB002" {
t.Fatalf("expected event ID 17FFB002, got %q", events[0].ID)
}
if events[0].Source != "PCIE" {
t.Fatalf("expected source PCIE, got %q", events[0].Source)
}
}

View File

@@ -0,0 +1,175 @@
package inspur
import (
"fmt"
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
)
type hgxGPUAssemblyInfo struct {
Model string
Part string
Serial string
}
// Logical GPU index mapping used by HGX B200 UI ordering.
// Example from real logs/UI:
// GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8, GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
var hgxLogicalToSXM = map[int]int{
0: 5,
1: 7,
2: 6,
3: 8,
4: 2,
5: 4,
6: 3,
7: 1,
}
var (
reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
)
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
if hw == nil || len(hw.GPUs) == 0 || len(content) == 0 {
return
}
bySXM := parseHGXGPUAssembly(content)
if len(bySXM) == 0 {
return
}
normalizeHGXGPUInventory(hw, bySXM)
for i := range hw.GPUs {
gpu := &hw.GPUs[i]
logicalIdx, ok := extractLogicalGPUIndex(gpu.Slot)
if !ok {
// Keep existing info if slot index cannot be determined.
continue
}
sxm := resolveSXMIndex(logicalIdx, bySXM)
info, found := bySXM[sxm]
if !found {
continue
}
if strings.TrimSpace(gpu.SerialNumber) == "" {
gpu.SerialNumber = info.Serial
}
if shouldReplaceGPUModel(gpu.Model) {
gpu.Model = info.Model
}
if strings.TrimSpace(gpu.PartNumber) == "" {
gpu.PartNumber = info.Part
}
if strings.TrimSpace(gpu.Manufacturer) == "" {
gpu.Manufacturer = "NVIDIA"
}
}
}
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
result := make(map[int]hgxGPUAssemblyInfo)
matches := reHGXGPUBlock.FindAllSubmatch(content, -1)
for _, m := range matches {
if len(m) != 5 {
continue
}
sxmIdx, err := strconv.Atoi(string(m[1]))
if err != nil || sxmIdx <= 0 {
continue
}
result[sxmIdx] = hgxGPUAssemblyInfo{
Model: strings.TrimSpace(string(m[2])),
Part: strings.TrimSpace(string(m[3])),
Serial: strings.TrimSpace(string(m[4])),
}
}
return result
}
func extractLogicalGPUIndex(slot string) (int, bool) {
m := reSlotGPU.FindStringSubmatch(slot)
if len(m) < 2 {
return 0, false
}
idx, err := strconv.Atoi(m[1])
if err != nil || idx < 0 {
return 0, false
}
return idx, true
}
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
if sxm, ok := hgxLogicalToSXM[logicalIdx]; ok {
if _, exists := bySXM[sxm]; exists {
return sxm
}
}
identity := logicalIdx + 1
if _, exists := bySXM[identity]; exists {
return identity
}
return identity
}
func shouldReplaceGPUModel(model string) bool {
trimmed := strings.TrimSpace(model)
if trimmed == "" {
return true
}
switch strings.ToLower(trimmed) {
case "vga", "3d controller", "display controller", "unknown":
return true
default:
return false
}
}
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
// Keep only logical HGX GPUs (#GPU0..#GPU7) and remove BMC VGA entries.
filtered := make([]models.GPU, 0, len(hw.GPUs))
present := make(map[int]bool)
for _, gpu := range hw.GPUs {
idx, ok := extractLogicalGPUIndex(gpu.Slot)
if !ok || idx < 0 || idx > 7 {
continue
}
present[idx] = true
filtered = append(filtered, gpu)
}
// If some logical GPUs are missing in asset.json, add placeholders from HGX Redfish assembly.
for logicalIdx := 0; logicalIdx <= 7; logicalIdx++ {
if present[logicalIdx] {
continue
}
sxm := resolveSXMIndex(logicalIdx, bySXM)
info, ok := bySXM[sxm]
if !ok {
continue
}
filtered = append(filtered, models.GPU{
Slot: fmt.Sprintf("#GPU%d", logicalIdx),
Model: info.Model,
Manufacturer: "NVIDIA",
SerialNumber: info.Serial,
PartNumber: info.Part,
})
}
hw.GPUs = filtered
}

View File

@@ -8,8 +8,10 @@ import (
"git.mchus.pro/mchus/logpile/internal/models"
)
// ParseIDLLog parses the IDL (Inspur Diagnostic Log) file for BMC alarms
// Format: |timestamp|component|type|severity|eventID|description|
// ParseIDLLog parses IDL-style entries for BMC alarms.
// Works for both plain idl.log lines and JSON structured logs (idl_json/run_json)
// where MESSAGE/LOG2_FMTMSG contains:
// |timestamp|component|type|severity|eventID|description|
func ParseIDLLog(content []byte) []models.Event {
var events []models.Event
@@ -21,10 +23,6 @@ func ParseIDLLog(content []byte) []models.Event {
seenEvents := make(map[string]bool) // Deduplicate events
for _, line := range lines {
if !strings.Contains(line, "CommerDiagnose") {
continue
}
matches := re.FindStringSubmatch(line)
if matches == nil {
continue

View File

@@ -15,7 +15,7 @@ import (
// parserVersion - version of this parser module
// IMPORTANT: Increment this version when making changes to parser logic!
const parserVersion = "1.0.0"
const parserVersion = "1.1.0"
func init() {
parser.Register(&Parser{})
@@ -125,8 +125,9 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
result.Events = append(result.Events, componentEvents...)
}
// Parse IDL log (BMC alarms/diagnose events)
if f := parser.FindFileByName(files, "idl.log"); f != nil {
// Parse IDL-like logs (plain and structured JSON logs with embedded IDL messages)
idlFiles := parser.FindFileByPattern(files, "/idl.log", "idl_json.log", "run_json.log")
for _, f := range idlFiles {
idlEvents := ParseIDLLog(f.Content)
result.Events = append(result.Events, idlEvents...)
}
@@ -144,6 +145,29 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
result.Events = append(result.Events, events...)
}
// Fallback for archives where board serial is missing in parsed FRU/asset data:
// recover it from log content, never from archive filename.
if strings.TrimSpace(result.Hardware.BoardInfo.SerialNumber) == "" {
if serial := inferBoardSerialFromFallbackLogs(files); serial != "" {
result.Hardware.BoardInfo.SerialNumber = serial
}
}
if strings.TrimSpace(result.Hardware.BoardInfo.ProductName) == "" {
if model := inferBoardModelFromFallbackLogs(files); model != "" {
result.Hardware.BoardInfo.ProductName = model
}
}
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
}
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
if result.Hardware != nil {
applyGPUStatusFromEvents(result.Hardware, result.Events)
}
return result, nil
}

View File

@@ -0,0 +1,92 @@
package inspur
import (
"regexp"
"strings"
"git.mchus.pro/mchus/logpile/internal/parser"
)
var (
hostnameJSONRegex = regexp.MustCompile(`"_HOSTNAME"\s*:\s*"([^"]+)"`)
)
func inferBoardSerialFromFallbackLogs(files []parser.ExtractedFile) string {
// Prefer FRU dump when present.
if f := parser.FindFileByName(files, "fru.txt"); f != nil {
fruList := ParseFRU(f.Content)
for _, fru := range fruList {
serial := strings.TrimSpace(fru.SerialNumber)
if serial == "" || serial == "0" {
continue
}
desc := strings.ToLower(strings.TrimSpace(fru.Description))
if strings.Contains(desc, "builtin") || strings.Contains(desc, "fru device") {
return serial
}
}
}
// Fallback to explicit hostname file.
if f := parser.FindFileByName(files, "hostname"); f != nil {
if serial := sanitizeCandidateSerial(firstNonEmptyLine(string(f.Content))); serial != "" {
return serial
}
}
// Last-resort fallback from structured journal logs.
if f := parser.FindFileByName(files, "maintenance_json.log"); f != nil {
if m := hostnameJSONRegex.FindSubmatch(f.Content); len(m) == 2 {
if serial := sanitizeCandidateSerial(string(m[1])); serial != "" {
return serial
}
}
}
return ""
}
func inferBoardModelFromFallbackLogs(files []parser.ExtractedFile) string {
// Prefer FRU dump when present.
if f := parser.FindFileByName(files, "fru.txt"); f != nil {
fruList := ParseFRU(f.Content)
for _, fru := range fruList {
model := sanitizeCandidateModel(fru.ProductName)
if model == "" {
continue
}
desc := strings.ToLower(strings.TrimSpace(fru.Description))
if strings.Contains(desc, "builtin") || strings.Contains(desc, "fru device") {
return model
}
}
}
return ""
}
func firstNonEmptyLine(s string) string {
for _, line := range strings.Split(s, "\n") {
line = strings.TrimSpace(line)
if line != "" {
return line
}
}
return ""
}
func sanitizeCandidateSerial(s string) string {
s = strings.TrimSpace(s)
if s == "" || strings.EqualFold(s, "localhost") || strings.ContainsAny(s, " \t") {
return ""
}
return s
}
func sanitizeCandidateModel(s string) string {
s = strings.TrimSpace(s)
if s == "" || strings.EqualFold(s, "null") || s == "0" {
return ""
}
return s
}

View File

@@ -0,0 +1,76 @@
package inspur
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/parser"
)
func TestInferBoardSerialFromFallbackLogs_PrefersFRU(t *testing.T) {
files := []parser.ExtractedFile{
{
Path: "component/fru.txt",
Content: []byte(`FRU Device Description : Builtin FRU Device (ID 0)
Product Serial : 23DB01639
`),
},
{
Path: "runningdata/RTOSDump/hostname",
Content: []byte("HOSTNAME-FALLBACK\n"),
},
{
Path: "log/bmc/struct-log/maintenance_json.log",
Content: []byte(`{ "_HOSTNAME": "JSON-FALLBACK" }`),
},
}
got := inferBoardSerialFromFallbackLogs(files)
if got != "23DB01639" {
t.Fatalf("expected FRU serial 23DB01639, got %q", got)
}
}
func TestInferBoardSerialFromFallbackLogs_UsesHostnameFile(t *testing.T) {
files := []parser.ExtractedFile{
{
Path: "runningdata/RTOSDump/hostname",
Content: []byte("23DB01639\n"),
},
}
got := inferBoardSerialFromFallbackLogs(files)
if got != "23DB01639" {
t.Fatalf("expected hostname serial 23DB01639, got %q", got)
}
}
func TestInferBoardSerialFromFallbackLogs_UsesMaintenanceJSON(t *testing.T) {
files := []parser.ExtractedFile{
{
Path: "log/bmc/struct-log/maintenance_json.log",
Content: []byte(`{ "_HOSTNAME": "23DB01639", "MESSAGE": "ok" }`),
},
}
got := inferBoardSerialFromFallbackLogs(files)
if got != "23DB01639" {
t.Fatalf("expected JSON hostname serial 23DB01639, got %q", got)
}
}
func TestInferBoardModelFromFallbackLogs_PrefersFRU(t *testing.T) {
files := []parser.ExtractedFile{
{
Path: "component/fru.txt",
Content: []byte(`FRU Device Description : Builtin FRU Device (ID 0)
Board Product : KR9288-X3-A0-F0-00
Product Name : KR9288-X3-A0-F0-00
`),
},
}
got := inferBoardModelFromFallbackLogs(files)
if got != "KR9288-X3-A0-F0-00" {
t.Fatalf("expected board model KR9288-X3-A0-F0-00, got %q", got)
}
}