Update parser and project changes

This commit is contained in:
2026-02-15 22:02:07 +03:00
parent c7b2a7ab29
commit 5e49adaf05
23 changed files with 959 additions and 292 deletions

View File

@@ -14,11 +14,14 @@ import (
const maxSingleFileSize = 10 * 1024 * 1024
const maxZipArchiveSize = 50 * 1024 * 1024
const maxGzipDecompressedSize = 50 * 1024 * 1024
// ExtractedFile represents a file extracted from archive
type ExtractedFile struct {
Path string
Content []byte
Path string
Content []byte
Truncated bool
TruncatedMessage string
}
// ExtractArchive extracts tar.gz or zip archive and returns file contents
@@ -121,12 +124,16 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
}
defer gzr.Close()
// Read all decompressed content into buffer
// Limit to 50MB for plain gzip files, 10MB per file for tar.gz
decompressed, err := io.ReadAll(io.LimitReader(gzr, 50*1024*1024))
// Read decompressed content with a hard cap.
// When the payload exceeds the cap, keep the first chunk and mark it as truncated.
decompressed, err := io.ReadAll(io.LimitReader(gzr, maxGzipDecompressedSize+1))
if err != nil {
return nil, fmt.Errorf("read gzip content: %w", err)
}
gzipTruncated := len(decompressed) > maxGzipDecompressedSize
if gzipTruncated {
decompressed = decompressed[:maxGzipDecompressedSize]
}
// Try to read as tar archive
tr := tar.NewReader(bytes.NewReader(decompressed))
@@ -142,12 +149,19 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
baseName = gzr.Name
}
return []ExtractedFile{
{
Path: baseName,
Content: decompressed,
},
}, nil
file := ExtractedFile{
Path: baseName,
Content: decompressed,
}
if gzipTruncated {
file.Truncated = true
file.TruncatedMessage = fmt.Sprintf(
"decompressed gzip content exceeded %d bytes and was truncated",
maxGzipDecompressedSize,
)
}
return []ExtractedFile{file}, nil
}
return nil, fmt.Errorf("tar read: %w", err)
}
@@ -288,16 +302,24 @@ func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile,
if err != nil {
return nil, fmt.Errorf("read file content: %w", err)
}
if len(content) > maxSingleFileSize {
return nil, fmt.Errorf("file too large: max %d bytes", maxSingleFileSize)
truncated := len(content) > maxSingleFileSize
if truncated {
content = content[:maxSingleFileSize]
}
return []ExtractedFile{
{
Path: filepath.Base(filename),
Content: content,
},
}, nil
file := ExtractedFile{
Path: filepath.Base(filename),
Content: content,
}
if truncated {
file.Truncated = true
file.TruncatedMessage = fmt.Sprintf(
"file exceeded %d bytes and was truncated",
maxSingleFileSize,
)
}
return []ExtractedFile{file}, nil
}
// FindFileByPattern finds files matching pattern in extracted files

View File

@@ -1,6 +1,7 @@
package parser
import (
"bytes"
"os"
"path/filepath"
"strings"
@@ -46,3 +47,25 @@ func TestExtractArchiveTXT(t *testing.T) {
t.Fatalf("content mismatch")
}
}
// TestExtractArchiveFromReaderTXT_TruncatedWhenTooLarge feeds a plain-text
// payload that is 1 KiB larger than maxSingleFileSize and verifies that the
// extractor returns it cut down to the limit and flagged as truncated.
func TestExtractArchiveFromReaderTXT_TruncatedWhenTooLarge(t *testing.T) {
	payload := bytes.Repeat([]byte("a"), maxSingleFileSize+1024)

	extracted, err := ExtractArchiveFromReader(bytes.NewReader(payload), "huge.log")
	if err != nil {
		t.Fatalf("extract huge txt from reader: %v", err)
	}
	if count := len(extracted); count != 1 {
		t.Fatalf("expected 1 file, got %d", count)
	}

	only := extracted[0]
	switch {
	case !only.Truncated:
		t.Fatalf("expected file to be marked as truncated")
	case len(only.Content) != maxSingleFileSize:
		t.Fatalf("expected truncated size %d, got %d", maxSingleFileSize, len(only.Content))
	case only.TruncatedMessage == "":
		t.Fatalf("expected truncation message")
	}
}

View File

@@ -3,6 +3,8 @@ package parser
import (
"fmt"
"io"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
@@ -62,11 +64,44 @@ func (p *BMCParser) parseFiles() error {
// Preserve filename
result.Filename = p.result.Filename
appendExtractionWarnings(result, p.files)
p.result = result
return nil
}
// appendExtractionWarnings appends one warning event to result.Events when at
// least one of files is marked Truncated. The event's RawData lists every
// truncated file as "<path>: <message>", joined with "; ". A generic message is
// used for files that carry no TruncatedMessage. A nil result is ignored.
func appendExtractionWarnings(result *models.AnalysisResult, files []ExtractedFile) {
	if result == nil {
		return
	}

	var details []string
	for i := range files {
		file := &files[i]
		if !file.Truncated {
			continue
		}
		detail := fmt.Sprintf("%s: content was truncated due to size limit", file.Path)
		if file.TruncatedMessage != "" {
			detail = fmt.Sprintf("%s: %s", file.Path, file.TruncatedMessage)
		}
		details = append(details, detail)
	}
	if len(details) == 0 {
		return
	}

	warning := models.Event{
		Timestamp:   time.Now(),
		Source:      "LOGPile",
		EventType:   "Analysis Warning",
		Severity:    models.SeverityWarning,
		Description: "Input data was too large; analysis is partial and may be incomplete",
		RawData:     strings.Join(details, "; "),
	}
	result.Events = append(result.Events, warning)
}
// Result returns the analysis result
func (p *BMCParser) Result() *models.AnalysisResult {
return p.result

View File

@@ -0,0 +1,34 @@
package parser
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
)
func TestAppendExtractionWarnings(t *testing.T) {
result := &models.AnalysisResult{
Events: make([]models.Event, 0),
}
files := []ExtractedFile{
{Path: "ok.log", Content: []byte("ok")},
{Path: "big.log", Truncated: true, TruncatedMessage: "file exceeded size limit and was truncated"},
}
appendExtractionWarnings(result, files)
if len(result.Events) != 1 {
t.Fatalf("expected 1 warning event, got %d", len(result.Events))
}
ev := result.Events[0]
if ev.Severity != models.SeverityWarning {
t.Fatalf("expected warning severity, got %q", ev.Severity)
}
if ev.EventType != "Analysis Warning" {
t.Fatalf("unexpected event type: %q", ev.EventType)
}
if ev.RawData == "" {
t.Fatalf("expected warning details in RawData")
}
}

View File

@@ -103,8 +103,9 @@ func extractBoardInfo(fruList []models.FRUInfo, hw *models.HardwareConfig) {
return
}
// Look for the main board/chassis FRU entry
// Usually it's the first entry or one with "Builtin FRU" or containing board info
// Look for the main board/chassis FRU entry.
// Keep the first non-empty serial as the server serial and avoid overwriting it
// with module-specific serials (e.g., SCM_FRU).
for _, fru := range fruList {
// Skip empty entries
if fru.ProductName == "" && fru.SerialNumber == "" {
@@ -118,25 +119,23 @@ func extractBoardInfo(fruList []models.FRUInfo, hw *models.HardwareConfig) {
strings.Contains(desc, "chassis") ||
strings.Contains(desc, "board")
// If we haven't set board info yet, or this is a main board entry
if hw.BoardInfo.ProductName == "" || isMainBoard {
if fru.ProductName != "" {
hw.BoardInfo.ProductName = fru.ProductName
}
if fru.SerialNumber != "" {
hw.BoardInfo.SerialNumber = fru.SerialNumber
}
if fru.Manufacturer != "" {
hw.BoardInfo.Manufacturer = fru.Manufacturer
}
if fru.PartNumber != "" {
hw.BoardInfo.PartNumber = fru.PartNumber
}
if fru.SerialNumber != "" && hw.BoardInfo.SerialNumber == "" {
hw.BoardInfo.SerialNumber = fru.SerialNumber
}
if fru.ProductName != "" && (hw.BoardInfo.ProductName == "" || isMainBoard) {
hw.BoardInfo.ProductName = fru.ProductName
}
// Manufacturer from non-main FRU entries (e.g. PSU vendor) should not become server vendor.
if fru.Manufacturer != "" && isMainBoard && hw.BoardInfo.Manufacturer == "" {
hw.BoardInfo.Manufacturer = fru.Manufacturer
}
if fru.PartNumber != "" && (hw.BoardInfo.PartNumber == "" || isMainBoard) {
hw.BoardInfo.PartNumber = fru.PartNumber
}
// If we found a main board entry, stop searching
if isMainBoard && fru.ProductName != "" && fru.SerialNumber != "" {
break
}
// Main board entry with complete data is good enough to stop.
if isMainBoard && hw.BoardInfo.ProductName != "" && hw.BoardInfo.SerialNumber != "" {
break
}
}
}

View File

@@ -0,0 +1,59 @@
package inspur
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
)
// TestExtractBoardInfo_PreservesBuiltinSerial verifies that the serial taken
// from the builtin FRU entry is not overwritten by a later SCM_FRU entry, while
// the product name is still picked up from that later entry.
func TestExtractBoardInfo_PreservesBuiltinSerial(t *testing.T) {
	builtin := models.FRUInfo{
		Description:  "Builtin FRU Device (ID 0)",
		SerialNumber: "21D634101",
	}
	scm := models.FRUInfo{
		Description:  "SCM_FRU (ID 8)",
		SerialNumber: "CAR509K10613C10",
		ProductName:  "CA",
		Manufacturer: "inagile",
		PartNumber:   "YZCA-02758-105",
	}
	hw := &models.HardwareConfig{}

	extractBoardInfo([]models.FRUInfo{builtin, scm}, hw)

	if got := hw.BoardInfo.SerialNumber; got != "21D634101" {
		t.Fatalf("expected board serial 21D634101, got %q", got)
	}
	if got := hw.BoardInfo.ProductName; got != "CA" {
		t.Fatalf("expected product name CA, got %q", got)
	}
}
// TestExtractBoardInfo_DoesNotUsePSUVendorAsBoardManufacturer verifies that a
// PSU FRU entry's manufacturer is not copied into the board info and that the
// builtin entry's serial is retained.
func TestExtractBoardInfo_DoesNotUsePSUVendorAsBoardManufacturer(t *testing.T) {
	builtin := models.FRUInfo{
		Description:  "Builtin FRU Device (ID 0)",
		SerialNumber: "2KD605238",
	}
	psu := models.FRUInfo{
		Description:  "PSU0_FRU (ID 30)",
		SerialNumber: "PMR315HS10F1A",
		ProductName:  "AP-CR3000F12BY",
		Manufacturer: "APLUSPOWER",
		PartNumber:   "18XA1M43400C2",
	}
	hw := &models.HardwareConfig{}

	extractBoardInfo([]models.FRUInfo{builtin, psu}, hw)

	if got := hw.BoardInfo.SerialNumber; got != "2KD605238" {
		t.Fatalf("expected board serial 2KD605238, got %q", got)
	}
	if got := hw.BoardInfo.Manufacturer; got != "" {
		t.Fatalf("expected empty board manufacturer, got %q", got)
	}
}

View File

@@ -0,0 +1,56 @@
package inspur
import (
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
)
// reFaultGPU matches a whole-word F_GPU<digits> token and captures the digits.
var reFaultGPU = regexp.MustCompile(`\bF_GPU(\d+)\b`)
// applyGPUStatusFromEvents sets the Status of every GPU in hw from the given
// events. GPUs whose logical slot index is named by an F_GPU<n> token in a GPU
// fault event become "Critical"; any other GPU with a blank Status becomes
// "OK". Nothing happens when hw is nil or has no GPUs.
func applyGPUStatusFromEvents(hw *models.HardwareConfig, events []models.Event) {
	if hw == nil || len(hw.GPUs) == 0 {
		return
	}

	// Collect the logical indices mentioned in fault events.
	faultyIdx := map[int]bool{}
	for _, ev := range events {
		if !isGPUFaultEvent(ev) {
			continue
		}
		for _, groups := range reFaultGPU.FindAllStringSubmatch(ev.Description, -1) {
			if len(groups) < 2 {
				continue
			}
			if n, err := strconv.Atoi(groups[1]); err == nil && n >= 0 {
				faultyIdx[n] = true
			}
		}
	}

	for i := range hw.GPUs {
		entry := &hw.GPUs[i]
		if n, ok := extractLogicalGPUIndex(entry.Slot); ok && faultyIdx[n] {
			entry.Status = "Critical"
		} else if strings.TrimSpace(entry.Status) == "" {
			entry.Status = "OK"
		}
	}
}
// isGPUFaultEvent reports whether e describes a GPU fault: either its
// description contains "bios miss f_gpu" (case-insensitive) or its trimmed ID
// equals 17FFB002 (case-insensitive).
func isGPUFaultEvent(e models.Event) bool {
	mentionsMissingGPU := strings.Contains(strings.ToLower(e.Description), "bios miss f_gpu")
	hasFaultID := strings.EqualFold(strings.TrimSpace(e.ID), "17FFB002")
	return mentionsMissingGPU || hasFaultID
}

View File

@@ -0,0 +1,120 @@
package inspur
import (
"testing"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
// TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping verifies that serials from
// the HGX assembly dump are assigned through the logical-to-SXM mapping and
// that the BMC VGA entry is removed from the GPU list.
func TestEnrichGPUsFromHGXHWInfo_UsesHGXLogicalMapping(t *testing.T) {
	hw := &models.HardwareConfig{GPUs: []models.GPU{
		{Slot: "#GPU6"},
		{Slot: "#GPU7"},
		{Slot: "#GPU0"},
		{Slot: "#CPU0_PE1_E_BMC", Model: "AST2500 VGA"},
	}}
	content := []byte(`
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_1/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN1","SerialNumber":"SXM1SN"}
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_5/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN5","SerialNumber":"SXM5SN"}
`)

	enrichGPUsFromHGXHWInfo(content, hw)

	expectations := []struct {
		pos    int
		serial string
		format string
	}{
		{0, "SXM3SN", "expected #GPU6 to map to SXM3 serial, got %q"},
		{1, "SXM1SN", "expected #GPU7 to map to SXM1 serial, got %q"},
		{2, "SXM5SN", "expected #GPU0 to map to SXM5 serial, got %q"},
	}
	for _, want := range expectations {
		if got := hw.GPUs[want.pos].SerialNumber; got != want.serial {
			t.Fatalf(want.format, got)
		}
	}

	for i := range hw.GPUs {
		if hw.GPUs[i].Slot == "#CPU0_PE1_E_BMC" {
			t.Fatalf("expected non-HGX BMC VGA entry to be filtered out")
		}
	}
}
// TestEnrichGPUsFromHGXHWInfo_AddsMissingLogicalGPU verifies that a logical GPU
// absent from the inventory (#GPU6) is synthesized from the HGX assembly data
// with the matching serial number.
func TestEnrichGPUsFromHGXHWInfo_AddsMissingLogicalGPU(t *testing.T) {
	hw := &models.HardwareConfig{GPUs: []models.GPU{
		{Slot: "#GPU0"},
		{Slot: "#GPU1"},
		{Slot: "#GPU2"},
		{Slot: "#GPU3"},
		{Slot: "#GPU4"},
		{Slot: "#GPU5"},
		{Slot: "#GPU7"},
	}}
	content := []byte(`
# curl -X GET http://127.0.0.1/redfish/v1/Chassis/HGX_GPU_SXM_3/Assembly
{"Name":"GPU Board Assembly","Model":"B200 180GB HBM3e","PartNumber":"PN3","SerialNumber":"SXM3SN"}
`)

	enrichGPUsFromHGXHWInfo(content, hw)

	hits := 0
	for i := range hw.GPUs {
		entry := &hw.GPUs[i]
		if entry.Slot != "#GPU6" {
			continue
		}
		hits++
		if entry.SerialNumber != "SXM3SN" {
			t.Fatalf("expected synthesized #GPU6 serial SXM3SN, got %q", entry.SerialNumber)
		}
	}
	if hits == 0 {
		t.Fatalf("expected synthesized #GPU6 entry")
	}
}
// TestApplyGPUStatusFromEvents_MarksFaultedGPU verifies that a GPU named in a
// fault event becomes "Critical" while an unaffected GPU defaults to "OK".
func TestApplyGPUStatusFromEvents_MarksFaultedGPU(t *testing.T) {
	fault := models.Event{
		ID:          "17FFB002",
		Timestamp:   time.Now(),
		Description: "PCIe Present mismatch BIOS miss F_GPU6",
	}
	hw := &models.HardwareConfig{GPUs: []models.GPU{
		{Slot: "#GPU6"},
		{Slot: "#GPU5"},
	}}

	applyGPUStatusFromEvents(hw, []models.Event{fault})

	if got := hw.GPUs[0].Status; got != "Critical" {
		t.Fatalf("expected #GPU6 status Critical, got %q", got)
	}
	if got := hw.GPUs[1].Status; got != "OK" {
		t.Fatalf("expected healthy GPU status OK, got %q", got)
	}
}
// TestParseIDLLog_ParsesStructuredJSONLine verifies that an IDL record embedded
// in the MESSAGE field of a JSON log line is parsed into a single event with
// the expected ID and source.
func TestParseIDLLog_ParsesStructuredJSONLine(t *testing.T) {
	line := []byte(`{ "MESSAGE": "|2026-01-12T23:05:18+08:00|PCIE|Assert|Critical|17FFB002|PCIe Present mismatch BIOS miss F_GPU6 - Assert|" }`)

	parsed := ParseIDLLog(line)
	if count := len(parsed); count != 1 {
		t.Fatalf("expected 1 event from JSON line, got %d", count)
	}

	first := parsed[0]
	if first.ID != "17FFB002" {
		t.Fatalf("expected event ID 17FFB002, got %q", first.ID)
	}
	if first.Source != "PCIE" {
		t.Fatalf("expected source PCIE, got %q", first.Source)
	}
}

View File

@@ -0,0 +1,175 @@
package inspur
import (
"fmt"
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
)
// hgxGPUAssemblyInfo holds the identity strings captured for one HGX GPU board
// assembly: its model, part number and serial number.
type hgxGPUAssemblyInfo struct {
	Model string
	Part string
	Serial string
}
// hgxLogicalToSXM maps a logical GPU index (key) to the SXM module number
// (value), following the HGX B200 UI ordering.
// Example from real logs/UI:
// GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8, GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
var hgxLogicalToSXM = map[int]int{
	0: 5,
	1: 7,
	2: 6,
	3: 8,
	4: 2,
	5: 4,
	6: 3,
	7: 1,
}
var (
	// reHGXGPUBlock captures, for a /redfish/v1/Chassis/HGX_GPU_SXM_<n>/Assembly
	// path, the SXM number and then the Model, PartNumber and SerialNumber
	// values that follow a "GPU Board Assembly" Name field, in that order.
	// NOTE(review): (?s) lets the lazy ".*?" gaps span newlines, so a block
	// missing one of these fields could pick it up from a later block — confirm
	// that the inputs always carry all four fields per assembly.
	reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
	// reSlotGPU finds "gpu" (any case), optionally followed by whitespace and a
	// "#", and captures the decimal index that comes next.
	reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
)
// enrichGPUsFromHGXHWInfo fills in missing GPU details in hw from the HGX
// assembly records found in content. The inventory is first normalized to the
// logical HGX GPUs. Then, for each GPU whose slot yields a logical index with a
// matching assembly record, blank serial and part numbers are filled, a
// generic or empty model is replaced, and a blank manufacturer becomes
// "NVIDIA". It is a no-op when hw is nil, has no GPUs, content is empty, or no
// assembly records are found.
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
	if hw == nil {
		return
	}
	if len(hw.GPUs) == 0 || len(content) == 0 {
		return
	}

	assemblies := parseHGXGPUAssembly(content)
	if len(assemblies) == 0 {
		return
	}

	normalizeHGXGPUInventory(hw, assemblies)

	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }
	for i := range hw.GPUs {
		entry := &hw.GPUs[i]
		idx, ok := extractLogicalGPUIndex(entry.Slot)
		if !ok {
			// Without a slot index there is nothing to match against.
			continue
		}
		info, found := assemblies[resolveSXMIndex(idx, assemblies)]
		if !found {
			continue
		}

		if isBlank(entry.SerialNumber) {
			entry.SerialNumber = info.Serial
		}
		if shouldReplaceGPUModel(entry.Model) {
			entry.Model = info.Model
		}
		if isBlank(entry.PartNumber) {
			entry.PartNumber = info.Part
		}
		if isBlank(entry.Manufacturer) {
			entry.Manufacturer = "NVIDIA"
		}
	}
}
// parseHGXGPUAssembly extracts every match of reHGXGPUBlock from content and
// returns the captured model, part and serial (whitespace-trimmed) keyed by
// SXM number. Matches whose SXM number is not a positive integer are skipped;
// a later match for the same SXM number replaces an earlier one.
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
	bySXM := map[int]hgxGPUAssemblyInfo{}
	for _, groups := range reHGXGPUBlock.FindAllSubmatch(content, -1) {
		if len(groups) != 5 {
			continue
		}
		sxm, err := strconv.Atoi(string(groups[1]))
		if err != nil || sxm <= 0 {
			continue
		}
		field := func(i int) string { return strings.TrimSpace(string(groups[i])) }
		bySXM[sxm] = hgxGPUAssemblyInfo{
			Model:  field(2),
			Part:   field(3),
			Serial: field(4),
		}
	}
	return bySXM
}
// extractLogicalGPUIndex returns the GPU index found in slot by reSlotGPU and
// true, or (0, false) when slot has no such index or it cannot be converted
// to a non-negative int.
func extractLogicalGPUIndex(slot string) (int, bool) {
	groups := reSlotGPU.FindStringSubmatch(slot)
	if len(groups) >= 2 {
		if n, err := strconv.Atoi(groups[1]); err == nil && n >= 0 {
			return n, true
		}
	}
	return 0, false
}
// resolveSXMIndex returns the SXM number to look up for a logical GPU index.
// It prefers the hgxLogicalToSXM mapping when the mapped SXM is present in
// bySXM; otherwise it falls back to the identity numbering logicalIdx+1. The
// fallback is returned whether or not bySXM contains it, so callers must still
// check the map themselves.
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
	if sxm, ok := hgxLogicalToSXM[logicalIdx]; ok {
		if _, exists := bySXM[sxm]; exists {
			return sxm
		}
	}
	return logicalIdx + 1
}
// shouldReplaceGPUModel reports whether model is empty or one of the generic
// placeholders ("vga", "3d controller", "display controller", "unknown"),
// compared after trimming whitespace and lower-casing.
func shouldReplaceGPUModel(model string) bool {
	normalized := strings.ToLower(strings.TrimSpace(model))
	for _, generic := range [...]string{"", "vga", "3d controller", "display controller", "unknown"} {
		if normalized == generic {
			return true
		}
	}
	return false
}
// normalizeHGXGPUInventory rewrites hw.GPUs so that it contains only entries
// whose slot resolves to a logical index in 0..7 (this drops e.g. BMC VGA
// entries), keeping their original order. For every logical index in that
// range that is missing, a placeholder built from the matching bySXM record is
// appended, if such a record exists.
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
	const lastLogicalIdx = 7

	kept := make([]models.GPU, 0, len(hw.GPUs))
	var seen [lastLogicalIdx + 1]bool
	for _, g := range hw.GPUs {
		n, ok := extractLogicalGPUIndex(g.Slot)
		if ok && n >= 0 && n <= lastLogicalIdx {
			seen[n] = true
			kept = append(kept, g)
		}
	}

	// Synthesize entries for logical GPUs that the inventory did not list.
	for n, have := range seen {
		if have {
			continue
		}
		info, ok := bySXM[resolveSXMIndex(n, bySXM)]
		if !ok {
			continue
		}
		kept = append(kept, models.GPU{
			Slot:         fmt.Sprintf("#GPU%d", n),
			Model:        info.Model,
			Manufacturer: "NVIDIA",
			SerialNumber: info.Serial,
			PartNumber:   info.Part,
		})
	}

	hw.GPUs = kept
}

View File

@@ -8,8 +8,10 @@ import (
"git.mchus.pro/mchus/logpile/internal/models"
)
// ParseIDLLog parses the IDL (Inspur Diagnostic Log) file for BMC alarms
// Format: |timestamp|component|type|severity|eventID|description|
// ParseIDLLog parses IDL-style entries for BMC alarms.
// Works for both plain idl.log lines and JSON structured logs (idl_json/run_json)
// where MESSAGE/LOG2_FMTMSG contains:
// |timestamp|component|type|severity|eventID|description|
func ParseIDLLog(content []byte) []models.Event {
var events []models.Event
@@ -21,10 +23,6 @@ func ParseIDLLog(content []byte) []models.Event {
seenEvents := make(map[string]bool) // Deduplicate events
for _, line := range lines {
if !strings.Contains(line, "CommerDiagnose") {
continue
}
matches := re.FindStringSubmatch(line)
if matches == nil {
continue

View File

@@ -15,7 +15,7 @@ import (
// parserVersion - version of this parser module
// IMPORTANT: Increment this version when making changes to parser logic!
const parserVersion = "1.0.0"
const parserVersion = "1.1.0"
func init() {
parser.Register(&Parser{})
@@ -125,8 +125,9 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
result.Events = append(result.Events, componentEvents...)
}
// Parse IDL log (BMC alarms/diagnose events)
if f := parser.FindFileByName(files, "idl.log"); f != nil {
// Parse IDL-like logs (plain and structured JSON logs with embedded IDL messages)
idlFiles := parser.FindFileByPattern(files, "/idl.log", "idl_json.log", "run_json.log")
for _, f := range idlFiles {
idlEvents := ParseIDLLog(f.Content)
result.Events = append(result.Events, idlEvents...)
}
@@ -144,6 +145,29 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
result.Events = append(result.Events, events...)
}
// Fallback for archives where board serial is missing in parsed FRU/asset data:
// recover it from log content, never from archive filename.
if strings.TrimSpace(result.Hardware.BoardInfo.SerialNumber) == "" {
if serial := inferBoardSerialFromFallbackLogs(files); serial != "" {
result.Hardware.BoardInfo.SerialNumber = serial
}
}
if strings.TrimSpace(result.Hardware.BoardInfo.ProductName) == "" {
if model := inferBoardModelFromFallbackLogs(files); model != "" {
result.Hardware.BoardInfo.ProductName = model
}
}
// Enrich GPU inventory from HGX Redfish snapshot (serial/model/part mapping).
if f := parser.FindFileByName(files, "HGX_HWInfo_FWVersion.log"); f != nil && result.Hardware != nil {
enrichGPUsFromHGXHWInfo(f.Content, result.Hardware)
}
// Mark problematic GPUs from IDL errors like "BIOS miss F_GPU6".
if result.Hardware != nil {
applyGPUStatusFromEvents(result.Hardware, result.Events)
}
return result, nil
}

View File

@@ -0,0 +1,92 @@
package inspur
import (
"regexp"
"strings"
"git.mchus.pro/mchus/logpile/internal/parser"
)
var (
	// hostnameJSONRegex captures the string value of a "_HOSTNAME" JSON key.
	hostnameJSONRegex = regexp.MustCompile(`"_HOSTNAME"\s*:\s*"([^"]+)"`)
)
// inferBoardSerialFromFallbackLogs tries to recover a board serial from file
// contents, in this order:
//  1. fru.txt: the first entry with a serial other than "" or "0" whose
//     description contains "builtin" or "fru device";
//  2. hostname: the first non-empty line, if it passes sanitizeCandidateSerial;
//  3. maintenance_json.log: the first "_HOSTNAME" value, if it passes
//     sanitizeCandidateSerial.
//
// It returns "" when none of the sources yields a candidate.
func inferBoardSerialFromFallbackLogs(files []parser.ExtractedFile) string {
	if fruFile := parser.FindFileByName(files, "fru.txt"); fruFile != nil {
		for _, entry := range ParseFRU(fruFile.Content) {
			candidate := strings.TrimSpace(entry.SerialNumber)
			if candidate == "" || candidate == "0" {
				continue
			}
			label := strings.ToLower(strings.TrimSpace(entry.Description))
			if strings.Contains(label, "builtin") || strings.Contains(label, "fru device") {
				return candidate
			}
		}
	}

	if hostFile := parser.FindFileByName(files, "hostname"); hostFile != nil {
		name := firstNonEmptyLine(string(hostFile.Content))
		if candidate := sanitizeCandidateSerial(name); candidate != "" {
			return candidate
		}
	}

	journal := parser.FindFileByName(files, "maintenance_json.log")
	if journal == nil {
		return ""
	}
	groups := hostnameJSONRegex.FindSubmatch(journal.Content)
	if len(groups) != 2 {
		return ""
	}
	return sanitizeCandidateSerial(string(groups[1]))
}
// inferBoardModelFromFallbackLogs tries to recover the board model from
// fru.txt. It returns the product name of the first entry that passes
// sanitizeCandidateModel and whose description contains "builtin" or
// "fru device", or "" when the file is absent or no entry qualifies.
func inferBoardModelFromFallbackLogs(files []parser.ExtractedFile) string {
	fruFile := parser.FindFileByName(files, "fru.txt")
	if fruFile == nil {
		return ""
	}
	for _, entry := range ParseFRU(fruFile.Content) {
		candidate := sanitizeCandidateModel(entry.ProductName)
		if candidate == "" {
			continue
		}
		label := strings.ToLower(strings.TrimSpace(entry.Description))
		if strings.Contains(label, "builtin") || strings.Contains(label, "fru device") {
			return candidate
		}
	}
	return ""
}
// firstNonEmptyLine returns the first "\n"-separated line of s that is not
// blank, with surrounding whitespace trimmed, or "" if every line is blank.
func firstNonEmptyLine(s string) string {
	for rest := s; ; {
		end := strings.IndexByte(rest, '\n')
		line := rest
		if end >= 0 {
			line = rest[:end]
		}
		if trimmed := strings.TrimSpace(line); trimmed != "" {
			return trimmed
		}
		if end < 0 {
			return ""
		}
		rest = rest[end+1:]
	}
}
// sanitizeCandidateSerial trims s and returns it, or "" when the trimmed value
// is empty, equals "localhost" (case-insensitive), or contains a space or tab.
func sanitizeCandidateSerial(s string) string {
	candidate := strings.TrimSpace(s)
	switch {
	case candidate == "":
		return ""
	case strings.EqualFold(candidate, "localhost"):
		return ""
	case strings.ContainsAny(candidate, " \t"):
		return ""
	}
	return candidate
}
// sanitizeCandidateModel trims s and returns it, or "" when the trimmed value
// is empty, equals "null" (case-insensitive), or is exactly "0".
func sanitizeCandidateModel(s string) string {
	candidate := strings.TrimSpace(s)
	switch {
	case candidate == "", candidate == "0":
		return ""
	case strings.EqualFold(candidate, "null"):
		return ""
	}
	return candidate
}

View File

@@ -0,0 +1,76 @@
package inspur
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// TestInferBoardSerialFromFallbackLogs_PrefersFRU verifies that the FRU serial
// wins when the hostname file and the JSON journal are also available.
func TestInferBoardSerialFromFallbackLogs_PrefersFRU(t *testing.T) {
	fru := parser.ExtractedFile{
		Path: "component/fru.txt",
		Content: []byte(`FRU Device Description : Builtin FRU Device (ID 0)
Product Serial : 23DB01639
`),
	}
	hostname := parser.ExtractedFile{
		Path:    "runningdata/RTOSDump/hostname",
		Content: []byte("HOSTNAME-FALLBACK\n"),
	}
	journal := parser.ExtractedFile{
		Path:    "log/bmc/struct-log/maintenance_json.log",
		Content: []byte(`{ "_HOSTNAME": "JSON-FALLBACK" }`),
	}

	serial := inferBoardSerialFromFallbackLogs([]parser.ExtractedFile{fru, hostname, journal})
	if serial != "23DB01639" {
		t.Fatalf("expected FRU serial 23DB01639, got %q", serial)
	}
}
// TestInferBoardSerialFromFallbackLogs_UsesHostnameFile verifies that the
// hostname file supplies the serial when no FRU dump is present.
func TestInferBoardSerialFromFallbackLogs_UsesHostnameFile(t *testing.T) {
	hostname := parser.ExtractedFile{
		Path:    "runningdata/RTOSDump/hostname",
		Content: []byte("23DB01639\n"),
	}

	serial := inferBoardSerialFromFallbackLogs([]parser.ExtractedFile{hostname})
	if serial != "23DB01639" {
		t.Fatalf("expected hostname serial 23DB01639, got %q", serial)
	}
}
// TestInferBoardSerialFromFallbackLogs_UsesMaintenanceJSON verifies that the
// "_HOSTNAME" value from the JSON journal is used when it is the only source.
func TestInferBoardSerialFromFallbackLogs_UsesMaintenanceJSON(t *testing.T) {
	journal := parser.ExtractedFile{
		Path:    "log/bmc/struct-log/maintenance_json.log",
		Content: []byte(`{ "_HOSTNAME": "23DB01639", "MESSAGE": "ok" }`),
	}

	serial := inferBoardSerialFromFallbackLogs([]parser.ExtractedFile{journal})
	if serial != "23DB01639" {
		t.Fatalf("expected JSON hostname serial 23DB01639, got %q", serial)
	}
}
// TestInferBoardModelFromFallbackLogs_PrefersFRU verifies that the board model
// is read from the builtin entry of the FRU dump.
func TestInferBoardModelFromFallbackLogs_PrefersFRU(t *testing.T) {
	fru := parser.ExtractedFile{
		Path: "component/fru.txt",
		Content: []byte(`FRU Device Description : Builtin FRU Device (ID 0)
Board Product : KR9288-X3-A0-F0-00
Product Name : KR9288-X3-A0-F0-00
`),
	}

	model := inferBoardModelFromFallbackLogs([]parser.ExtractedFile{fru})
	if model != "KR9288-X3-A0-F0-00" {
		t.Fatalf("expected board model KR9288-X3-A0-F0-00, got %q", model)
	}
}

View File

@@ -106,6 +106,8 @@ func parseGPUInfo(content string, result *models.AnalysisResult) {
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
}
applyGPUSerialNumbers(content, result.Hardware.GPUs)
// Create event for GPU summary
if len(result.Hardware.GPUs) > 0 {
result.Events = append(result.Events, models.Event{
@@ -168,3 +170,138 @@ func formatGPUSummary(gpus []models.GPU) string {
return summary.String()
}
// applyGPUSerialNumbers sets SerialNumber on each GPU in gpus whose normalized
// BDF has a serial in content. Serials come from the nvidia-smi query section,
// or from the summary table when that section yields none. GPUs without a BDF
// or without a matching serial are left untouched.
func applyGPUSerialNumbers(content string, gpus []models.GPU) {
	if len(gpus) == 0 {
		return
	}

	serials := parseGPUSerialsFromNvidiaSMI(content)
	if len(serials) == 0 {
		if serials = parseGPUSerialsFromSummary(content); len(serials) == 0 {
			return
		}
	}

	for i := range gpus {
		gpu := &gpus[i]
		key := normalizeGPUAddress(gpu.BDF)
		if key == "" {
			continue
		}
		if found := serials[key]; found != "" {
			gpu.SerialNumber = found
		}
	}
}
// reNvidiaSMIGPUHeader matches a per-GPU header line such as
// "GPU 00000000:18:00.0" and captures the PCI address. It is compiled once at
// package scope instead of on every call.
var reNvidiaSMIGPUHeader = regexp.MustCompile(`^GPU\s+([0-9A-F]{8}:[0-9A-F]{2}:[0-9A-F]{2}\.[0-9A-F])$`)

// parseGPUSerialsFromNvidiaSMI scans content line by line for GPU header lines
// followed by "Serial Number : <value>" lines and returns the serials keyed by
// the normalized PCI address of the most recent header. Empty and "N/A"
// serials are ignored. The returned map is never nil.
func parseGPUSerialsFromNvidiaSMI(content string) map[string]string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default bufio.Scanner stops with ErrTooLong at the first line longer
	// than 64 KiB, which would silently drop every GPU section after it. Allow
	// a token as large as the whole input so that no line can hit the limit.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(content)+1)

	serialByBDF := make(map[string]string)
	currentBDF := ""

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		// A header line starts a new GPU section.
		if matches := reNvidiaSMIGPUHeader.FindStringSubmatch(line); len(matches) == 2 {
			currentBDF = normalizeGPUAddress(matches[1])
			continue
		}
		if currentBDF == "" || !strings.HasPrefix(line, "Serial Number") {
			continue
		}

		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}
		serial := strings.TrimSpace(parts[1])
		if serial != "" && !strings.EqualFold(serial, "N/A") {
			serialByBDF[currentBDF] = serial
		}
	}

	return serialByBDF
}
// parseGPUSerialsFromSummary reads the "NVIDIA GPU Details" table of content
// and returns GPU serials keyed by normalized PCI address. Parsing starts at
// the first line beginning with "NVIDIA GPU Details" and stops at a line
// beginning with "NVIDIA Switch Details". In each row the text after the last
// "|" is split on commas; the fifth field is taken as the PCI address and the
// sixth as the serial. Rows with fewer than six fields, or with an empty or
// "N/A" serial, are skipped. The returned map is never nil.
func parseGPUSerialsFromSummary(content string) map[string]string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default bufio.Scanner stops with ErrTooLong at the first line longer
	// than 64 KiB, which would silently hide a summary table located after it.
	// Allow a token as large as the whole input so no line can hit the limit.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(content)+1)

	serialByBDF := make(map[string]string)
	inGPUDetails := false

	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)

		if strings.HasPrefix(trimmed, "NVIDIA GPU Details") {
			inGPUDetails = true
		}
		if !inGPUDetails {
			continue
		}
		if strings.HasPrefix(trimmed, "NVIDIA Switch Details") {
			break
		}

		parts := strings.Split(line, "|")
		if len(parts) < 2 {
			continue
		}
		payload := strings.TrimSpace(parts[len(parts)-1])
		if payload == "" {
			continue
		}
		fields := strings.Split(payload, ",")
		if len(fields) < 6 {
			continue
		}

		bdf := normalizeGPUAddress(strings.TrimSpace(fields[4]))
		serial := strings.TrimSpace(fields[5])
		if bdf == "" || serial == "" || strings.EqualFold(serial, "N/A") {
			continue
		}
		serialByBDF[bdf] = serial
	}

	return serialByBDF
}
// normalizeGPUAddress canonicalizes a PCI address for use as a lookup key. The
// input is trimmed and lower-cased; when it has the form
// "domain:bus:device.function" and the domain is 8 characters long, only the
// last 4 characters of the domain are kept (e.g. "00000000:2A:00.0" becomes
// "0000:2a:00.0"). Inputs of any other shape are returned trimmed and
// lower-cased, and a blank input yields "".
func normalizeGPUAddress(addr string) string {
	trimmed := strings.TrimSpace(addr)
	if trimmed == "" {
		return ""
	}

	segments := strings.Split(trimmed, ":")
	// Require exactly three colon-separated parts and a single "." in the
	// device.function part; anything else is passed through lower-cased.
	if len(segments) != 3 || strings.Count(segments[2], ".") != 1 {
		return strings.ToLower(trimmed)
	}

	domain := segments[0]
	if len(domain) == 8 {
		domain = domain[4:]
	}
	return strings.ToLower(strings.Join([]string{domain, segments[1], segments[2]}, ":"))
}

View File

@@ -0,0 +1,54 @@
package nvidia_bug_report
import (
"testing"
"git.mchus.pro/mchus/logpile/internal/models"
)
// TestApplyGPUSerialNumbers_FromNvidiaSMI verifies that serials listed in the
// nvidia-smi query section are matched to GPUs by PCI address even though the
// two sides differ in domain width and hex case.
func TestApplyGPUSerialNumbers_FromNvidiaSMI(t *testing.T) {
	content := `
/usr/bin/nvidia-smi --query
GPU 00000000:18:00.0
Serial Number : 1653925025827
GPU 00000000:2A:00.0
Serial Number : 1653925050608
`
	inventory := []models.GPU{
		{BDF: "0000:18:00.0"},
		{BDF: "0000:2a:00.0"},
	}

	applyGPUSerialNumbers(content, inventory)

	if got := inventory[0].SerialNumber; got != "1653925025827" {
		t.Fatalf("unexpected serial for gpu0: %q", got)
	}
	if got := inventory[1].SerialNumber; got != "1653925050608" {
		t.Fatalf("unexpected serial for gpu1: %q", got)
	}
}
// TestApplyGPUSerialNumbers_FromSummaryFallback verifies that serials are taken
// from the "NVIDIA GPU Details" summary table when the input has no nvidia-smi
// query section.
func TestApplyGPUSerialNumbers_FromSummaryFallback(t *testing.T) {
	content := `
NVIDIA GPU Details | NVIDIA H200, 570.172.08, 143771 MiB, 96.00.D0.00.03, 00000000:18:00.0, 1653925025827
| NVIDIA H200, 570.172.08, 143771 MiB, 96.00.D0.00.03, 00000000:2A:00.0, 1653925050608
NVIDIA Switch Details | No devices matching query 'Quantum'
`
	inventory := []models.GPU{
		{BDF: "0000:18:00.0"},
		{BDF: "0000:2a:00.0"},
	}

	applyGPUSerialNumbers(content, inventory)

	if got := inventory[0].SerialNumber; got != "1653925025827" {
		t.Fatalf("unexpected serial for gpu0: %q", got)
	}
	if got := inventory[1].SerialNumber; got != "1653925050608" {
		t.Fatalf("unexpected serial for gpu1: %q", got)
	}
}