Files
logpile/internal/parser/vendors/nvidia/parser.go
Mikhail Chusavitin 70cd541d9e v1.3.0: Add multiple vendor parsers and enhanced hardware detection
New parsers:
- NVIDIA Field Diagnostics parser with dmidecode output support
- NVIDIA Bug Report parser with comprehensive hardware extraction
- Supermicro crashdump (CDump.txt) parser
- Generic fallback parser for unrecognized text files

Enhanced GPU parsing (nvidia-bug-report):
- Model and manufacturer detection (NVIDIA H100 80GB HBM3)
- UUID, Video BIOS version, IRQ information
- Bus location (BDF), DMA size/mask, device minor
- PCIe bus type details

New hardware detection (nvidia-bug-report):
- System Information: server S/N, UUID, manufacturer, product name
- CPU: model, S/N, cores, threads, frequencies from dmidecode
- Memory: P/N, S/N, manufacturer, speed for all DIMMs
- Power Supplies: manufacturer, model, S/N, wattage, status
- Network Adapters: Ethernet/InfiniBand controllers with VPD data
  - Model, P/N, S/N from lspci Vital Product Data
  - Port count/type detection (QSFP56, OSFP, etc.)
  - Support for ConnectX-6/7 adapters

Archive handling improvements:
- Plain .gz file support (not just tar.gz)
- Increased size limit for plain gzip files (50MB)
- Better error handling for mixed archive formats

Web interface enhancements:
- Display parser name and filename badges
- Improved file info section with visual indicators

Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
2026-01-30 17:19:47 +03:00

167 lines
4.6 KiB
Go

// Package nvidia provides a parser for NVIDIA Field Diagnostics archives.
// Tested with: HGX Field Diag (works with various server vendors)
//
// IMPORTANT: Increment parserVersion when modifying parser logic!
// This helps track which version was used to parse specific logs.
package nvidia
import (
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// parserVersion is the version of this parser module; it is the value
// returned by (*Parser).Version.
// IMPORTANT: Increment this version when making changes to parser logic!
const parserVersion = "1.1.0"
// init registers an NVIDIA Field Diagnostics parser instance with the
// parser package.
func init() {
	parser.Register(new(Parser))
}
// Parser is the VendorParser implementation for NVIDIA Field Diagnostics
// archives. It has no fields and therefore carries no state.
type Parser struct{}

// Name reports the human-readable name of this parser.
func (p *Parser) Name() string {
	const displayName = "NVIDIA Field Diagnostics Parser"
	return displayName
}

// Vendor reports the vendor identifier handled by this parser.
func (p *Parser) Vendor() string {
	const vendorID = "nvidia"
	return vendorID
}
// Version reports the parser version, i.e. the parserVersion constant.
// IMPORTANT: Bump parserVersion whenever the parser logic is modified!
func (p *Parser) Version() string {
	return parserVersion
}
// Detect scores how strongly the extracted files resemble an NVIDIA Field
// Diagnostics archive and returns a confidence value between 0 and 100.
//
// Paths are compared in lower case. Every file may contribute through
// several independent indicators:
//   - unified_summary.json whose content carries the Field Diag markers: +40
//   - summary.json whose path does not contain "unified_": +20
//   - summary.csv: +15
//   - a path containing "gpu_fieldiag/": +15
//   - output.log mentioning "dmidecode" or "System Information": +10
func (p *Parser) Detect(files []parser.ExtractedFile) int {
	const maxConfidence = 100

	total := 0
	for _, file := range files {
		lowered := strings.ToLower(file.Path)

		// The file name alone is not enough for unified_summary.json; the
		// content must also look like Field Diag output.
		if strings.HasSuffix(lowered, "unified_summary.json") && containsNvidiaFieldDiagMarkers(file.Content) {
			total += 40
		}
		if !strings.Contains(lowered, "unified_") && strings.HasSuffix(lowered, "summary.json") {
			total += 20
		}
		if strings.HasSuffix(lowered, "summary.csv") {
			total += 15
		}
		if strings.Contains(lowered, "gpu_fieldiag/") {
			total += 15
		}
		if strings.HasSuffix(lowered, "output.log") {
			// Either hint of dmidecode output is sufficient here.
			text := string(file.Content)
			if strings.Contains(text, "dmidecode") || strings.Contains(text, "System Information") {
				total += 10
			}
		}

		// Stop as soon as the score saturates.
		if total >= maxConfidence {
			return maxConfidence
		}
	}
	return total
}
// containsNvidiaFieldDiagMarkers reports whether content includes every
// marker string checked for NVIDIA Field Diagnostics output: "runInfo",
// "diagVersion" and "HGX Field Diag". The match is case-sensitive.
func containsNvidiaFieldDiagMarkers(content []byte) bool {
	text := string(content)
	for _, marker := range []string{"runInfo", "diagVersion", "HGX Field Diag"} {
		if !strings.Contains(text, marker) {
			return false
		}
	}
	return true
}
// Parse builds an AnalysisResult from the files of an NVIDIA Field
// Diagnostics archive.
//
// Sources are processed in a fixed order: the output.log that contains
// dmidecode output, unified_summary.json, summary.json and summary.csv.
// Errors from ParseOutputLog and ParseUnifiedSummary are deliberately
// discarded so the remaining sources are still processed; the returned error
// is always nil.
func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) {
	// Start from empty, non-nil collections.
	result := &models.AnalysisResult{
		Events:  []models.Event{},
		FRU:     []models.FRUInfo{},
		Sensors: []models.SensorReading{},
		Hardware: &models.HardwareConfig{
			GPUs: []models.GPU{},
		},
	}

	// output.log goes first: it contains the dmidecode system info.
	if logFile := findDmidecodeOutputLog(files); logFile != nil {
		// Best effort: the error is ignored for now so other files still get parsed.
		_ = ParseOutputLog(logFile.Content, result)
	}

	// unified_summary.json contains detailed component info.
	if unified := parser.FindFileByName(files, "unified_summary.json"); unified != nil {
		// Best effort: the error is ignored for now so other files still get parsed.
		_ = ParseUnifiedSummary(unified.Content, result)
	}

	// summary.json holds the test results summary.
	if summary := parser.FindFileByName(files, "summary.json"); summary != nil {
		result.Events = append(result.Events, ParseSummaryJSON(summary.Content)...)
	}

	// summary.csv is the alternative format of the summary.
	if csvFile := parser.FindFileByName(files, "summary.csv"); csvFile != nil {
		result.Events = append(result.Events, ParseSummaryCSV(csvFile.Content)...)
	}

	for range parser.FindFileByPattern(files, "gpu_fieldiag/", ".log") {
		// Individual GPU diagnostic logs are looked up but not parsed yet;
		// for now only the summary files contribute to the result.
	}

	return result, nil
}
// findDmidecodeOutputLog returns the first file whose path ends in
// "output.log" (compared in lower case) and whose content mentions both
// "dmidecode" and "System Information". It returns nil when no file matches.
//
// The slice is walked by index so that each ExtractedFile is inspected in
// place instead of being copied into a range variable, and so that the
// returned pointer refers to the element inside files rather than to a
// loop-variable copy.
func findDmidecodeOutputLog(files []parser.ExtractedFile) *parser.ExtractedFile {
	for i := range files {
		f := &files[i]

		// Only output.log files are candidates.
		if !strings.HasSuffix(strings.ToLower(f.Path), "output.log") {
			continue
		}

		// Both markers are required before the file is treated as the one
		// with dmidecode output.
		content := string(f.Content)
		if strings.Contains(content, "dmidecode") && strings.Contains(content, "System Information") {
			return f
		}
	}
	return nil
}