Files
logpile/internal/parser/vendors/nvidia/unified_summary.go
Mikhail Chusavitin 70cd541d9e v1.3.0: Add multiple vendor parsers and enhanced hardware detection
New parsers:
- NVIDIA Field Diagnostics parser with dmidecode output support
- NVIDIA Bug Report parser with comprehensive hardware extraction
- Supermicro crashdump (CDump.txt) parser
- Generic fallback parser for unrecognized text files

Enhanced GPU parsing (nvidia-bug-report):
- Model and manufacturer detection (NVIDIA H100 80GB HBM3)
- UUID, Video BIOS version, IRQ information
- Bus location (BDF), DMA size/mask, device minor
- PCIe bus type details

New hardware detection (nvidia-bug-report):
- System Information: server S/N, UUID, manufacturer, product name
- CPU: model, S/N, cores, threads, frequencies from dmidecode
- Memory: P/N, S/N, manufacturer, speed for all DIMMs
- Power Supplies: manufacturer, model, S/N, wattage, status
- Network Adapters: Ethernet/InfiniBand controllers with VPD data
  - Model, P/N, S/N from lspci Vital Product Data
  - Port count/type detection (QSFP56, OSFP, etc.)
  - Support for ConnectX-6/7 adapters

Archive handling improvements:
- Plain .gz file support (not just tar.gz)
- Increased size limit for plain gzip files (50MB)
- Better error handling for mixed archive formats

Web interface enhancements:
- Display parser name and filename badges
- Improved file info section with visual indicators

Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
2026-01-30 17:19:47 +03:00

282 lines
7.4 KiB
Go

package nvidia
import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"

	"git.mchus.pro/mchus/logpile/internal/models"
)
// UnifiedSummaryData is the top-level document decoded from
// unified_summary.json.
type UnifiedSummaryData struct {
	// RunInfo is read from the "runInfo" object.
	RunInfo RunInfo `json:"runInfo"`

	// Tests is read from the "tests" array.
	Tests []Test `json:"tests"`
}
// RunInfo holds the run-level fields found under "runInfo" in
// unified_summary.json.
type RunInfo struct {
	// TimeInfo mirrors the nested "timeInfo" object. All three values are
	// kept as the strings found in the JSON; nothing here parses them.
	TimeInfo struct {
		StartTime     string `json:"startTime"`
		EndTime       string `json:"endTime"`
		TotalDuration string `json:"totalDuration"`
	} `json:"timeInfo"`

	// DiagVersion and BaseVersion carry "diagVersion" and "baseVersion".
	DiagVersion string `json:"diagVersion"`
	BaseVersion string `json:"baseVersion"`

	// FinalResult and ErrorCode carry "finalResult" and the numeric
	// "errorCode" of the run.
	FinalResult string `json:"finalResult"`
	ErrorCode   int    `json:"errorCode"`

	// DiagName and RunLevel carry "diagName" and "runLevel".
	DiagName string `json:"diagName"`
	RunLevel string `json:"runLevel"`
}
// Test is one entry of the "tests" array in unified_summary.json.
type Test struct {
	// VirtualID and Action are the two fields ParseUnifiedSummary compares
	// against "inventory" to locate the inventory test.
	VirtualID string `json:"virtualId"`
	Action    string `json:"action"`

	// StartTime and EndTime are kept as the strings found in the JSON.
	StartTime string `json:"startTime"`
	EndTime   string `json:"endTime"`

	// Components is read from the "components" array of the test.
	Components []Component `json:"components"`
}
// Component is one entry of a test's "components" array.
type Component struct {
	// ComponentID identifies the component; the parsers in this file
	// dispatch on it (for example the "GPUSXM" prefix).
	ComponentID string `json:"componentId"`

	// ErrorCode, Notes and Result carry "errorCode", "notes" and "result".
	// Unlike RunInfo.ErrorCode, the error code here is a string.
	ErrorCode string `json:"errorCode"`
	Notes     string `json:"notes"`
	Result    string `json:"result"`

	// Properties is read from the "properties" array of id/value pairs.
	Properties []Property `json:"properties"`
}
// Property is a single id/value pair attached to a Component.
type Property struct {
	ID string `json:"id"`

	// Value holds whatever JSON value was present. It is typically a string
	// or a number, and it is nil when the JSON value is null or the key is
	// missing.
	Value interface{} `json:"value"`
}

// GetValueAsString renders Value as text.
//
//   - A nil Value yields "", so callers that skip empty values also skip
//     null or missing ones instead of storing the literal "<nil>".
//   - Strings are returned unchanged.
//   - float64 and int values are printed in plain decimal notation without
//     an exponent. Integral numbers have no fractional part ("42");
//     non-integral numbers keep their digits ("1.5") rather than being
//     rounded to the nearest integer.
//   - Any other type falls back to fmt's default formatting.
func (p *Property) GetValueAsString() string {
	switch v := p.Value.(type) {
	case nil:
		return ""
	case string:
		return v
	case float64:
		// 'f' with precision -1 gives the shortest exact decimal form and
		// never switches to exponent notation.
		return strconv.FormatFloat(v, 'f', -1, 64)
	case int:
		return strconv.Itoa(v)
	default:
		return fmt.Sprint(v)
	}
}
// ParseUnifiedSummary decodes the bytes of a unified_summary.json file and
// merges the inventory it describes into result.
//
// A JSON decoding failure is returned wrapped; in every other case the
// function returns nil. result is modified in place.
func ParseUnifiedSummary(content []byte, result *models.AnalysisResult) error {
	var summary UnifiedSummaryData
	err := json.Unmarshal(content, &summary)
	if err != nil {
		return fmt.Errorf("failed to parse unified_summary.json: %w", err)
	}

	// Seed a placeholder product name, but leave alone anything an earlier
	// source (such as output.log) has already filled in.
	if result.Hardware.BoardInfo.ProductName == "" {
		result.Hardware.BoardInfo.ProductName = "GPU Server (Field Diag)"
	}

	// Hardware details live in the test(s) marked as "inventory", either by
	// virtual ID or by action.
	for i := range summary.Tests {
		entry := &summary.Tests[i]
		if entry.VirtualID != "inventory" && entry.Action != "inventory" {
			continue
		}
		parseInventoryComponents(entry.Components, result)
	}

	return nil
}
// parseInventoryComponents walks the components of an inventory test and
// records what it recognises in result:
//
//   - components accepted by parseSystemInfo update the board information;
//   - IDs starting with "GPUSXM" are appended to result.Hardware.GPUs;
//   - IDs starting with "NVSWITCHNVSWITCH" are appended to
//     result.Hardware.PCIeDevices.
//
// Anything else is ignored.
func parseInventoryComponents(components []Component, result *models.AnalysisResult) {
	for _, c := range components {
		// System/board components are fully handled by parseSystemInfo.
		if parseSystemInfo(c, result) {
			continue
		}

		// The two prefixes cannot both match the same ID, so a switch is
		// equivalent to checking them one after the other.
		switch {
		case strings.HasPrefix(c.ComponentID, "GPUSXM"):
			if g := parseGPUComponent(c); g != nil {
				result.Hardware.GPUs = append(result.Hardware.GPUs, *g)
			}
		case strings.HasPrefix(c.ComponentID, "NVSWITCHNVSWITCH"):
			// NVSwitches are recorded as PCIe devices for now.
			if sw := parseNVSwitchComponent(c); sw != nil {
				result.Hardware.PCIeDevices = append(result.Hardware.PCIeDevices, *sw)
			}
		}
	}
}
// parseSystemInfo copies system/board identity values from comp into
// result.Hardware.BoardInfo.
//
// comp counts as a system component when its upper-cased ID contains
// BASEBOARD, SYSTEM, MOTHERBOARD or BOARD, or when its ID is exactly
// "Inventory". For any other component nothing is changed and false is
// returned. For a system component true is returned, even when none of its
// properties matched.
//
// Values already present in BoardInfo win: each field is written only while
// it is still empty. ProductName may additionally replace the
// "GPU Server (Field Diag)" placeholder. Properties whose string value is
// empty are skipped.
func parseSystemInfo(comp Component, result *models.AnalysisResult) bool {
	const placeholderProduct = "GPU Server (Field Diag)"

	matched := comp.ComponentID == "Inventory"
	if !matched {
		upperID := strings.ToUpper(comp.ComponentID)
		for _, marker := range []string{"BASEBOARD", "SYSTEM", "MOTHERBOARD", "BOARD"} {
			if strings.Contains(upperID, marker) {
				matched = true
				break
			}
		}
	}
	if !matched {
		return false
	}

	for i := range comp.Properties {
		p := &comp.Properties[i]
		text := p.GetValueAsString()
		if text == "" {
			continue
		}

		switch p.ID {
		case "Manufacturer", "BoardManufacturer", "SystemManufacturer":
			if result.Hardware.BoardInfo.Manufacturer == "" {
				result.Hardware.BoardInfo.Manufacturer = text
			}

		case "ProductName", "Product", "Model", "ModelName", "BoardProduct", "SystemProduct":
			// Real data from another source must not be overwritten; only
			// an empty field or the generic placeholder is replaced.
			if current := result.Hardware.BoardInfo.ProductName; current == "" || current == placeholderProduct {
				result.Hardware.BoardInfo.ProductName = text
			}

		case "SerialNumber", "Serial", "BoardSerial", "SystemSerial":
			if result.Hardware.BoardInfo.SerialNumber == "" {
				result.Hardware.BoardInfo.SerialNumber = text
			}

		case "PartNumber", "BoardPartNumber":
			if result.Hardware.BoardInfo.PartNumber == "" {
				result.Hardware.BoardInfo.PartNumber = text
			}
		}
	}

	return true
}
// parseGPUComponent builds a models.GPU from one inventory component.
//
// The component ID (e.g. "GPUSXM1") becomes the slot. "Vendor" and
// "DeviceName" fill the manufacturer and model, "VBIOS_version" the firmware
// and "PCIID" the BDF. When the model is empty or contains "Device" and a
// "DeviceID" property is present, the model is replaced with
// "NVIDIA Device <DEVICEID>" (device ID upper-cased).
//
// The returned pointer is never nil.
func parseGPUComponent(comp Component) *models.GPU {
	gpu := &models.GPU{Slot: comp.ComponentID}

	var (
		devID    string
		firmware string
		busAddr  string
	)
	for i := range comp.Properties {
		p := &comp.Properties[i]
		switch p.ID {
		case "Vendor":
			gpu.Manufacturer = p.GetValueAsString()
		case "DeviceName":
			gpu.Model = p.GetValueAsString()
		case "DeviceID":
			devID = p.GetValueAsString()
		case "VBIOS_version":
			firmware = p.GetValueAsString()
		case "PCIID":
			busAddr = p.GetValueAsString()
		}
	}

	// Fall back to a name derived from the device ID when no usable model
	// name was reported.
	genericModel := gpu.Model == "" || strings.Contains(gpu.Model, "Device")
	if genericModel && devID != "" {
		gpu.Model = "NVIDIA Device " + strings.ToUpper(devID)
	}

	if firmware != "" {
		gpu.Firmware = firmware
	}
	if busAddr != "" {
		gpu.BDF = busAddr
	}

	return gpu
}
// parseNVSwitchComponent builds a models.PCIeDevice describing an NVSwitch
// from one inventory component.
//
// The component ID (e.g. "NVSWITCHNVSWITCH0") becomes the slot and the device
// class is always "NVSwitch". Recognised properties:
//
//   - "VendorID" / "DeviceID": hexadecimal IDs, with or without a 0x prefix;
//   - "Vendor": manufacturer name;
//   - "VBIOS_version": stored in PartNumber, which this parser uses to carry
//     the firmware version;
//   - "PCIID": stored as the BDF;
//   - "PCISpeed": stored as both the current and the maximum link speed;
//   - "PCIWidth": lane count, written as "x16" or "16", stored as both the
//     current and the maximum link width.
//
// Parsing is best-effort: a value that cannot be parsed leaves the
// corresponding field at its zero value. The returned pointer is never nil.
func parseNVSwitchComponent(comp Component) *models.PCIeDevice {
	device := &models.PCIeDevice{
		Slot: comp.ComponentID,
	}
	device.DeviceClass = "NVSwitch"

	// hexDigits trims blanks and an optional 0x/0X prefix. fmt's %x verb
	// does not accept the prefix: it would stop at the 'x' and report 0.
	hexDigits := func(s string) string {
		s = strings.TrimSpace(s)
		if len(s) > 2 && strings.EqualFold(s[:2], "0x") {
			return s[2:]
		}
		return s
	}

	var vendorIDStr, deviceIDStr, vendor, vbios, pciWidthStr string
	for _, prop := range comp.Properties {
		switch prop.ID {
		case "VendorID":
			vendorIDStr = prop.GetValueAsString()
		case "DeviceID":
			deviceIDStr = prop.GetValueAsString()
		case "Vendor":
			vendor = prop.GetValueAsString()
		case "VBIOS_version":
			vbios = prop.GetValueAsString()
		case "InfoROM_version":
			// Recognised but intentionally not stored: PartNumber is
			// already used for the VBIOS version below.
		case "PCIID":
			device.BDF = prop.GetValueAsString()
		case "PCISpeed":
			speed := prop.GetValueAsString()
			device.LinkSpeed = speed
			device.MaxLinkSpeed = speed
		case "PCIWidth":
			pciWidthStr = prop.GetValueAsString()
		}
	}

	// NOTE(review): if the JSON carries an ID as a number rather than a hex
	// string, GetValueAsString renders it in decimal and it is then read as
	// hex here — confirm the file always uses hex strings.
	if vendorIDStr != "" {
		// Scan errors are ignored on purpose: the field stays zero.
		_, _ = fmt.Sscanf(hexDigits(vendorIDStr), "%x", &device.VendorID)
	}
	if deviceIDStr != "" {
		// Scan errors are ignored on purpose: the field stays zero.
		_, _ = fmt.Sscanf(hexDigits(deviceIDStr), "%x", &device.DeviceID)
	}

	if vendor != "" {
		device.Manufacturer = vendor
	}

	if pciWidthStr != "" {
		// Accept "x16", "X16" and a bare "16".
		lanes := strings.TrimLeft(strings.TrimSpace(pciWidthStr), "xX")
		// Scan errors are ignored on purpose: the width stays zero.
		_, _ = fmt.Sscanf(lanes, "%d", &device.LinkWidth)
		device.MaxLinkWidth = device.LinkWidth
	}

	// PartNumber doubles as the firmware (VBIOS) version for NVSwitches.
	if vbios != "" {
		device.PartNumber = vbios
	}

	return device
}