New parsers: - NVIDIA Field Diagnostics parser with dmidecode output support - NVIDIA Bug Report parser with comprehensive hardware extraction - Supermicro crashdump (CDump.txt) parser - Generic fallback parser for unrecognized text files Enhanced GPU parsing (nvidia-bug-report): - Model and manufacturer detection (NVIDIA H100 80GB HBM3) - UUID, Video BIOS version, IRQ information - Bus location (BDF), DMA size/mask, device minor - PCIe bus type details New hardware detection (nvidia-bug-report): - System Information: server S/N, UUID, manufacturer, product name - CPU: model, S/N, cores, threads, frequencies from dmidecode - Memory: P/N, S/N, manufacturer, speed for all DIMMs - Power Supplies: manufacturer, model, S/N, wattage, status - Network Adapters: Ethernet/InfiniBand controllers with VPD data - Model, P/N, S/N from lspci Vital Product Data - Port count/type detection (QSFP56, OSFP, etc.) - Support for ConnectX-6/7 adapters Archive handling improvements: - Plain .gz file support (not just tar.gz) - Increased size limit for plain gzip files (50MB) - Better error handling for mixed archive formats Web interface enhancements: - Display parser name and filename badges - Improved file info section with visual indicators Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
282 lines
7.4 KiB
Go
282 lines
7.4 KiB
Go
package nvidia
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// UnifiedSummaryData represents the structure of unified_summary.json
|
|
type UnifiedSummaryData struct {
|
|
RunInfo RunInfo `json:"runInfo"`
|
|
Tests []Test `json:"tests"`
|
|
}
|
|
|
|
// RunInfo holds the run-level fields decoded from the "runInfo" object of
// unified_summary.json. All values are stored exactly as they appear in the
// JSON; nothing is parsed or normalised here.
type RunInfo struct {
	// TimeInfo is decoded from the nested "timeInfo" object. Its three
	// members are kept as raw strings.
	TimeInfo struct {
		StartTime     string `json:"startTime"`
		EndTime       string `json:"endTime"`
		TotalDuration string `json:"totalDuration"`
	} `json:"timeInfo"`

	DiagVersion string `json:"diagVersion"`
	BaseVersion string `json:"baseVersion"`

	FinalResult string `json:"finalResult"`
	ErrorCode   int    `json:"errorCode"` // numeric here, unlike Component.ErrorCode

	DiagName string `json:"diagName"`
	RunLevel string `json:"runLevel"`
}
|
|
|
|
// Test represents a diagnostic test
|
|
type Test struct {
|
|
VirtualID string `json:"virtualId"`
|
|
Action string `json:"action"`
|
|
StartTime string `json:"startTime"`
|
|
EndTime string `json:"endTime"`
|
|
Components []Component `json:"components"`
|
|
}
|
|
|
|
// Component represents a hardware component
|
|
type Component struct {
|
|
ComponentID string `json:"componentId"`
|
|
ErrorCode string `json:"errorCode"`
|
|
Notes string `json:"notes"`
|
|
Result string `json:"result"`
|
|
Properties []Property `json:"properties"`
|
|
}
|
|
|
|
// Property is a single id/value pair attached to a component.
type Property struct {
	ID string `json:"id"`

	// Value holds whatever encoding/json decoded for the "value" key: a
	// string for JSON strings, a float64 for JSON numbers, nil for JSON null
	// or when the key is absent.
	Value interface{} `json:"value"`
}

// GetValueAsString renders the property value as text.
//
//   - A string is returned unchanged.
//   - A nil value yields "" so that callers which skip empty values do not
//     end up storing the literal text "<nil>".
//   - A float64 is printed in plain decimal notation (never with an
//     exponent). Whole numbers have no fractional part ("80"); fractional
//     numbers keep up to six decimals with trailing zeros removed ("3.5")
//     instead of being rounded to the nearest integer.
//   - An int is printed in decimal.
//   - Any other type falls back to fmt's default formatting.
func (p *Property) GetValueAsString() string {
	switch v := p.Value.(type) {
	case nil:
		return ""
	case string:
		return v
	case float64:
		// "%f" always contains a decimal point, so trimming zeros can never
		// remove digits from the integer part.
		s := strings.TrimRight(fmt.Sprintf("%f", v), "0")
		return strings.TrimSuffix(s, ".")
	case int:
		return fmt.Sprintf("%d", v)
	default:
		return fmt.Sprintf("%v", v)
	}
}
|
|
|
|
// ParseUnifiedSummary parses unified_summary.json file
|
|
func ParseUnifiedSummary(content []byte, result *models.AnalysisResult) error {
|
|
var data UnifiedSummaryData
|
|
if err := json.Unmarshal(content, &data); err != nil {
|
|
return fmt.Errorf("failed to parse unified_summary.json: %w", err)
|
|
}
|
|
|
|
// Set default board info only if not already set (from output.log)
|
|
if result.Hardware.BoardInfo.ProductName == "" {
|
|
result.Hardware.BoardInfo.ProductName = "GPU Server (Field Diag)"
|
|
}
|
|
|
|
// Parse inventory test for hardware details
|
|
for _, test := range data.Tests {
|
|
if test.VirtualID == "inventory" || test.Action == "inventory" {
|
|
parseInventoryComponents(test.Components, result)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// parseInventoryComponents extracts hardware info from inventory test
|
|
func parseInventoryComponents(components []Component, result *models.AnalysisResult) {
|
|
for _, comp := range components {
|
|
// Parse system/board information
|
|
if parseSystemInfo(comp, result) {
|
|
// System info was found and parsed
|
|
continue
|
|
}
|
|
|
|
// Parse GPU components
|
|
if strings.HasPrefix(comp.ComponentID, "GPUSXM") {
|
|
gpu := parseGPUComponent(comp)
|
|
if gpu != nil {
|
|
result.Hardware.GPUs = append(result.Hardware.GPUs, *gpu)
|
|
}
|
|
}
|
|
|
|
// Parse NVSwitch components
|
|
if strings.HasPrefix(comp.ComponentID, "NVSWITCHNVSWITCH") {
|
|
nvswitch := parseNVSwitchComponent(comp)
|
|
if nvswitch != nil {
|
|
// Add as PCIe device for now
|
|
result.Hardware.PCIeDevices = append(result.Hardware.PCIeDevices, *nvswitch)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// parseSystemInfo extracts system/board information from a component
|
|
// Returns true if this component contains system info
|
|
func parseSystemInfo(comp Component, result *models.AnalysisResult) bool {
|
|
compID := strings.ToUpper(comp.ComponentID)
|
|
|
|
// Check if this is a system/board component
|
|
isSystemComponent := strings.Contains(compID, "BASEBOARD") ||
|
|
strings.Contains(compID, "SYSTEM") ||
|
|
strings.Contains(compID, "MOTHERBOARD") ||
|
|
strings.Contains(compID, "BOARD") ||
|
|
comp.ComponentID == "Inventory"
|
|
|
|
if !isSystemComponent {
|
|
return false
|
|
}
|
|
|
|
// Extract system properties
|
|
for _, prop := range comp.Properties {
|
|
propID := prop.ID
|
|
value := prop.GetValueAsString()
|
|
|
|
if value == "" {
|
|
continue
|
|
}
|
|
|
|
switch propID {
|
|
case "Manufacturer", "BoardManufacturer", "SystemManufacturer":
|
|
// Only set if not already populated (e.g., from output.log)
|
|
if result.Hardware.BoardInfo.Manufacturer == "" {
|
|
result.Hardware.BoardInfo.Manufacturer = value
|
|
}
|
|
case "ProductName", "Product", "Model", "ModelName", "BoardProduct", "SystemProduct":
|
|
// Don't overwrite real data from output.log with generic data
|
|
// Only set if empty or still has the default placeholder value
|
|
if result.Hardware.BoardInfo.ProductName == "" ||
|
|
result.Hardware.BoardInfo.ProductName == "GPU Server (Field Diag)" {
|
|
result.Hardware.BoardInfo.ProductName = value
|
|
}
|
|
case "SerialNumber", "Serial", "BoardSerial", "SystemSerial":
|
|
// Only set if not already populated (e.g., from output.log)
|
|
if result.Hardware.BoardInfo.SerialNumber == "" {
|
|
result.Hardware.BoardInfo.SerialNumber = value
|
|
}
|
|
case "PartNumber", "BoardPartNumber":
|
|
// Only set if not already populated
|
|
if result.Hardware.BoardInfo.PartNumber == "" {
|
|
result.Hardware.BoardInfo.PartNumber = value
|
|
}
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// parseGPUComponent parses GPU component information
|
|
func parseGPUComponent(comp Component) *models.GPU {
|
|
gpu := &models.GPU{
|
|
Slot: comp.ComponentID, // e.g., "GPUSXM1"
|
|
}
|
|
|
|
var deviceID, vbios, pciID string
|
|
|
|
for _, prop := range comp.Properties {
|
|
switch prop.ID {
|
|
case "DeviceID":
|
|
deviceID = prop.GetValueAsString()
|
|
case "Vendor":
|
|
gpu.Manufacturer = prop.GetValueAsString()
|
|
case "DeviceName":
|
|
gpu.Model = prop.GetValueAsString()
|
|
case "VBIOS_version":
|
|
vbios = prop.GetValueAsString()
|
|
case "PCIID":
|
|
pciID = prop.GetValueAsString()
|
|
}
|
|
}
|
|
|
|
// Build model string from vendor/device IDs
|
|
if gpu.Model == "" || strings.Contains(gpu.Model, "Device") {
|
|
if deviceID != "" {
|
|
gpu.Model = fmt.Sprintf("NVIDIA Device %s", strings.ToUpper(deviceID))
|
|
}
|
|
}
|
|
|
|
// Add firmware info
|
|
if vbios != "" {
|
|
gpu.Firmware = vbios
|
|
}
|
|
|
|
// Add PCI info
|
|
if pciID != "" {
|
|
gpu.BDF = pciID
|
|
}
|
|
|
|
return gpu
|
|
}
|
|
|
|
// parseNVSwitchComponent parses NVSwitch component information
|
|
func parseNVSwitchComponent(comp Component) *models.PCIeDevice {
|
|
device := &models.PCIeDevice{
|
|
Slot: comp.ComponentID, // e.g., "NVSWITCHNVSWITCH0"
|
|
}
|
|
|
|
var vendorIDStr, deviceIDStr, vbios, pciID string
|
|
var pciSpeedStr, pciWidthStr string
|
|
var vendor string
|
|
|
|
for _, prop := range comp.Properties {
|
|
switch prop.ID {
|
|
case "VendorID":
|
|
vendorIDStr = prop.GetValueAsString()
|
|
case "DeviceID":
|
|
deviceIDStr = prop.GetValueAsString()
|
|
case "Vendor":
|
|
vendor = prop.GetValueAsString()
|
|
case "VBIOS_version":
|
|
vbios = prop.GetValueAsString()
|
|
case "InfoROM_version":
|
|
// Store in part number field as we don't have a better place
|
|
case "PCIID":
|
|
pciID = prop.GetValueAsString()
|
|
device.BDF = pciID
|
|
case "PCISpeed":
|
|
pciSpeedStr = prop.GetValueAsString()
|
|
device.LinkSpeed = pciSpeedStr
|
|
device.MaxLinkSpeed = pciSpeedStr
|
|
case "PCIWidth":
|
|
pciWidthStr = prop.GetValueAsString()
|
|
}
|
|
}
|
|
|
|
// Parse vendor ID
|
|
if vendorIDStr != "" {
|
|
fmt.Sscanf(vendorIDStr, "%x", &device.VendorID)
|
|
}
|
|
|
|
// Parse device ID
|
|
if deviceIDStr != "" {
|
|
fmt.Sscanf(deviceIDStr, "%x", &device.DeviceID)
|
|
}
|
|
|
|
// Set manufacturer
|
|
if vendor != "" {
|
|
device.Manufacturer = vendor
|
|
}
|
|
|
|
// Set device class
|
|
device.DeviceClass = "NVSwitch"
|
|
|
|
// Parse link width
|
|
if pciWidthStr != "" {
|
|
fmt.Sscanf(pciWidthStr, "x%d", &device.LinkWidth)
|
|
device.MaxLinkWidth = device.LinkWidth
|
|
}
|
|
|
|
// Store part number (use for firmware version)
|
|
if vbios != "" {
|
|
device.PartNumber = vbios
|
|
}
|
|
|
|
return device
|
|
}
|