v1.3.0: Add multiple vendor parsers and enhanced hardware detection
New parsers:
- NVIDIA Field Diagnostics parser with dmidecode output support
- NVIDIA Bug Report parser with comprehensive hardware extraction
- Supermicro crashdump (CDump.txt) parser
- Generic fallback parser for unrecognized text files

Enhanced GPU parsing (nvidia-bug-report):
- Model and manufacturer detection (NVIDIA H100 80GB HBM3)
- UUID, Video BIOS version, IRQ information
- Bus location (BDF), DMA size/mask, device minor
- PCIe bus type details

New hardware detection (nvidia-bug-report):
- System Information: server S/N, UUID, manufacturer, product name
- CPU: model, S/N, cores, threads, frequencies from dmidecode
- Memory: P/N, S/N, manufacturer, speed for all DIMMs
- Power Supplies: manufacturer, model, S/N, wattage, status
- Network Adapters: Ethernet/InfiniBand controllers with VPD data
  - Model, P/N, S/N from lspci Vital Product Data
  - Port count/type detection (QSFP56, OSFP, etc.)
  - Support for ConnectX-6/7 adapters

Archive handling improvements:
- Plain .gz file support (not just tar.gz)
- Increased size limit for plain gzip files (50MB)
- Better error handling for mixed archive formats

Web interface enhancements:
- Display parser name and filename badges
- Improved file info section with visual indicators

Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
This commit is contained in:
152
internal/parser/vendors/nvidia/summary.go
vendored
Normal file
152
internal/parser/vendors/nvidia/summary.go
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// SummaryEntry is one record decoded from summary.json.
//
// The JSON keys contain spaces, so every field carries an explicit tag that
// maps it to its key. All values are kept as plain strings.
type SummaryEntry struct {
	// ErrorCode holds the value of the "Error Code" key.
	ErrorCode string `json:"Error Code"`
	// Test holds the value of the "Test" key.
	Test string `json:"Test"`
	// ComponentID holds the value of the "Component ID" key.
	ComponentID string `json:"Component ID"`
	// Notes holds the value of the "Notes" key; the parsers in this file
	// compare it against "OK".
	Notes string `json:"Notes"`
	// VirtualID holds the value of the "Virtual ID" key.
	VirtualID string `json:"Virtual ID"`
	// IgnoreError holds the value of the "Ignore Error" key.
	IgnoreError string `json:"Ignore Error"`
}
|
||||
|
||||
// ParseSummaryJSON parses summary.json file and returns events
|
||||
func ParseSummaryJSON(content []byte) []models.Event {
|
||||
var entries []SummaryEntry
|
||||
if err := json.Unmarshal(content, &entries); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
events := make([]models.Event, 0)
|
||||
timestamp := time.Now() // Use current time as we don't have exact timestamps in summary
|
||||
|
||||
for _, entry := range entries {
|
||||
// Only create events for failures or warnings
|
||||
if entry.Notes != "OK" || entry.ErrorCode != "001-000-1-000000000000" {
|
||||
event := models.Event{
|
||||
Timestamp: timestamp,
|
||||
Source: "GPU Field Diagnostics",
|
||||
EventType: entry.Test,
|
||||
Description: formatSummaryDescription(entry),
|
||||
Severity: getSeverityFromErrorCode(entry.ErrorCode, entry.Notes),
|
||||
RawData: fmt.Sprintf("Test: %s, Component: %s, Error: %s", entry.Test, entry.ComponentID, entry.ErrorCode),
|
||||
}
|
||||
events = append(events, event)
|
||||
}
|
||||
}
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
// ParseSummaryCSV parses summary.csv file and returns events
|
||||
func ParseSummaryCSV(content []byte) []models.Event {
|
||||
reader := csv.NewReader(strings.NewReader(string(content)))
|
||||
records, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
events := make([]models.Event, 0)
|
||||
timestamp := time.Now()
|
||||
|
||||
// Skip header row
|
||||
for i, record := range records {
|
||||
if i == 0 {
|
||||
continue // Skip header
|
||||
}
|
||||
|
||||
// CSV format: ErrorCode,Test,VirtualID,SubTest,Type,ComponentID,Notes,Level,,,IgnoreError
|
||||
if len(record) < 7 {
|
||||
continue
|
||||
}
|
||||
|
||||
errorCode := record[0]
|
||||
test := record[1]
|
||||
componentID := record[5]
|
||||
notes := record[6]
|
||||
|
||||
// Only create events for failures or warnings
|
||||
if notes != "OK" || (errorCode != "0" && !strings.HasPrefix(errorCode, "048-000-0") && !strings.HasPrefix(errorCode, "001-000-1")) {
|
||||
event := models.Event{
|
||||
Timestamp: timestamp,
|
||||
Source: "GPU Field Diagnostics",
|
||||
EventType: test,
|
||||
Description: formatCSVDescription(test, componentID, notes, errorCode),
|
||||
Severity: getSeverityFromErrorCode(errorCode, notes),
|
||||
RawData: fmt.Sprintf("Test: %s, Component: %s, Error: %s", test, componentID, errorCode),
|
||||
}
|
||||
events = append(events, event)
|
||||
}
|
||||
}
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
// formatSummaryDescription creates a human-readable description from summary entry
|
||||
func formatSummaryDescription(entry SummaryEntry) string {
|
||||
component := entry.ComponentID
|
||||
if component == "" {
|
||||
component = entry.VirtualID
|
||||
}
|
||||
|
||||
if entry.Notes == "OK" {
|
||||
return fmt.Sprintf("%s test passed for %s", entry.Test, component)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s test failed for %s: %s (Error: %s)", entry.Test, component, entry.Notes, entry.ErrorCode)
|
||||
}
|
||||
|
||||
// formatCSVDescription renders a one-line, human-readable description of a
// CSV record from its already-extracted fields. Notes equal to "OK" yield a
// "passed" message; anything else yields a "failed" message that includes the
// notes and the error code.
func formatCSVDescription(test, component, notes, errorCode string) string {
	if notes != "OK" {
		return fmt.Sprintf("%s test failed for %s: %s (Error: %s)", test, component, notes, errorCode)
	}
	return fmt.Sprintf("%s test passed for %s", test, component)
}
|
||||
|
||||
// getSeverityFromErrorCode determines severity based on error code and notes
|
||||
func getSeverityFromErrorCode(errorCode, notes string) models.Severity {
|
||||
// Parse error code format: XXX-YYY-Z-ZZZZZZZZZZZZ
|
||||
// First digit indicates severity in some cases
|
||||
|
||||
if notes == "OK" {
|
||||
return models.SeverityInfo
|
||||
}
|
||||
|
||||
// Row remapping failed is a warning
|
||||
if strings.Contains(notes, "Row remapping failed") {
|
||||
return models.SeverityWarning
|
||||
}
|
||||
|
||||
// Check error code
|
||||
if errorCode == "" || errorCode == "0" {
|
||||
return models.SeverityInfo
|
||||
}
|
||||
|
||||
// Codes starting with 0 are typically informational
|
||||
if strings.HasPrefix(errorCode, "001-000-1") || strings.HasPrefix(errorCode, "048-000-0") {
|
||||
return models.SeverityInfo
|
||||
}
|
||||
|
||||
// Non-zero error codes are typically warnings or errors
|
||||
// If code is in 300+ range, it's likely an error
|
||||
if len(errorCode) > 2 {
|
||||
firstDigits := errorCode[:3]
|
||||
if firstDigits >= "300" {
|
||||
return models.SeverityCritical
|
||||
}
|
||||
}
|
||||
|
||||
return models.SeverityWarning
|
||||
}
|
||||
Reference in New Issue
Block a user