New parsers: - NVIDIA Field Diagnostics parser with dmidecode output support - NVIDIA Bug Report parser with comprehensive hardware extraction - Supermicro crashdump (CDump.txt) parser - Generic fallback parser for unrecognized text files Enhanced GPU parsing (nvidia-bug-report): - Model and manufacturer detection (NVIDIA H100 80GB HBM3) - UUID, Video BIOS version, IRQ information - Bus location (BDF), DMA size/mask, device minor - PCIe bus type details New hardware detection (nvidia-bug-report): - System Information: server S/N, UUID, manufacturer, product name - CPU: model, S/N, cores, threads, frequencies from dmidecode - Memory: P/N, S/N, manufacturer, speed for all DIMMs - Power Supplies: manufacturer, model, S/N, wattage, status - Network Adapters: Ethernet/InfiniBand controllers with VPD data - Model, P/N, S/N from lspci Vital Product Data - Port count/type detection (QSFP56, OSFP, etc.) - Support for ConnectX-6/7 adapters Archive handling improvements: - Plain .gz file support (not just tar.gz) - Increased size limit for plain gzip files (50MB) - Better error handling for mixed archive formats Web interface enhancements: - Display parser name and filename badges - Improved file info section with visual indicators Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
171 lines
4.3 KiB
Go
171 lines
4.3 KiB
Go
package nvidia_bug_report
|
|
|
|
import (
|
|
"bufio"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// parseGPUInfo extracts GPU information from the bug report
|
|
func parseGPUInfo(content string, result *models.AnalysisResult) {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
|
|
var currentGPU *models.GPU
|
|
inGPUInfo := false
|
|
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
|
|
// Look for GPU information section markers (but skip ls listings)
|
|
if strings.Contains(line, "/proc/driver/nvidia") && strings.Contains(line, "/gpus/") &&
|
|
strings.Contains(line, "/information") && !strings.Contains(line, "ls:") {
|
|
// Extract PCI address
|
|
re := regexp.MustCompile(`/gpus/([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.[\da-f])`)
|
|
matches := re.FindStringSubmatch(line)
|
|
if len(matches) > 1 {
|
|
pciAddr := matches[1]
|
|
|
|
// Save previous GPU if exists
|
|
if currentGPU != nil {
|
|
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
|
|
}
|
|
|
|
// Start new GPU entry
|
|
currentGPU = &models.GPU{
|
|
BDF: pciAddr,
|
|
Manufacturer: "NVIDIA",
|
|
}
|
|
inGPUInfo = true
|
|
continue
|
|
}
|
|
}
|
|
|
|
// End of GPU info section (separator line or new section, but not ls lines)
|
|
if inGPUInfo && (strings.HasPrefix(line, "___") || (strings.HasPrefix(line, "***") && !strings.Contains(line, "ls:"))) {
|
|
inGPUInfo = false
|
|
continue
|
|
}
|
|
|
|
// Parse GPU fields within information section
|
|
if inGPUInfo && currentGPU != nil && strings.Contains(line, ":") {
|
|
// Split on first colon and trim whitespace/tabs
|
|
parts := strings.SplitN(line, ":", 2)
|
|
if len(parts) != 2 {
|
|
continue
|
|
}
|
|
|
|
field := strings.TrimSpace(parts[0])
|
|
value := strings.TrimSpace(parts[1])
|
|
|
|
if value == "" {
|
|
continue
|
|
}
|
|
|
|
switch field {
|
|
case "Model":
|
|
currentGPU.Model = value
|
|
case "IRQ":
|
|
if irq, err := strconv.Atoi(value); err == nil {
|
|
currentGPU.IRQ = irq
|
|
}
|
|
case "GPU UUID":
|
|
currentGPU.UUID = value
|
|
case "Video BIOS":
|
|
currentGPU.VideoBIOS = value
|
|
case "Bus Type":
|
|
currentGPU.BusType = value
|
|
case "DMA Size":
|
|
currentGPU.DMASize = value
|
|
case "DMA Mask":
|
|
currentGPU.DMAMask = value
|
|
case "Bus Location":
|
|
// BDF already set from path, but verify consistency
|
|
if currentGPU.BDF != value {
|
|
// Use the value from the information section as it's more explicit
|
|
currentGPU.BDF = value
|
|
}
|
|
case "Device Minor":
|
|
if minor, err := strconv.Atoi(value); err == nil {
|
|
currentGPU.DeviceMinor = minor
|
|
}
|
|
case "GPU Excluded":
|
|
// Store as status if "Yes"
|
|
if strings.ToLower(value) == "yes" {
|
|
currentGPU.Status = "Excluded"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Save last GPU if exists
|
|
if currentGPU != nil {
|
|
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
|
|
}
|
|
|
|
// Create event for GPU summary
|
|
if len(result.Hardware.GPUs) > 0 {
|
|
result.Events = append(result.Events, models.Event{
|
|
Timestamp: time.Now(),
|
|
Source: "NVIDIA Driver",
|
|
EventType: "GPU Detection",
|
|
Description: "NVIDIA GPUs detected",
|
|
Severity: models.SeverityInfo,
|
|
RawData: formatGPUSummary(result.Hardware.GPUs),
|
|
})
|
|
}
|
|
}
|
|
|
|
// parseDriverVersion extracts NVIDIA driver version
|
|
func parseDriverVersion(content string, result *models.AnalysisResult) {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
|
|
// Look for NVRM version line
|
|
if strings.Contains(line, "NVRM version:") {
|
|
// Extract version info
|
|
parts := strings.Split(line, "NVRM version:")
|
|
if len(parts) > 1 {
|
|
version := strings.TrimSpace(parts[1])
|
|
|
|
result.Events = append(result.Events, models.Event{
|
|
Timestamp: time.Now(),
|
|
Source: "NVIDIA Driver",
|
|
EventType: "Driver Version",
|
|
Description: "NVIDIA driver version detected",
|
|
Severity: models.SeverityInfo,
|
|
RawData: version,
|
|
})
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// formatGPUSummary creates a summary string for GPUs
|
|
func formatGPUSummary(gpus []models.GPU) string {
|
|
if len(gpus) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var summary strings.Builder
|
|
for i, gpu := range gpus {
|
|
if i > 0 {
|
|
summary.WriteString("; ")
|
|
}
|
|
summary.WriteString(gpu.BDF)
|
|
if gpu.Model != "" {
|
|
summary.WriteString(" (")
|
|
summary.WriteString(gpu.Model)
|
|
summary.WriteString(")")
|
|
}
|
|
}
|
|
|
|
return summary.String()
|
|
}
|