Files
logpile/internal/parser/vendors/nvidia_bug_report/gpu.go
Mikhail Chusavitin 70cd541d9e v1.3.0: Add multiple vendor parsers and enhanced hardware detection
New parsers:
- NVIDIA Field Diagnostics parser with dmidecode output support
- NVIDIA Bug Report parser with comprehensive hardware extraction
- Supermicro crashdump (CDump.txt) parser
- Generic fallback parser for unrecognized text files

Enhanced GPU parsing (nvidia-bug-report):
- Model and manufacturer detection (NVIDIA H100 80GB HBM3)
- UUID, Video BIOS version, IRQ information
- Bus location (BDF), DMA size/mask, device minor
- PCIe bus type details

New hardware detection (nvidia-bug-report):
- System Information: server S/N, UUID, manufacturer, product name
- CPU: model, S/N, cores, threads, frequencies from dmidecode
- Memory: P/N, S/N, manufacturer, speed for all DIMMs
- Power Supplies: manufacturer, model, S/N, wattage, status
- Network Adapters: Ethernet/InfiniBand controllers with VPD data
  - Model, P/N, S/N from lspci Vital Product Data
  - Port count/type detection (QSFP56, OSFP, etc.)
  - Support for ConnectX-6/7 adapters

Archive handling improvements:
- Plain .gz file support (not just tar.gz)
- Increased size limit for plain gzip files (50MB)
- Better error handling for mixed archive formats

Web interface enhancements:
- Display parser name and filename badges
- Improved file info section with visual indicators

Co-Authored-By: Claude (qwen3-coder:480b) <noreply@anthropic.com>
2026-01-30 17:19:47 +03:00

171 lines
4.3 KiB
Go

package nvidia_bug_report
import (
"bufio"
"regexp"
"strconv"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
// parseGPUInfo extracts per-GPU details from the
// /proc/driver/nvidia/.../gpus/<BDF>/information sections of an NVIDIA bug
// report and appends one models.GPU per section to result.Hardware.GPUs.
//
// A section starts at a line that names a GPU information file (and is not an
// "ls:" listing line) and ends at a line starting with "___", or starting with
// "***" when that line is not an "ls:" listing. Inside a section every
// "Field: value" line is mapped onto the matching GPU field; unknown fields,
// empty values and non-numeric IRQ / Device Minor values are ignored.
//
// If result.Hardware.GPUs is non-empty afterwards, one informational
// "GPU Detection" event summarising the GPUs is appended to result.Events.
func parseGPUInfo(content string, result *models.AnalysisResult) {
	// Compiled once per call instead of once per section header.
	gpuPathRe := regexp.MustCompile(`/gpus/([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.[\da-f])`)

	scanner := bufio.NewScanner(strings.NewReader(content))
	// bufio.Scanner stops for good at the first line that exceeds its token
	// limit (64 KiB by default), which would silently drop every GPU section
	// after such a line. content is already in memory, so allow a single line
	// to be as long as the whole input; the scanner then cannot fail.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(content)+1)

	var currentGPU *models.GPU
	inGPUInfo := false

	for scanner.Scan() {
		line := scanner.Text()

		// Section header: path of a GPU information file (ls listings skipped).
		if strings.Contains(line, "/proc/driver/nvidia") && strings.Contains(line, "/gpus/") &&
			strings.Contains(line, "/information") && !strings.Contains(line, "ls:") {
			if matches := gpuPathRe.FindStringSubmatch(line); len(matches) > 1 {
				// Flush the GPU collected so far before starting the next one.
				if currentGPU != nil {
					result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
				}
				currentGPU = &models.GPU{
					BDF:          matches[1],
					Manufacturer: "NVIDIA",
				}
				inGPUInfo = true
				continue
			}
		}

		// Everything below only applies inside an information section;
		// inGPUInfo is only ever set together with a non-nil currentGPU.
		if !inGPUInfo {
			continue
		}

		// End of section: separator line or a new "***" header that is not the
		// "ls:" listing printed right after the section header.
		if strings.HasPrefix(line, "___") ||
			(strings.HasPrefix(line, "***") && !strings.Contains(line, "ls:")) {
			inGPUInfo = false
			continue
		}

		// Split "Field: value" on the first colon only, since values may
		// themselves contain colons (e.g. the bus location).
		sep := strings.IndexByte(line, ':')
		if sep < 0 {
			continue
		}
		field := strings.TrimSpace(line[:sep])
		value := strings.TrimSpace(line[sep+1:])
		if value == "" {
			continue
		}

		switch field {
		case "Model":
			currentGPU.Model = value
		case "IRQ":
			if irq, err := strconv.Atoi(value); err == nil {
				currentGPU.IRQ = irq
			}
		case "GPU UUID":
			currentGPU.UUID = value
		case "Video BIOS":
			currentGPU.VideoBIOS = value
		case "Bus Type":
			currentGPU.BusType = value
		case "DMA Size":
			currentGPU.DMASize = value
		case "DMA Mask":
			currentGPU.DMAMask = value
		case "Bus Location":
			// The explicit value from the information section takes precedence
			// over the address taken from the section path.
			currentGPU.BDF = value
		case "Device Minor":
			if minor, err := strconv.Atoi(value); err == nil {
				currentGPU.DeviceMinor = minor
			}
		case "GPU Excluded":
			// Only a "Yes" answer (any letter case) is recorded as a status.
			if strings.EqualFold(value, "yes") {
				currentGPU.Status = "Excluded"
			}
		}
	}

	// Flush the last GPU, which has no following header to trigger the save.
	if currentGPU != nil {
		result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
	}

	// Emit a single summary event covering all GPUs present in the result.
	if len(result.Hardware.GPUs) > 0 {
		result.Events = append(result.Events, models.Event{
			Timestamp:   time.Now(),
			Source:      "NVIDIA Driver",
			EventType:   "GPU Detection",
			Description: "NVIDIA GPUs detected",
			Severity:    models.SeverityInfo,
			RawData:     formatGPUSummary(result.Hardware.GPUs),
		})
	}
}
// parseDriverVersion looks for the first line containing "NVRM version:" and
// appends an informational "Driver Version" event to result.Events whose
// RawData is the whitespace-trimmed text following that marker. Only the first
// matching line is used; if no line matches, result is left unchanged.
func parseDriverVersion(content string, result *models.AnalysisResult) {
	const marker = "NVRM version:"

	scanner := bufio.NewScanner(strings.NewReader(content))
	// bufio.Scanner stops for good at the first line that exceeds its token
	// limit (64 KiB by default); an over-long line before the version line
	// would hide the driver version. content is already in memory, so allow a
	// single line to be as long as the whole input.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(content)+1)

	for scanner.Scan() {
		line := scanner.Text()
		if !strings.Contains(line, marker) {
			continue
		}

		// The line contains the marker, so Split yields at least two parts;
		// parts[1] is the text after the first marker (up to a repeated
		// marker, should one ever appear on the same line).
		parts := strings.Split(line, marker)
		result.Events = append(result.Events, models.Event{
			Timestamp:   time.Now(),
			Source:      "NVIDIA Driver",
			EventType:   "Driver Version",
			Description: "NVIDIA driver version detected",
			Severity:    models.SeverityInfo,
			RawData:     strings.TrimSpace(parts[1]),
		})
		return
	}
}
// formatGPUSummary renders the GPUs as a single line: each GPU contributes its
// BDF, followed by " (<Model>)" when a model is known, and the entries are
// separated by "; ". An empty slice produces an empty string.
func formatGPUSummary(gpus []models.GPU) string {
	entries := make([]string, 0, len(gpus))
	for idx := range gpus {
		entry := gpus[idx].BDF
		if model := gpus[idx].Model; model != "" {
			entry += " (" + model + ")"
		}
		entries = append(entries, entry)
	}
	// Joining zero entries yields "", matching the empty-input result.
	return strings.Join(entries, "; ")
}