Files
logpile/internal/parser/vendors/nvidia_bug_report/gpu.go

308 lines
7.1 KiB
Go

package nvidia_bug_report
import (
"bufio"
"regexp"
"strconv"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
// parseGPUInfo extracts per-GPU details from the bug report and appends them
// to result.Hardware.GPUs.
//
// A new GPU entry starts at every line that mentions "/proc/driver/nvidia",
// "/gpus/" and "/information", does not contain "ls:", and carries a
// lower-case PCI address of the form dddd:bb:dd.f after "/gpus/". The
// "Field: value" lines that follow populate the entry until a line starting
// with "___", or starting with "***" and not containing "ls:", closes the
// section.
//
// After the scan, applyGPUSerialNumbers is given the chance to fill in serial
// numbers, and if result.Hardware.GPUs is non-empty one "GPU Detection" info
// event (timestamped with the current time) is appended to result.Events.
func parseGPUInfo(content string, result *models.AnalysisResult) {
	// Compiled once per call instead of once per matching line.
	pciAddrRe := regexp.MustCompile(`/gpus/([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.[\da-f])`)

	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default a Scanner gives up for good at the first line longer than
	// 64 KiB, which would silently drop every GPU after it. Allowing tokens
	// as large as the whole input makes that impossible; the buffer still
	// only grows as far as the longest line actually requires.
	scanner.Buffer(nil, len(content)+1)

	var currentGPU *models.GPU
	inGPUInfo := false

	for scanner.Scan() {
		line := scanner.Text()

		// Look for GPU information section markers (but skip ls listings).
		if strings.Contains(line, "/proc/driver/nvidia") && strings.Contains(line, "/gpus/") &&
			strings.Contains(line, "/information") && !strings.Contains(line, "ls:") {
			if matches := pciAddrRe.FindStringSubmatch(line); len(matches) > 1 {
				// Flush the GPU collected so far before starting a new one.
				if currentGPU != nil {
					result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
				}
				currentGPU = &models.GPU{
					BDF:          matches[1],
					Manufacturer: "NVIDIA",
				}
				inGPUInfo = true
				continue
			}
			// A marker without a parsable address is treated like any
			// other line and falls through.
		}

		// Everything below only applies inside an information section.
		if !inGPUInfo || currentGPU == nil {
			continue
		}

		// End of GPU info section (separator line or new section, but not ls lines).
		if strings.HasPrefix(line, "___") ||
			(strings.HasPrefix(line, "***") && !strings.Contains(line, "ls:")) {
			inGPUInfo = false
			continue
		}

		// Split on the first colon only: values such as "Bus Location"
		// contain colons themselves.
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}
		field := strings.TrimSpace(parts[0])
		value := strings.TrimSpace(parts[1])
		if value == "" {
			continue
		}

		switch field {
		case "Model":
			currentGPU.Model = value
		case "IRQ":
			// Non-numeric values are ignored and leave the field at zero.
			if irq, err := strconv.Atoi(value); err == nil {
				currentGPU.IRQ = irq
			}
		case "GPU UUID":
			currentGPU.UUID = value
		case "Video BIOS":
			currentGPU.VideoBIOS = value
		case "Bus Type":
			currentGPU.BusType = value
		case "DMA Size":
			currentGPU.DMASize = value
		case "DMA Mask":
			currentGPU.DMAMask = value
		case "Bus Location":
			// The explicit value from the information section takes
			// precedence over the address taken from the path.
			currentGPU.BDF = value
		case "Device Minor":
			if minor, err := strconv.Atoi(value); err == nil {
				currentGPU.DeviceMinor = minor
			}
		case "GPU Excluded":
			// Only a "Yes" (any case) is recorded; other values leave
			// Status untouched.
			if strings.EqualFold(value, "yes") {
				currentGPU.Status = "Excluded"
			}
		}
	}

	// Save last GPU if exists.
	if currentGPU != nil {
		result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
	}

	applyGPUSerialNumbers(content, result.Hardware.GPUs)

	// Create event for GPU summary.
	if len(result.Hardware.GPUs) > 0 {
		result.Events = append(result.Events, models.Event{
			Timestamp:   time.Now(),
			Source:      "NVIDIA Driver",
			EventType:   "GPU Detection",
			Description: "NVIDIA GPUs detected",
			Severity:    models.SeverityInfo,
			RawData:     formatGPUSummary(result.Hardware.GPUs),
		})
	}
}
// parseDriverVersion records the NVIDIA driver version found in the bug
// report.
//
// The first line containing "NVRM version:" wins: the text that follows the
// marker (up to a repeated marker, if any), trimmed of surrounding
// whitespace, becomes the RawData of one "Driver Version" info event appended
// to result.Events. The event is timestamped with the current time. Nothing
// is appended when no such line exists.
func parseDriverVersion(content string, result *models.AnalysisResult) {
	const marker = "NVRM version:"

	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default a Scanner gives up for good at the first line longer than
	// 64 KiB, which would hide a version line that comes after it. Allowing
	// tokens as large as the whole input makes that impossible; the buffer
	// still only grows as far as the longest line actually requires.
	scanner.Buffer(nil, len(content)+1)

	for scanner.Scan() {
		line := scanner.Text()
		if !strings.Contains(line, marker) {
			continue
		}

		parts := strings.Split(line, marker)
		if len(parts) < 2 {
			continue
		}

		result.Events = append(result.Events, models.Event{
			Timestamp:   time.Now(),
			Source:      "NVIDIA Driver",
			EventType:   "Driver Version",
			Description: "NVIDIA driver version detected",
			Severity:    models.SeverityInfo,
			RawData:     strings.TrimSpace(parts[1]),
		})
		// Only the first occurrence is reported.
		return
	}
}
// formatGPUSummary renders the given GPUs as one "; "-separated line. Each
// entry is the GPU's BDF, followed by the model in parentheses when a model
// is set. An empty slice yields the empty string.
func formatGPUSummary(gpus []models.GPU) string {
	entries := make([]string, 0, len(gpus))
	for idx := range gpus {
		entry := gpus[idx].BDF
		if model := gpus[idx].Model; model != "" {
			entry += " (" + model + ")"
		}
		entries = append(entries, entry)
	}
	return strings.Join(entries, "; ")
}
// applyGPUSerialNumbers sets SerialNumber, in place, on every GPU whose
// normalized BDF has a serial number in the report.
//
// Serials come from parseGPUSerialsFromNvidiaSMI; only when that yields
// nothing is parseGPUSerialsFromSummary consulted. GPUs without a match, or
// whose BDF normalizes to the empty string, are left unchanged.
func applyGPUSerialNumbers(content string, gpus []models.GPU) {
	if len(gpus) == 0 {
		return
	}

	serials := parseGPUSerialsFromNvidiaSMI(content)
	if len(serials) == 0 {
		serials = parseGPUSerialsFromSummary(content)
		if len(serials) == 0 {
			return
		}
	}

	for idx := range gpus {
		gpu := &gpus[idx]
		key := normalizeGPUAddress(gpu.BDF)
		if key == "" {
			continue
		}
		// A missing key reads as "", which is skipped just like an empty
		// serial.
		if found := serials[key]; found != "" {
			gpu.SerialNumber = found
		}
	}
}
// parseGPUSerialsFromNvidiaSMI collects GPU serial numbers from blocks that
// open with a header line of the form "GPU 00000000:1A:00.0" (upper-case hex,
// eight-digit domain) and contain "Serial Number : <value>" lines —
// presumably the nvidia-smi query section of the report.
//
// The returned map is keyed by the header address passed through
// normalizeGPUAddress. Empty values and "N/A" (any case) are skipped; a later
// serial line for the same GPU overwrites an earlier one. The map is empty,
// never nil, when nothing is found.
func parseGPUSerialsFromNvidiaSMI(content string) map[string]string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default a Scanner gives up for good at the first line longer than
	// 64 KiB, which would silently drop every serial after it. Allowing
	// tokens as large as the whole input makes that impossible; the buffer
	// still only grows as far as the longest line actually requires.
	scanner.Buffer(nil, len(content)+1)

	reGPU := regexp.MustCompile(`^GPU\s+([0-9A-F]{8}:[0-9A-F]{2}:[0-9A-F]{2}\.[0-9A-F])$`)
	serialByBDF := make(map[string]string)
	currentBDF := ""

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// A header line switches the GPU that following lines belong to.
		if matches := reGPU.FindStringSubmatch(line); len(matches) == 2 {
			currentBDF = normalizeGPUAddress(matches[1])
			continue
		}

		// Serial lines seen before any GPU header cannot be attributed.
		if currentBDF == "" || !strings.HasPrefix(line, "Serial Number") {
			continue
		}

		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}
		serial := strings.TrimSpace(parts[1])
		if serial != "" && !strings.EqualFold(serial, "N/A") {
			serialByBDF[currentBDF] = serial
		}
	}
	return serialByBDF
}
// parseGPUSerialsFromSummary collects GPU serial numbers from the section
// that starts at a line beginning with "NVIDIA GPU Details" and ends at a
// line beginning with "NVIDIA Switch Details" (or at end of input).
//
// Within that section, only lines containing "|" are considered. The text
// after the last "|" is split on commas; when it has at least six fields, the
// fifth is taken as the GPU address and the sixth as the serial number. The
// returned map is keyed by the address passed through normalizeGPUAddress.
// Rows with an empty address, an empty serial or an "N/A" serial (any case)
// are skipped. The map is empty, never nil, when nothing is found.
func parseGPUSerialsFromSummary(content string) map[string]string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	// By default a Scanner gives up for good at the first line longer than
	// 64 KiB, which would silently hide the details section if it comes
	// later. Allowing tokens as large as the whole input makes that
	// impossible; the buffer still only grows as far as the longest line
	// actually requires.
	scanner.Buffer(nil, len(content)+1)

	serialByBDF := make(map[string]string)
	inGPUDetails := false

	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)

		if strings.HasPrefix(trimmed, "NVIDIA GPU Details") {
			inGPUDetails = true
		}
		if !inGPUDetails {
			continue
		}
		if strings.HasPrefix(trimmed, "NVIDIA Switch Details") {
			break
		}

		// Only the text after the last "|" carries the comma-separated row.
		parts := strings.Split(line, "|")
		if len(parts) < 2 {
			continue
		}
		payload := strings.TrimSpace(parts[len(parts)-1])
		if payload == "" {
			continue
		}

		fields := strings.Split(payload, ",")
		if len(fields) < 6 {
			continue
		}
		bdf := normalizeGPUAddress(strings.TrimSpace(fields[4]))
		serial := strings.TrimSpace(fields[5])
		if bdf == "" || serial == "" || strings.EqualFold(serial, "N/A") {
			continue
		}
		serialByBDF[bdf] = serial
	}
	return serialByBDF
}
// normalizeGPUAddress canonicalizes a PCI address so that differently
// formatted spellings of the same address compare equal.
//
// Surrounding whitespace is removed and the result is lower-cased. When the
// address has the shape domain:bus:device.function (exactly two colons, and
// exactly one dot in the last segment) and the domain is eight characters
// long, only the last four characters of the domain are kept. Any other shape
// is returned trimmed and lower-cased but otherwise unchanged. Blank input
// yields "".
func normalizeGPUAddress(addr string) string {
	trimmed := strings.TrimSpace(addr)
	if trimmed == "" {
		return ""
	}

	segments := strings.Split(trimmed, ":")
	if len(segments) != 3 || strings.Count(segments[2], ".") != 1 {
		return strings.ToLower(trimmed)
	}

	domain := segments[0]
	if len(domain) == 8 {
		// Shorten the eight-digit domain to four digits.
		domain = domain[4:]
	}
	return strings.ToLower(domain + ":" + segments[1] + ":" + segments[2])
}