// Package nvidia_bug_report extracts GPU, driver-version and GPU serial-number
// details from the text of an NVIDIA bug report.
package nvidia_bug_report
|
|
|
|
import (
|
|
"bufio"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// parseGPUInfo extracts GPU information from the bug report
|
|
func parseGPUInfo(content string, result *models.AnalysisResult) {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
|
|
var currentGPU *models.GPU
|
|
inGPUInfo := false
|
|
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
|
|
// Look for GPU information section markers (but skip ls listings)
|
|
if strings.Contains(line, "/proc/driver/nvidia") && strings.Contains(line, "/gpus/") &&
|
|
strings.Contains(line, "/information") && !strings.Contains(line, "ls:") {
|
|
// Extract PCI address
|
|
re := regexp.MustCompile(`/gpus/([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.[\da-f])`)
|
|
matches := re.FindStringSubmatch(line)
|
|
if len(matches) > 1 {
|
|
pciAddr := matches[1]
|
|
|
|
// Save previous GPU if exists
|
|
if currentGPU != nil {
|
|
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
|
|
}
|
|
|
|
// Start new GPU entry
|
|
currentGPU = &models.GPU{
|
|
BDF: pciAddr,
|
|
Manufacturer: "NVIDIA",
|
|
}
|
|
inGPUInfo = true
|
|
continue
|
|
}
|
|
}
|
|
|
|
// End of GPU info section (separator line or new section, but not ls lines)
|
|
if inGPUInfo && (strings.HasPrefix(line, "___") || (strings.HasPrefix(line, "***") && !strings.Contains(line, "ls:"))) {
|
|
inGPUInfo = false
|
|
continue
|
|
}
|
|
|
|
// Parse GPU fields within information section
|
|
if inGPUInfo && currentGPU != nil && strings.Contains(line, ":") {
|
|
// Split on first colon and trim whitespace/tabs
|
|
parts := strings.SplitN(line, ":", 2)
|
|
if len(parts) != 2 {
|
|
continue
|
|
}
|
|
|
|
field := strings.TrimSpace(parts[0])
|
|
value := strings.TrimSpace(parts[1])
|
|
|
|
if value == "" {
|
|
continue
|
|
}
|
|
|
|
switch field {
|
|
case "Model":
|
|
currentGPU.Model = value
|
|
case "IRQ":
|
|
if irq, err := strconv.Atoi(value); err == nil {
|
|
currentGPU.IRQ = irq
|
|
}
|
|
case "GPU UUID":
|
|
currentGPU.UUID = value
|
|
case "Video BIOS":
|
|
currentGPU.VideoBIOS = value
|
|
case "Bus Type":
|
|
currentGPU.BusType = value
|
|
case "DMA Size":
|
|
currentGPU.DMASize = value
|
|
case "DMA Mask":
|
|
currentGPU.DMAMask = value
|
|
case "Bus Location":
|
|
// BDF already set from path, but verify consistency
|
|
if currentGPU.BDF != value {
|
|
// Use the value from the information section as it's more explicit
|
|
currentGPU.BDF = value
|
|
}
|
|
case "Device Minor":
|
|
if minor, err := strconv.Atoi(value); err == nil {
|
|
currentGPU.DeviceMinor = minor
|
|
}
|
|
case "GPU Excluded":
|
|
// Store as status if "Yes"
|
|
if strings.ToLower(value) == "yes" {
|
|
currentGPU.Status = "Excluded"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Save last GPU if exists
|
|
if currentGPU != nil {
|
|
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
|
|
}
|
|
|
|
applyGPUSerialNumbers(content, result.Hardware.GPUs)
|
|
|
|
// Create event for GPU summary
|
|
if len(result.Hardware.GPUs) > 0 {
|
|
result.Events = append(result.Events, models.Event{
|
|
Timestamp: time.Now(),
|
|
Source: "NVIDIA Driver",
|
|
EventType: "GPU Detection",
|
|
Description: "NVIDIA GPUs detected",
|
|
Severity: models.SeverityInfo,
|
|
RawData: formatGPUSummary(result.Hardware.GPUs),
|
|
})
|
|
}
|
|
}
|
|
|
|
// parseDriverVersion extracts NVIDIA driver version
|
|
func parseDriverVersion(content string, result *models.AnalysisResult) {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
|
|
// Look for NVRM version line
|
|
if strings.Contains(line, "NVRM version:") {
|
|
// Extract version info
|
|
parts := strings.Split(line, "NVRM version:")
|
|
if len(parts) > 1 {
|
|
version := strings.TrimSpace(parts[1])
|
|
|
|
result.Events = append(result.Events, models.Event{
|
|
Timestamp: time.Now(),
|
|
Source: "NVIDIA Driver",
|
|
EventType: "Driver Version",
|
|
Description: "NVIDIA driver version detected",
|
|
Severity: models.SeverityInfo,
|
|
RawData: version,
|
|
})
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// formatGPUSummary creates a summary string for GPUs
|
|
func formatGPUSummary(gpus []models.GPU) string {
|
|
if len(gpus) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var summary strings.Builder
|
|
for i, gpu := range gpus {
|
|
if i > 0 {
|
|
summary.WriteString("; ")
|
|
}
|
|
summary.WriteString(gpu.BDF)
|
|
if gpu.Model != "" {
|
|
summary.WriteString(" (")
|
|
summary.WriteString(gpu.Model)
|
|
summary.WriteString(")")
|
|
}
|
|
}
|
|
|
|
return summary.String()
|
|
}
|
|
|
|
func applyGPUSerialNumbers(content string, gpus []models.GPU) {
|
|
if len(gpus) == 0 {
|
|
return
|
|
}
|
|
|
|
serialByBDF := parseGPUSerialsFromNvidiaSMI(content)
|
|
if len(serialByBDF) == 0 {
|
|
serialByBDF = parseGPUSerialsFromSummary(content)
|
|
}
|
|
|
|
if len(serialByBDF) == 0 {
|
|
return
|
|
}
|
|
|
|
for i := range gpus {
|
|
bdf := normalizeGPUAddress(gpus[i].BDF)
|
|
if bdf == "" {
|
|
continue
|
|
}
|
|
if serial, ok := serialByBDF[bdf]; ok && serial != "" {
|
|
gpus[i].SerialNumber = serial
|
|
}
|
|
}
|
|
}
|
|
|
|
func parseGPUSerialsFromNvidiaSMI(content string) map[string]string {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
reGPU := regexp.MustCompile(`^GPU\s+([0-9A-F]{8}:[0-9A-F]{2}:[0-9A-F]{2}\.[0-9A-F])$`)
|
|
|
|
serialByBDF := make(map[string]string)
|
|
currentBDF := ""
|
|
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
if matches := reGPU.FindStringSubmatch(line); len(matches) == 2 {
|
|
currentBDF = normalizeGPUAddress(matches[1])
|
|
continue
|
|
}
|
|
|
|
if currentBDF == "" {
|
|
continue
|
|
}
|
|
|
|
if strings.HasPrefix(line, "Serial Number") {
|
|
parts := strings.SplitN(line, ":", 2)
|
|
if len(parts) != 2 {
|
|
continue
|
|
}
|
|
serial := strings.TrimSpace(parts[1])
|
|
if serial != "" && !strings.EqualFold(serial, "N/A") {
|
|
serialByBDF[currentBDF] = serial
|
|
}
|
|
}
|
|
}
|
|
|
|
return serialByBDF
|
|
}
|
|
|
|
func parseGPUSerialsFromSummary(content string) map[string]string {
|
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
|
|
|
serialByBDF := make(map[string]string)
|
|
inGPUDetails := false
|
|
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
trimmed := strings.TrimSpace(line)
|
|
|
|
if strings.HasPrefix(trimmed, "NVIDIA GPU Details") {
|
|
inGPUDetails = true
|
|
}
|
|
if !inGPUDetails {
|
|
continue
|
|
}
|
|
if strings.HasPrefix(trimmed, "NVIDIA Switch Details") {
|
|
break
|
|
}
|
|
|
|
parts := strings.Split(line, "|")
|
|
if len(parts) < 2 {
|
|
continue
|
|
}
|
|
payload := strings.TrimSpace(parts[len(parts)-1])
|
|
if payload == "" {
|
|
continue
|
|
}
|
|
|
|
fields := strings.Split(payload, ",")
|
|
if len(fields) < 6 {
|
|
continue
|
|
}
|
|
|
|
bdf := normalizeGPUAddress(strings.TrimSpace(fields[4]))
|
|
serial := strings.TrimSpace(fields[5])
|
|
if bdf == "" || serial == "" || strings.EqualFold(serial, "N/A") {
|
|
continue
|
|
}
|
|
serialByBDF[bdf] = serial
|
|
}
|
|
|
|
return serialByBDF
|
|
}
|
|
|
|
// normalizeGPUAddress canonicalizes a PCI address string so that addresses
// from different parts of the report can be compared.
//
// Surrounding whitespace is removed and the result is lower-cased. When the
// address has the shape "domain:bus:device.function" (exactly two colons and
// exactly one dot in the last part) and the domain is eight characters long,
// the first four characters of the domain are dropped. Any other shape is
// returned trimmed and lower-cased only; blank input yields "".
func normalizeGPUAddress(addr string) string {
	canonical := strings.TrimSpace(addr)
	if canonical == "" {
		return ""
	}

	segments := strings.Split(canonical, ":")
	wellFormed := len(segments) == 3 && strings.Count(segments[2], ".") == 1

	// The domain is the leading segment, so shortening it is the same as
	// cutting four characters off the front of the whole address.
	if wellFormed && len(segments[0]) == 8 {
		canonical = canonical[4:]
	}

	return strings.ToLower(canonical)
}
|