Update parser and project changes
This commit is contained in:
137
internal/parser/vendors/nvidia_bug_report/gpu.go
vendored
137
internal/parser/vendors/nvidia_bug_report/gpu.go
vendored
@@ -106,6 +106,8 @@ func parseGPUInfo(content string, result *models.AnalysisResult) {
|
||||
result.Hardware.GPUs = append(result.Hardware.GPUs, *currentGPU)
|
||||
}
|
||||
|
||||
applyGPUSerialNumbers(content, result.Hardware.GPUs)
|
||||
|
||||
// Create event for GPU summary
|
||||
if len(result.Hardware.GPUs) > 0 {
|
||||
result.Events = append(result.Events, models.Event{
|
||||
@@ -168,3 +170,138 @@ func formatGPUSummary(gpus []models.GPU) string {
|
||||
|
||||
return summary.String()
|
||||
}
|
||||
|
||||
func applyGPUSerialNumbers(content string, gpus []models.GPU) {
|
||||
if len(gpus) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
serialByBDF := parseGPUSerialsFromNvidiaSMI(content)
|
||||
if len(serialByBDF) == 0 {
|
||||
serialByBDF = parseGPUSerialsFromSummary(content)
|
||||
}
|
||||
|
||||
if len(serialByBDF) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for i := range gpus {
|
||||
bdf := normalizeGPUAddress(gpus[i].BDF)
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
if serial, ok := serialByBDF[bdf]; ok && serial != "" {
|
||||
gpus[i].SerialNumber = serial
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseGPUSerialsFromNvidiaSMI(content string) map[string]string {
|
||||
scanner := bufio.NewScanner(strings.NewReader(content))
|
||||
reGPU := regexp.MustCompile(`^GPU\s+([0-9A-F]{8}:[0-9A-F]{2}:[0-9A-F]{2}\.[0-9A-F])$`)
|
||||
|
||||
serialByBDF := make(map[string]string)
|
||||
currentBDF := ""
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if matches := reGPU.FindStringSubmatch(line); len(matches) == 2 {
|
||||
currentBDF = normalizeGPUAddress(matches[1])
|
||||
continue
|
||||
}
|
||||
|
||||
if currentBDF == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(line, "Serial Number") {
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
serial := strings.TrimSpace(parts[1])
|
||||
if serial != "" && !strings.EqualFold(serial, "N/A") {
|
||||
serialByBDF[currentBDF] = serial
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return serialByBDF
|
||||
}
|
||||
|
||||
func parseGPUSerialsFromSummary(content string) map[string]string {
|
||||
scanner := bufio.NewScanner(strings.NewReader(content))
|
||||
|
||||
serialByBDF := make(map[string]string)
|
||||
inGPUDetails := false
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
trimmed := strings.TrimSpace(line)
|
||||
|
||||
if strings.HasPrefix(trimmed, "NVIDIA GPU Details") {
|
||||
inGPUDetails = true
|
||||
}
|
||||
if !inGPUDetails {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(trimmed, "NVIDIA Switch Details") {
|
||||
break
|
||||
}
|
||||
|
||||
parts := strings.Split(line, "|")
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
payload := strings.TrimSpace(parts[len(parts)-1])
|
||||
if payload == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
fields := strings.Split(payload, ",")
|
||||
if len(fields) < 6 {
|
||||
continue
|
||||
}
|
||||
|
||||
bdf := normalizeGPUAddress(strings.TrimSpace(fields[4]))
|
||||
serial := strings.TrimSpace(fields[5])
|
||||
if bdf == "" || serial == "" || strings.EqualFold(serial, "N/A") {
|
||||
continue
|
||||
}
|
||||
serialByBDF[bdf] = serial
|
||||
}
|
||||
|
||||
return serialByBDF
|
||||
}
|
||||
|
||||
// normalizeGPUAddress converts a PCI address into a canonical lower-case form
// so that addresses from different sources can be compared.
//
// Surrounding whitespace is trimmed; an empty result yields "". When the
// address has the shape "<domain>:<bus>:<device>.<fn>" (exactly three
// colon-separated parts, the last containing exactly one dot) and the domain
// is 8 characters long, the domain is shortened to its last 4 characters.
// Anything that does not have that shape is returned trimmed and lower-cased
// but otherwise unchanged.
func normalizeGPUAddress(addr string) string {
	cleaned := strings.TrimSpace(addr)
	if cleaned == "" {
		return ""
	}

	segments := strings.Split(cleaned, ":")
	wellFormed := len(segments) == 3 && strings.Count(segments[2], ".") == 1
	if !wellFormed {
		return strings.ToLower(cleaned)
	}

	domain := segments[0]
	if len(domain) == 8 {
		// Keep only the trailing 4 characters of an 8-character domain,
		// e.g. "00000000" -> "0000".
		domain = domain[4:]
	}

	return strings.ToLower(strings.Join([]string{domain, segments[1], segments[2]}, ":"))
}
|
||||
|
||||
54
internal/parser/vendors/nvidia_bug_report/gpu_test.go
vendored
Normal file
54
internal/parser/vendors/nvidia_bug_report/gpu_test.go
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
package nvidia_bug_report
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
func TestApplyGPUSerialNumbers_FromNvidiaSMI(t *testing.T) {
|
||||
content := `
|
||||
/usr/bin/nvidia-smi --query
|
||||
GPU 00000000:18:00.0
|
||||
Serial Number : 1653925025827
|
||||
GPU 00000000:2A:00.0
|
||||
Serial Number : 1653925050608
|
||||
`
|
||||
|
||||
gpus := []models.GPU{
|
||||
{BDF: "0000:18:00.0"},
|
||||
{BDF: "0000:2a:00.0"},
|
||||
}
|
||||
|
||||
applyGPUSerialNumbers(content, gpus)
|
||||
|
||||
if gpus[0].SerialNumber != "1653925025827" {
|
||||
t.Fatalf("unexpected serial for gpu0: %q", gpus[0].SerialNumber)
|
||||
}
|
||||
if gpus[1].SerialNumber != "1653925050608" {
|
||||
t.Fatalf("unexpected serial for gpu1: %q", gpus[1].SerialNumber)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyGPUSerialNumbers_FromSummaryFallback(t *testing.T) {
|
||||
content := `
|
||||
NVIDIA GPU Details | NVIDIA H200, 570.172.08, 143771 MiB, 96.00.D0.00.03, 00000000:18:00.0, 1653925025827
|
||||
| NVIDIA H200, 570.172.08, 143771 MiB, 96.00.D0.00.03, 00000000:2A:00.0, 1653925050608
|
||||
NVIDIA Switch Details | No devices matching query 'Quantum'
|
||||
`
|
||||
|
||||
gpus := []models.GPU{
|
||||
{BDF: "0000:18:00.0"},
|
||||
{BDF: "0000:2a:00.0"},
|
||||
}
|
||||
|
||||
applyGPUSerialNumbers(content, gpus)
|
||||
|
||||
if gpus[0].SerialNumber != "1653925025827" {
|
||||
t.Fatalf("unexpected serial for gpu0: %q", gpus[0].SerialNumber)
|
||||
}
|
||||
if gpus[1].SerialNumber != "1653925050608" {
|
||||
t.Fatalf("unexpected serial for gpu1: %q", gpus[1].SerialNumber)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user