diff --git a/internal/parser/vendors/nvidia/inventory_log.go b/internal/parser/vendors/nvidia/inventory_log.go new file mode 100644 index 0000000..6dc06ad --- /dev/null +++ b/internal/parser/vendors/nvidia/inventory_log.go @@ -0,0 +1,143 @@ +package nvidia + +import ( + "bufio" + "regexp" + "strings" + + "git.mchus.pro/mchus/logpile/internal/models" + "git.mchus.pro/mchus/logpile/internal/parser" +) + +var ( + // Regex to extract GPU serial numbers from lspci output + // Example: " Capabilities: [2f0 v1] Device Serial Number 14-17-dc-65-77-2d-b0-48" + gpuSerialRegex = regexp.MustCompile(`Device Serial Number\s+([\da-fA-F-]+)`) + + // Regex to extract PCI BDF from lspci header + // Example: "2a:00.0 3D controller: NVIDIA Corporation Device 2335 (rev a1)" + // Note: lspci format is bus:device.function (e.g., "2a:00.0") + pciBDFRegex = regexp.MustCompile(`^([0-9a-fA-F]{2,4}:[0-9a-fA-F]{2}\.[0-9])\s+3D controller.*NVIDIA`) + + // Regex to extract devname mappings from fieldiag command line + // Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099" + devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`) +) + +// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers +// from lspci output and map them to slots +func ParseInventoryLog(content []byte, result *models.AnalysisResult) error { + if result.Hardware == nil || len(result.Hardware.GPUs) == 0 { + // No GPUs to update + return nil + } + + scanner := bufio.NewScanner(strings.NewReader(string(content))) + + // First pass: build mapping of PCI BDF -> Slot name from fieldiag command line + pciToSlot := make(map[string]string) + for scanner.Scan() { + line := scanner.Text() + // Look for fieldiag command with devname parameters + if strings.Contains(line, "devname=") && strings.Contains(line, "fieldiag") { + matches := devnameRegex.FindAllStringSubmatch(line, -1) + for _, match := range matches { + if len(match) == 3 { + pciBDF := match[1] + slotName := match[2] + // Extract slot number 
from name like "SXM5_SN_1653925027099" + // We want to map to slot like "GPUSXM5" + if strings.HasPrefix(slotName, "SXM") { + parts := strings.Split(slotName, "_") + if len(parts) >= 1 { + // Convert "SXM5" to "GPUSXM5" + slot := "GPU" + parts[0] + pciToSlot[pciBDF] = slot + } + } + } + } + } + } + + // Second pass: extract GPU serial numbers from lspci output + scanner = bufio.NewScanner(strings.NewReader(string(content))) + var currentPCIBDF string + var currentSlot string + + for scanner.Scan() { + line := scanner.Text() + + // Check if this is a new GPU device header + if match := pciBDFRegex.FindStringSubmatch(line); len(match) > 1 { + currentPCIBDF = match[1] + // Normalize BDF format - lspci uses short format (bus:device.function) + // but fieldiag uses full format (domain:bus:device.function) + // Convert "2a:00.0" to "0000:2a:00.0" + normalizedBDF := currentPCIBDF + if len(strings.Split(currentPCIBDF, ":")) == 2 { + // Short format without domain, add 0000: + normalizedBDF = "0000:" + currentPCIBDF + } + + // Map to slot name if we have it + if slot, ok := pciToSlot[normalizedBDF]; ok { + currentSlot = slot + } else if slot, ok := pciToSlot[currentPCIBDF]; ok { + currentSlot = slot + } else { + currentSlot = "" + } + continue + } + + // Look for Device Serial Number in capabilities + if match := gpuSerialRegex.FindStringSubmatch(line); len(match) > 1 && currentSlot != "" { + serialNumber := match[1] + // Format: 14-17-dc-65-77-2d-b0-48 + // Convert to more readable format: 48:b0:2d:77:65:dc:17:14 (reversed) + serialFormatted := formatGPUSerial(serialNumber) + + // Find the GPU in our results and update its serial number + for i := range result.Hardware.GPUs { + if result.Hardware.GPUs[i].Slot == currentSlot { + result.Hardware.GPUs[i].SerialNumber = serialFormatted + break + } + } + } + } + + return scanner.Err() +} + +// formatGPUSerial formats GPU serial number from PCIe format to human-readable +// Input: "14-17-dc-65-77-2d-b0-48" (little-endian from 
// formatGPUSerial converts a PCIe Device Serial Number into the byte order
// and notation used for the GPU serial elsewhere in the results.
//
// The input is expected to be eight dash-separated groups, e.g.
// "14-17-dc-65-77-2d-b0-48". The groups are reversed, upper-cased and joined
// with colons, giving "48:B0:2D:77:65:DC:17:14". Any input that does not
// split into exactly eight groups is returned unchanged.
func formatGPUSerial(serial string) string {
	octets := strings.Split(serial, "-")
	if len(octets) != 8 {
		// Unexpected shape: hand it back untouched.
		return serial
	}

	// Reverse in place by swapping from both ends towards the middle.
	for lo, hi := 0, len(octets)-1; lo < hi; lo, hi = lo+1, hi-1 {
		octets[lo], octets[hi] = octets[hi], octets[lo]
	}

	return strings.ToUpper(strings.Join(octets, ":"))
}
// min returns the smaller of two ints; when they are equal it returns that
// shared value.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
-const parserVersion = "1.1.0" +const parserVersion = "1.2.0" func init() { parser.Register(&Parser{}) @@ -124,6 +124,15 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er } } + // Parse inventory/output.log (contains GPU serial numbers from lspci) + inventoryLogFile := findInventoryOutputLog(files) + if inventoryLogFile != nil { + if err := ParseInventoryLog(inventoryLogFile.Content, result); err != nil { + // Log error but continue parsing other files + _ = err // Ignore error for now + } + } + // Parse summary.json (test results summary) if f := parser.FindFileByName(files, "summary.json"); f != nil { events := ParseSummaryJSON(f.Content) diff --git a/internal/parser/vendors/nvidia/parser_test.go b/internal/parser/vendors/nvidia/parser_test.go new file mode 100644 index 0000000..0186d8c --- /dev/null +++ b/internal/parser/vendors/nvidia/parser_test.go @@ -0,0 +1,145 @@ +package nvidia + +import ( + "os" + "path/filepath" + "testing" + + "git.mchus.pro/mchus/logpile/internal/parser" +) + +func TestNVIDIAParser_RealArchive(t *testing.T) { + // Test with the real archive that was reported as problematic + archivePath := filepath.Join("../../../../example", "A514359X5A09844_logs-20260115-151707.tar") + + // Check if file exists + if _, err := os.Stat(archivePath); os.IsNotExist(err) { + t.Skip("Test archive not found, skipping test") + } + + // Extract files from archive + files, err := parser.ExtractArchive(archivePath) + if err != nil { + t.Fatalf("Failed to extract archive: %v", err) + } + + // Check if inventory/output.log exists + hasInventoryLog := false + for _, f := range files { + if filepath.Base(f.Path) == "output.log" { + t.Logf("Found file: %s", f.Path) + } + if f.Path == "./inventory/output.log" || f.Path == "inventory/output.log" { + hasInventoryLog = true + t.Logf("Found inventory/output.log with %d bytes", len(f.Content)) + } + } + if !hasInventoryLog { + t.Error("inventory/output.log not found in extracted files") + 
// contains reports whether substr occurs anywhere in s. An empty substr is
// contained in every string, including the empty one.
func contains(s, substr string) bool {
	// The window scan below already covers the equal-length, prefix and
	// suffix cases, so no separate checks are needed.
	return findSubstring(s, substr)
}

// findSubstring slides a window of len(substr) bytes across s and reports
// whether any window equals substr. It returns false when substr is longer
// than s.
func findSubstring(s, substr string) bool {
	width := len(substr)
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
server with GPU data + srv := &Server{} + + testResult := &models.AnalysisResult{ + Hardware: &models.HardwareConfig{ + GPUs: []models.GPU{ + { + Slot: "GPUSXM1", + Model: "NVIDIA Device 2335", + Manufacturer: "NVIDIA Corporation", + SerialNumber: "48:B0:2D:BB:8E:51:9E:E5", + Firmware: "96.00.D0.00.03", + BDF: "0000:3a:00.0", + }, + { + Slot: "GPUSXM2", + Model: "NVIDIA Device 2335", + Manufacturer: "NVIDIA Corporation", + SerialNumber: "48:B0:2D:EE:DA:27:CF:78", + Firmware: "96.00.D0.00.03", + BDF: "0000:18:00.0", + }, + }, + }, + } + + srv.SetResult(testResult) + + // Create request + req := httptest.NewRequest("GET", "/api/serials", nil) + w := httptest.NewRecorder() + + // Call handler + srv.handleGetSerials(w, req) + + // Check response + if w.Code != http.StatusOK { + t.Errorf("Expected status 200, got %d", w.Code) + } + + // Parse response + var serials []struct { + Component string `json:"component"` + Location string `json:"location,omitempty"` + SerialNumber string `json:"serial_number"` + Manufacturer string `json:"manufacturer,omitempty"` + Category string `json:"category"` + } + + if err := json.NewDecoder(w.Body).Decode(&serials); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + // Check that we have GPU entries + gpuCount := 0 + for _, s := range serials { + if s.Category == "GPU" { + gpuCount++ + t.Logf("Found GPU: %s (%s) S/N: %s", s.Component, s.Location, s.SerialNumber) + + // Verify fields are set + if s.SerialNumber == "" { + t.Errorf("GPU serial number is empty") + } + if s.Location == "" { + t.Errorf("GPU location is empty") + } + if s.Manufacturer == "" { + t.Errorf("GPU manufacturer is empty") + } + } + } + + if gpuCount != 2 { + t.Errorf("Expected 2 GPUs in serials, got %d", gpuCount) + } +} + +func TestHandleGetSerials_WithoutGPUSerials(t *testing.T) { + // Create test server with GPUs but no serial numbers + srv := &Server{} + + testResult := &models.AnalysisResult{ + Hardware: &models.HardwareConfig{ + GPUs: 
[]models.GPU{ + { + Slot: "GPU0", + Model: "Some GPU", + Manufacturer: "Vendor", + SerialNumber: "", // No serial number + }, + }, + }, + } + + srv.SetResult(testResult) + + // Create request + req := httptest.NewRequest("GET", "/api/serials", nil) + w := httptest.NewRecorder() + + // Call handler + srv.handleGetSerials(w, req) + + // Parse response + var serials []struct { + Category string `json:"category"` + } + + if err := json.NewDecoder(w.Body).Decode(&serials); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + // Check that GPUs without serial numbers are not included + for _, s := range serials { + if s.Category == "GPU" { + t.Error("GPU without serial number should not be included in serials list") + } + } +} diff --git a/web/static/js/app.js b/web/static/js/app.js index 3370451..744ad62 100644 --- a/web/static/js/app.js +++ b/web/static/js/app.js @@ -1079,6 +1079,7 @@ function renderSerials(serials) { 'CPU': 'Процессор', 'Memory': 'Память', 'Storage': 'Накопитель', + 'GPU': 'Видеокарта', 'PCIe': 'PCIe', 'Network': 'Сеть', 'PSU': 'БП',