// Package nvidia provides parser for NVIDIA Field Diagnostics archives // Tested with: HGX Field Diag (works with various server vendors) // // IMPORTANT: Increment parserVersion when modifying parser logic! // This helps track which version was used to parse specific logs. package nvidia import ( "strings" "git.mchus.pro/mchus/logpile/internal/models" "git.mchus.pro/mchus/logpile/internal/parser" ) // parserVersion - version of this parser module // IMPORTANT: Increment this version when making changes to parser logic! const parserVersion = "1.3.0" func init() { parser.Register(&Parser{}) } // Parser implements VendorParser for NVIDIA Field Diagnostics type Parser struct{} // Name returns human-readable parser name func (p *Parser) Name() string { return "NVIDIA Field Diagnostics Parser" } // Vendor returns vendor identifier func (p *Parser) Vendor() string { return "nvidia" } // Version returns parser version // IMPORTANT: Update parserVersion constant when modifying parser logic! func (p *Parser) Version() string { return parserVersion } // Detect checks if archive matches NVIDIA Field Diagnostics format // Returns confidence 0-100 func (p *Parser) Detect(files []parser.ExtractedFile) int { confidence := 0 for _, f := range files { path := strings.ToLower(f.Path) // Strong indicators for NVIDIA Field Diagnostics format if strings.HasSuffix(path, "unified_summary.json") { // Check if it's really NVIDIA Field Diag format if containsNvidiaFieldDiagMarkers(f.Content) { confidence += 40 } } if strings.HasSuffix(path, "summary.json") && !strings.Contains(path, "unified_") { confidence += 20 } if strings.HasSuffix(path, "summary.csv") { confidence += 15 } if strings.Contains(path, "gpu_fieldiag/") { confidence += 15 } if strings.HasSuffix(path, "output.log") { // Check if it contains dmidecode output if strings.Contains(string(f.Content), "dmidecode") || strings.Contains(string(f.Content), "System Information") { confidence += 10 } } // Cap at 100 if confidence >= 100 { return 100 } } return confidence } // containsNvidiaFieldDiagMarkers checks if content has NVIDIA Field Diag markers func containsNvidiaFieldDiagMarkers(content []byte) bool { s := string(content) // Check for typical NVIDIA Field Diagnostics structure return strings.Contains(s, "runInfo") && strings.Contains(s, "diagVersion") && strings.Contains(s, "HGX Field Diag") } // Parse parses NVIDIA Field Diagnostics archive func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) { result := &models.AnalysisResult{ Events: make([]models.Event, 0), FRU: make([]models.FRUInfo, 0), Sensors: make([]models.SensorReading, 0), } // Initialize hardware config result.Hardware = &models.HardwareConfig{ GPUs: make([]models.GPU, 0), } gpuStatuses := make(map[string]string) gpuFailureDetails := make(map[string]string) nvswitchStatuses := make(map[string]string) // Parse output.log first (contains dmidecode system info) // Find the output.log file that contains dmidecode output outputLogFile := findDmidecodeOutputLog(files) if outputLogFile != nil { if err := ParseOutputLog(outputLogFile.Content, result); err != nil { // Log error but continue parsing other files _ = err // Ignore error for now } } // Parse unified_summary.json (contains detailed component info) if f := parser.FindFileByName(files, "unified_summary.json"); f != nil { if err := ParseUnifiedSummary(f.Content, result); err != nil { // Log error but continue parsing other files _ = err // Ignore error for now } } // Parse inventory/output.log (contains GPU serial numbers from lspci) inventoryLogFile := findInventoryOutputLog(files) if inventoryLogFile != nil { if err := ParseInventoryLog(inventoryLogFile.Content, result); err != nil { // Log error but continue parsing other files _ = err // Ignore error for now } } // Parse inventory/inventory.log to enrich PCI BDF mapping for components. inventoryInfoLog := findInventoryInfoLog(files) if inventoryInfoLog != nil { if err := ApplyInventoryPCIIDs(inventoryInfoLog.Content, result); err != nil { _ = err } } // Enhance GPU model names using SKU mapping from testspec + inventory summary. ApplyGPUModelsFromSKU(files, result) // Parse inventory/nvflash_verbose.log and apply firmware versions by BDF + IDs. // This runs after GPU model/part-number enrichment so firmware tab uses final model labels. nvflashVerbose := findNVFlashVerboseLog(files) if nvflashVerbose != nil { if err := ParseNVFlashVerboseLog(nvflashVerbose.Content, result); err != nil { _ = err } } // Parse summary.json (test results summary) if f := parser.FindFileByName(files, "summary.json"); f != nil { events := ParseSummaryJSON(f.Content) result.Events = append(result.Events, events...) for componentID, status := range CollectGPUStatusesFromSummaryJSON(f.Content) { gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status) } for slot, status := range CollectNVSwitchStatusesFromSummaryJSON(f.Content) { nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status) } for componentID, detail := range CollectGPUFailureDetailsFromSummaryJSON(f.Content) { if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" { gpuFailureDetails[componentID] = strings.TrimSpace(detail) } } } // Parse summary.csv (alternative format) if f := parser.FindFileByName(files, "summary.csv"); f != nil { csvEvents := ParseSummaryCSV(f.Content) result.Events = append(result.Events, csvEvents...) for componentID, status := range CollectGPUStatusesFromSummaryCSV(f.Content) { gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status) } for slot, status := range CollectNVSwitchStatusesFromSummaryCSV(f.Content) { nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status) } for componentID, detail := range CollectGPUFailureDetailsFromSummaryCSV(f.Content) { if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" { gpuFailureDetails[componentID] = strings.TrimSpace(detail) } } } // Apply per-GPU PASS/FAIL status derived from summary files. ApplyGPUStatuses(result, gpuStatuses) ApplyGPUFailureDetails(result, gpuFailureDetails) ApplyNVSwitchStatuses(result, nvswitchStatuses) ApplyGPUAndNVSwitchCheckTimes(result, CollectGPUAndNVSwitchCheckTimes(files)) // Parse GPU field diagnostics logs gpuFieldiagFiles := parser.FindFileByPattern(files, "gpu_fieldiag/", ".log") for _, f := range gpuFieldiagFiles { // Parse individual GPU diagnostic logs if needed // For now, we focus on summary files _ = f } return result, nil } // findDmidecodeOutputLog finds the output.log file that contains dmidecode output func findDmidecodeOutputLog(files []parser.ExtractedFile) *parser.ExtractedFile { for _, f := range files { // Look for output.log files if !strings.HasSuffix(strings.ToLower(f.Path), "output.log") { continue } // Check if it contains dmidecode output content := string(f.Content) if strings.Contains(content, "dmidecode") && strings.Contains(content, "System Information") { return &f } } return nil }