nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
44
internal/parser/vendors/nvidia/parser.go
vendored
44
internal/parser/vendors/nvidia/parser.go
vendored
@@ -14,7 +14,7 @@ import (
|
||||
|
||||
// parserVersion - version of this parser module
|
||||
// IMPORTANT: Increment this version when making changes to parser logic!
|
||||
const parserVersion = "1.2.4"
|
||||
const parserVersion = "1.3.0"
|
||||
|
||||
func init() {
|
||||
parser.Register(&Parser{})
|
||||
@@ -70,7 +70,7 @@ func (p *Parser) Detect(files []parser.ExtractedFile) int {
|
||||
if strings.HasSuffix(path, "output.log") {
|
||||
// Check if it contains dmidecode output
|
||||
if strings.Contains(string(f.Content), "dmidecode") ||
|
||||
strings.Contains(string(f.Content), "System Information") {
|
||||
strings.Contains(string(f.Content), "System Information") {
|
||||
confidence += 10
|
||||
}
|
||||
}
|
||||
@@ -106,6 +106,8 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
||||
GPUs: make([]models.GPU, 0),
|
||||
}
|
||||
gpuStatuses := make(map[string]string)
|
||||
gpuFailureDetails := make(map[string]string)
|
||||
nvswitchStatuses := make(map[string]string)
|
||||
|
||||
// Parse output.log first (contains dmidecode system info)
|
||||
// Find the output.log file that contains dmidecode output
|
||||
@@ -134,9 +136,26 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
||||
}
|
||||
}
|
||||
|
||||
// Parse inventory/inventory.log to enrich PCI BDF mapping for components.
|
||||
inventoryInfoLog := findInventoryInfoLog(files)
|
||||
if inventoryInfoLog != nil {
|
||||
if err := ApplyInventoryPCIIDs(inventoryInfoLog.Content, result); err != nil {
|
||||
_ = err
|
||||
}
|
||||
}
|
||||
|
||||
// Enhance GPU model names using SKU mapping from testspec + inventory summary.
|
||||
ApplyGPUModelsFromSKU(files, result)
|
||||
|
||||
// Parse inventory/nvflash_verbose.log and apply firmware versions by BDF + IDs.
|
||||
// This runs after GPU model/part-number enrichment so that the firmware tab uses the final model labels.
|
||||
nvflashVerbose := findNVFlashVerboseLog(files)
|
||||
if nvflashVerbose != nil {
|
||||
if err := ParseNVFlashVerboseLog(nvflashVerbose.Content, result); err != nil {
|
||||
_ = err
|
||||
}
|
||||
}
|
||||
|
||||
// Parse summary.json (test results summary)
|
||||
if f := parser.FindFileByName(files, "summary.json"); f != nil {
|
||||
events := ParseSummaryJSON(f.Content)
|
||||
@@ -144,6 +163,14 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
||||
for componentID, status := range CollectGPUStatusesFromSummaryJSON(f.Content) {
|
||||
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
||||
}
|
||||
for slot, status := range CollectNVSwitchStatusesFromSummaryJSON(f.Content) {
|
||||
nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status)
|
||||
}
|
||||
for componentID, detail := range CollectGPUFailureDetailsFromSummaryJSON(f.Content) {
|
||||
if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" {
|
||||
gpuFailureDetails[componentID] = strings.TrimSpace(detail)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse summary.csv (alternative format)
|
||||
@@ -153,10 +180,21 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
||||
for componentID, status := range CollectGPUStatusesFromSummaryCSV(f.Content) {
|
||||
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
||||
}
|
||||
for slot, status := range CollectNVSwitchStatusesFromSummaryCSV(f.Content) {
|
||||
nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status)
|
||||
}
|
||||
for componentID, detail := range CollectGPUFailureDetailsFromSummaryCSV(f.Content) {
|
||||
if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" {
|
||||
gpuFailureDetails[componentID] = strings.TrimSpace(detail)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply per-GPU and per-NVSwitch statuses and GPU failure details derived from summary files, plus check times collected from the input files.
|
||||
ApplyGPUStatuses(result, gpuStatuses)
|
||||
ApplyGPUFailureDetails(result, gpuFailureDetails)
|
||||
ApplyNVSwitchStatuses(result, nvswitchStatuses)
|
||||
ApplyGPUAndNVSwitchCheckTimes(result, CollectGPUAndNVSwitchCheckTimes(files))
|
||||
|
||||
// Parse GPU field diagnostics logs
|
||||
gpuFieldiagFiles := parser.FindFileByPattern(files, "gpu_fieldiag/", ".log")
|
||||
@@ -180,7 +218,7 @@ func findDmidecodeOutputLog(files []parser.ExtractedFile) *parser.ExtractedFile
|
||||
// Check if it contains dmidecode output
|
||||
content := string(f.Content)
|
||||
if strings.Contains(content, "dmidecode") &&
|
||||
strings.Contains(content, "System Information") {
|
||||
strings.Contains(content, "System Information") {
|
||||
return &f
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user