package nvidia

import (
	"regexp"
	"strconv"
	"strings"
	"time"

	"git.mchus.pro/mchus/logpile/internal/models"
	"git.mchus.pro/mchus/logpile/internal/parser"
)

// verboseRunTestingLineRegex matches a whole (trimmed) line of the form
// "YYYY-MM-DD HH:MM:SS,<fraction> - Testing <name>". Group 1 is the timestamp
// without the fractional part, group 2 is the test name.
var verboseRunTestingLineRegex = regexp.MustCompile(`^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s+-\s+Testing\s+([a-zA-Z0-9_]+)\s*$`)

// runLogStartTimeRegex matches a whole (trimmed) line of the form
// "Start time Mon, 02 Jan 2006 15:04:05". Group 1 is the timestamp text.
var runLogStartTimeRegex = regexp.MustCompile(`^Start time\s+([A-Za-z]{3}, \d{2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2})\s*$`)

// runLogTestDurationRegex matches a whole (trimmed) line of the form
// "Testing <name> <token> [ M:SSs ]". Group 1 is the test name, group 2 the
// minutes and group 3 the two-digit seconds. The single non-space token between
// the name and the bracket is not captured (presumably the test result).
var runLogTestDurationRegex = regexp.MustCompile(`^Testing\s+([a-zA-Z0-9_]+)\s+\S+\s+\[\s*([0-9]+):([0-9]{2})s\s*\]\s*$`)

// modsStartLineRegex finds, in multi-line mode, a line of the form
// "MODS start: Mon Jan  2 15:04:05 2006". Group 1 is the timestamp text.
var modsStartLineRegex = regexp.MustCompile(`(?m)^MODS start:\s+([A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\d{4})\s*$`)

// gpuFieldiagOutputPathRegex matches, case-insensitively, a path ending in
// "gpu_fieldiag/sxm<N>_sn_<serial>/output.log" with either slash or backslash
// separators. Group 1 is the SXM number, group 2 is the serial.
var gpuFieldiagOutputPathRegex = regexp.MustCompile(`(?i)gpu_fieldiag[\\/]+sxm(\d+)_sn_([^\\/]+)[\\/]+output\.log$`)

// nvswitchDevnameRegex finds "devname=<token>,NVSWITCH<N>" fragments.
// Group 1 is the "NVSWITCH<N>" identifier.
var nvswitchDevnameRegex = regexp.MustCompile(`devname=[^,\s]+,(NVSWITCH\d+)`)

// componentCheckTimes holds the check timestamps collected from the logs.
// The per-component maps are consulted first when timestamps are applied;
// the *Default fields are the fallback when no per-component entry exists.
type componentCheckTimes struct {
	GPUDefault      time.Time            // fallback timestamp for any GPU
	NVSwitchDefault time.Time            // fallback timestamp for any NVSwitch
	GPUBySerial     map[string]time.Time // key: GPU serial
	GPUBySlot       map[string]time.Time // key: GPUSXM<N>
	NVSwitchBySlot  map[string]time.Time // key: NVSWITCH<N>
}

// CollectGPUAndNVSwitchCheckTimes extracts GPU/NVSwitch check timestamps from NVIDIA logs.
// Priority: // 1) verbose_run.log "Testing " timestamps // 2) run.log start time + cumulative durations func CollectGPUAndNVSwitchCheckTimes(files []parser.ExtractedFile) componentCheckTimes { gpuBySerial := make(map[string]time.Time) gpuBySlot := make(map[string]time.Time) nvsBySlot := make(map[string]time.Time) for _, f := range files { path := strings.TrimSpace(f.Path) pathLower := strings.ToLower(path) // Per-GPU timestamp from gpu_fieldiag//output.log if strings.HasSuffix(pathLower, "output.log") && strings.Contains(pathLower, "gpu_fieldiag/") { ts := parseModsStartTime(f.Content) if ts.IsZero() { continue } matches := gpuFieldiagOutputPathRegex.FindStringSubmatch(path) if len(matches) == 3 { slot := "GPUSXM" + strings.TrimSpace(matches[1]) serial := strings.TrimSpace(matches[2]) if slot != "" { gpuBySlot[slot] = ts } if serial != "" { gpuBySerial[serial] = ts } } } // Per-NVSwitch timestamp and slot list from nvswitch/output.log if strings.HasSuffix(pathLower, "nvswitch/output.log") || strings.HasSuffix(pathLower, "nvswitch\\output.log") { ts := parseModsStartTime(f.Content) if ts.IsZero() { continue } for _, slot := range parseNVSwitchSlotsFromOutput(f.Content) { nvsBySlot[slot] = ts } } } testStarts := make(map[string]time.Time) if f := parser.FindFileByName(files, "verbose_run.log"); f != nil { for testName, ts := range parseVerboseRunTestStartTimes(f.Content) { testStarts[strings.ToLower(strings.TrimSpace(testName))] = ts } } if len(testStarts) == 0 { if f := parser.FindFileByName(files, "run.log"); f != nil { for testName, ts := range parseRunLogTestStartTimes(f.Content) { testStarts[strings.ToLower(strings.TrimSpace(testName))] = ts } } } return componentCheckTimes{ GPUDefault: pickFirstTestTime(testStarts, "gpu_fieldiag", "gpumem", "gpustress", "pcie", "inventory"), NVSwitchDefault: pickFirstTestTime(testStarts, "nvswitch", "inventory"), GPUBySerial: gpuBySerial, GPUBySlot: gpuBySlot, NVSwitchBySlot: nvsBySlot, } } func pickFirstTestTime(testStarts 
map[string]time.Time, names ...string) time.Time { for _, name := range names { if ts := testStarts[strings.ToLower(strings.TrimSpace(name))]; !ts.IsZero() { return ts } } return time.Time{} } func parseVerboseRunTestStartTimes(content []byte) map[string]time.Time { result := make(map[string]time.Time) lines := strings.Split(string(content), "\n") for _, line := range lines { matches := verboseRunTestingLineRegex.FindStringSubmatch(strings.TrimSpace(line)) if len(matches) != 3 { continue } ts, err := parser.ParseInDefaultArchiveLocation("2006-01-02 15:04:05", strings.TrimSpace(matches[1])) if err != nil { continue } testName := strings.ToLower(strings.TrimSpace(matches[2])) if testName == "" { continue } if _, exists := result[testName]; !exists { result[testName] = ts } } return result } func parseRunLogTestStartTimes(content []byte) map[string]time.Time { lines := strings.Split(string(content), "\n") start := time.Time{} for _, line := range lines { matches := runLogStartTimeRegex.FindStringSubmatch(strings.TrimSpace(line)) if len(matches) != 2 { continue } parsed, err := parser.ParseInDefaultArchiveLocation("Mon, 02 Jan 2006 15:04:05", strings.TrimSpace(matches[1])) if err != nil { continue } start = parsed break } if start.IsZero() { return nil } result := make(map[string]time.Time) cursor := start for _, line := range lines { matches := runLogTestDurationRegex.FindStringSubmatch(strings.TrimSpace(line)) if len(matches) != 4 { continue } testName := strings.ToLower(strings.TrimSpace(matches[1])) minutes, errMin := strconv.Atoi(strings.TrimSpace(matches[2])) seconds, errSec := strconv.Atoi(strings.TrimSpace(matches[3])) if errMin != nil || errSec != nil { continue } if _, exists := result[testName]; !exists { result[testName] = cursor } cursor = cursor.Add(time.Duration(minutes)*time.Minute + time.Duration(seconds)*time.Second) } return result } func parseModsStartTime(content []byte) time.Time { matches := modsStartLineRegex.FindSubmatch(content) if 
len(matches) != 2 { return time.Time{} } tsRaw := strings.TrimSpace(string(matches[1])) if tsRaw == "" { return time.Time{} } ts, err := parser.ParseInDefaultArchiveLocation("Mon Jan 2 15:04:05 2006", tsRaw) if err != nil { return time.Time{} } return ts } func parseNVSwitchSlotsFromOutput(content []byte) []string { matches := nvswitchDevnameRegex.FindAllSubmatch(content, -1) if len(matches) == 0 { return nil } seen := make(map[string]struct{}) out := make([]string, 0, len(matches)) for _, m := range matches { if len(m) != 2 { continue } slot := strings.ToUpper(strings.TrimSpace(string(m[1]))) if slot == "" { continue } if _, exists := seen[slot]; exists { continue } seen[slot] = struct{}{} out = append(out, slot) } return out } // ApplyGPUAndNVSwitchCheckTimes writes parsed check timestamps to component status metadata. func ApplyGPUAndNVSwitchCheckTimes(result *models.AnalysisResult, times componentCheckTimes) { if result == nil || result.Hardware == nil { return } for i := range result.Hardware.GPUs { gpu := &result.Hardware.GPUs[i] ts := time.Time{} if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" { ts = times.GPUBySerial[serial] } if ts.IsZero() { ts = times.GPUBySlot[strings.ToUpper(strings.TrimSpace(gpu.Slot))] } if ts.IsZero() { ts = times.GPUDefault } if ts.IsZero() { continue } gpu.StatusCheckedAt = ts status := strings.TrimSpace(gpu.Status) if status == "" { status = "Unknown" } gpu.StatusAtCollect = &models.StatusAtCollection{ Status: status, At: ts, } } for i := range result.Hardware.PCIeDevices { dev := &result.Hardware.PCIeDevices[i] slot := normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot)) if slot == "" { continue } slot = strings.ToUpper(slot) if !strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch") && !strings.HasPrefix(slot, "NVSWITCH") { continue } ts := times.NVSwitchBySlot[slot] if ts.IsZero() { ts = times.NVSwitchDefault } if ts.IsZero() { continue } dev.StatusCheckedAt = ts status := strings.TrimSpace(dev.Status) 
if status == "" { status = "Unknown" } dev.StatusAtCollect = &models.StatusAtCollection{ Status: status, At: ts, } } }