sync file-type support across upload/convert and fix collected_at timezone handling

This commit is contained in:
2026-02-28 23:27:49 +03:00
parent 736b77f055
commit 4940cd9645
20 changed files with 931 additions and 49 deletions

View File

@@ -9,23 +9,38 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// maxSingleFileSize is the size limit for a single plain file (10 MiB).
const maxSingleFileSize = 10 * 1024 * 1024

// maxZipArchiveSize is the size limit for a zip archive (50 MiB).
const maxZipArchiveSize = 50 * 1024 * 1024

// maxGzipDecompressedSize caps how much decompressed gzip output is kept
// (50 MiB); larger input is truncated — see the Truncated fields below.
const maxGzipDecompressedSize = 50 * 1024 * 1024

// supportedArchiveExt is the set of file extensions accepted by the archive
// extractor. Plain-text extensions (.txt, .log) are included because single
// files are handled through the same extraction path.
var supportedArchiveExt = map[string]struct{}{
	".gz":  {},
	".tgz": {},
	".tar": {},
	".zip": {},
	".txt": {},
	".log": {},
}
// ExtractedFile represents a file extracted from archive.
type ExtractedFile struct {
	Path             string    // entry path inside the archive (or base filename for single files)
	Content          []byte    // file contents, possibly cut off at a size limit
	ModTime          time.Time // entry modification time; zero when the source carries none
	Truncated        bool      // true when Content was truncated
	TruncatedMessage string    // human-readable note describing the truncation
}
// ExtractArchive extracts tar.gz or zip archive and returns file contents
func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
if !IsSupportedArchiveFilename(archivePath) {
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(archivePath)))
}
ext := strings.ToLower(filepath.Ext(archivePath))
switch ext {
@@ -44,6 +59,9 @@ func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
// ExtractArchiveFromReader extracts archive from reader
func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
if !IsSupportedArchiveFilename(filename) {
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(filename)))
}
ext := strings.ToLower(filepath.Ext(filename))
switch ext {
@@ -60,6 +78,27 @@ func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, er
}
}
// IsSupportedArchiveFilename reports whether filename has an extension the
// archive extractor can handle.
func IsSupportedArchiveFilename(filename string) bool {
	rawExt := strings.TrimSpace(filepath.Ext(filename))
	if rawExt == "" {
		return false
	}
	_, supported := supportedArchiveExt[strings.ToLower(rawExt)]
	return supported
}
// SupportedArchiveExtensions returns the archive/file extensions accepted by
// the archive extractor, sorted lexicographically for stable output.
func SupportedArchiveExtensions() []string {
	exts := make([]string, 0, len(supportedArchiveExt))
	for e := range supportedArchiveExt {
		exts = append(exts, e)
	}
	// Map iteration order is random; sort for determinism.
	sort.Sort(sort.StringSlice(exts))
	return exts
}
func extractTarGz(archivePath string) ([]ExtractedFile, error) {
f, err := os.Open(archivePath)
if err != nil {
@@ -111,6 +150,7 @@ func extractTarFromReader(r io.Reader) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: header.Name,
Content: content,
ModTime: header.ModTime,
})
}
@@ -152,6 +192,7 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
file := ExtractedFile{
Path: baseName,
Content: decompressed,
ModTime: gzr.ModTime,
}
if gzipTruncated {
file.Truncated = true
@@ -180,6 +221,7 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
files = append(files, ExtractedFile{
Path: header.Name,
Content: content,
ModTime: header.ModTime,
})
}
}
@@ -230,6 +272,7 @@ func extractZip(archivePath string) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: f.Name,
Content: content,
ModTime: f.Modified,
})
}
@@ -281,6 +324,7 @@ func extractZipFromReader(r io.Reader) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: f.Name,
Content: content,
ModTime: f.Modified,
})
}
@@ -288,13 +332,24 @@ func extractZipFromReader(r io.Reader) ([]ExtractedFile, error) {
}
// extractSingleFile reads a plain (non-archive) file from disk and returns it
// as a single-element ExtractedFile slice, stamping the file's on-disk mtime.
// Fix: the stale pre-change early `return extractSingleFileFromReader(...)`
// left over in the block made the ModTime-stamping code unreachable; it is
// removed so the stat-based mtime is actually attached.
func extractSingleFile(path string) ([]ExtractedFile, error) {
	info, err := os.Stat(path)
	if err != nil {
		return nil, fmt.Errorf("stat file: %w", err)
	}
	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	defer f.Close()

	files, err := extractSingleFileFromReader(f, filepath.Base(path))
	if err != nil {
		return nil, err
	}
	// The reader-based path has no filesystem metadata; attach the mtime here
	// so collected_at inference can use it.
	if len(files) > 0 {
		files[0].ModTime = info.ModTime()
	}
	return files, nil
}
func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {

View File

@@ -69,3 +69,25 @@ func TestExtractArchiveFromReaderTXT_TruncatedWhenTooLarge(t *testing.T) {
t.Fatalf("expected truncation message")
}
}
// TestIsSupportedArchiveFilename checks accepted and rejected extensions.
func TestIsSupportedArchiveFilename(t *testing.T) {
	cases := map[string]bool{
		"dump.tar.gz":                            true,
		"nvidia-bug-report-1651124000923.log.gz": true,
		"snapshot.zip":                           true,
		"report.log":                             true,
		"xigmanas.txt":                           true,
		"raw_export.json":                        false,
		"archive.bin":                            false,
	}
	for name, want := range cases {
		if got := IsSupportedArchiveFilename(name); got != want {
			t.Fatalf("IsSupportedArchiveFilename(%q)=%v, want %v", name, got, want)
		}
	}
}

View File

@@ -66,11 +66,41 @@ func (p *BMCParser) parseFiles() error {
result.Filename = p.result.Filename
appendExtractionWarnings(result, p.files)
if result.CollectedAt.IsZero() {
if ts := inferCollectedAtFromExtractedFiles(p.files); !ts.IsZero() {
result.CollectedAt = ts.UTC()
}
}
p.result = result
return nil
}
// inferCollectedAtFromExtractedFiles derives a best-guess collection time
// from the extracted files' modification times: the newest mtime from year
// 2000 onward wins; when every mtime looks like a placeholder, the newest of
// all non-zero mtimes is returned. The zero time means "no usable mtime".
func inferCollectedAtFromExtractedFiles(files []ExtractedFile) time.Time {
	var newestTrusted, newestOverall time.Time
	for i := range files {
		mt := files[i].ModTime
		if mt.IsZero() {
			continue
		}
		if mt.After(newestOverall) {
			newestOverall = mt
		}
		// Archives frequently carry placeholder mtimes (e.g. 1980-01-01 in
		// zip); anything before year 2000 is treated as unreliable.
		if mt.Year() >= 2000 && mt.After(newestTrusted) {
			newestTrusted = mt
		}
	}
	if newestTrusted.IsZero() {
		return newestOverall
	}
	return newestTrusted
}
func appendExtractionWarnings(result *models.AnalysisResult, files []ExtractedFile) {
if result == nil {
return

View File

@@ -2,6 +2,7 @@ package parser
import (
"testing"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
)
@@ -32,3 +33,30 @@ func TestAppendExtractionWarnings(t *testing.T) {
t.Fatalf("expected warning details in RawData")
}
}
// TestInferCollectedAtFromExtractedFiles_PrefersReliableMTime checks that a
// placeholder 1980 mtime loses to the newest post-2000 mtime.
func TestInferCollectedAtFromExtractedFiles_PrefersReliableMTime(t *testing.T) {
	input := []ExtractedFile{
		{Path: "a.log", ModTime: time.Date(1980, 1, 1, 0, 0, 0, 0, time.UTC)},
		{Path: "b.log", ModTime: time.Date(2025, 12, 12, 10, 14, 49, 0, time.FixedZone("EST", -5*3600))},
		{Path: "c.log", ModTime: time.Date(2026, 2, 28, 4, 18, 18, 0, time.FixedZone("UTC+8", 8*3600))},
	}
	expected := input[2].ModTime
	if actual := inferCollectedAtFromExtractedFiles(input); !actual.Equal(expected) {
		t.Fatalf("expected %s, got %s", expected, actual)
	}
}
// TestInferCollectedAtFromExtractedFiles_FallsBackToAnyMTime checks that when
// all mtimes predate 2000, the newest of them is still returned.
func TestInferCollectedAtFromExtractedFiles_FallsBackToAnyMTime(t *testing.T) {
	input := []ExtractedFile{
		{Path: "a.log", ModTime: time.Date(1980, 1, 1, 0, 0, 0, 0, time.UTC)},
		{Path: "b.log", ModTime: time.Date(1970, 1, 2, 0, 0, 0, 0, time.UTC)},
	}
	expected := input[0].ModTime
	if actual := inferCollectedAtFromExtractedFiles(input); !actual.Equal(expected) {
		t.Fatalf("expected fallback %s, got %s", expected, actual)
	}
}

View File

@@ -8,6 +8,7 @@ package inspur
import (
"fmt"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
@@ -86,6 +87,8 @@ func containsInspurMarkers(content []byte) bool {
// Parse parses Inspur/Kaytus archive
func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, error) {
selLocation := inferInspurArchiveLocation(files)
result := &models.AnalysisResult{
Events: make([]models.Event, 0),
FRU: make([]models.FRUInfo, 0),
@@ -145,7 +148,7 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
// Parse SEL list (selelist.csv)
if f := parser.FindFileByName(files, "selelist.csv"); f != nil {
selEvents := ParseSELList(f.Content)
selEvents := ParseSELListWithLocation(f.Content, selLocation)
result.Events = append(result.Events, selEvents...)
}
@@ -184,6 +187,43 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
return result, nil
}
// inferInspurArchiveLocation resolves the timezone the archive's offset-less
// timestamps were recorded in. It reads timezone.conf when present and falls
// back to the parser-wide default archive location otherwise (missing file,
// empty value, or an unknown IANA name).
func inferInspurArchiveLocation(files []parser.ExtractedFile) *time.Location {
	fallback := parser.DefaultArchiveLocation()
	tzFile := parser.FindFileByName(files, "timezone.conf")
	if tzFile == nil {
		return fallback
	}
	name := parseTimezoneConfigLocation(tzFile.Content)
	if strings.TrimSpace(name) == "" {
		return fallback
	}
	if loc, err := time.LoadLocation(name); err == nil {
		return loc
	}
	return fallback
}
// parseTimezoneConfigLocation scans an ini-style timezone.conf body and
// returns the value of the first non-empty "timezone=..." entry (key matched
// case-insensitively), or "" when none is found. Blank lines, [section]
// headers, and #/; comment lines are ignored.
func parseTimezoneConfigLocation(content []byte) string {
	for _, raw := range strings.Split(string(content), "\n") {
		entry := strings.TrimSpace(raw)
		switch {
		case entry == "",
			strings.HasPrefix(entry, "["),
			strings.HasPrefix(entry, "#"),
			strings.HasPrefix(entry, ";"):
			continue
		}
		key, value, found := strings.Cut(entry, "=")
		if !found {
			continue
		}
		if strings.ToLower(strings.TrimSpace(key)) != "timezone" {
			continue
		}
		if v := strings.TrimSpace(value); v != "" {
			return v
		}
	}
	return ""
}
func (p *Parser) parseDeviceFruSDR(content []byte, result *models.AnalysisResult) {
lines := string(content)

View File

@@ -13,6 +13,12 @@ import (
// ParseSELList parses selelist.csv content into events, interpreting
// timestamps without an explicit offset in the parser's default archive
// timezone (see ParseSELListWithLocation for an explicit timezone).
// Format: ID, Date (MM/DD/YYYY), Time (HH:MM:SS), Sensor, Event, Status
// Example: 1,04/18/2025,09:31:18,Event Logging Disabled SEL_Status,Log area reset/cleared,Asserted
func ParseSELList(content []byte) []models.Event {
	return ParseSELListWithLocation(content, parser.DefaultArchiveLocation())
}
// ParseSELListWithLocation parses selelist.csv using provided source timezone
// for timestamps that don't contain an explicit offset.
func ParseSELListWithLocation(content []byte, location *time.Location) []models.Event {
var events []models.Event
text := string(content)
@@ -49,7 +55,7 @@ func ParseSELList(content []byte) []models.Event {
status := strings.TrimSpace(records[5])
// Parse timestamp: MM/DD/YYYY HH:MM:SS
timestamp := parseSELTimestamp(dateStr, timeStr)
timestamp := parseSELTimestamp(dateStr, timeStr, location)
// Extract sensor type and name
sensorType, sensorName := parseSensorInfo(sensorStr)
@@ -77,12 +83,16 @@ func ParseSELList(content []byte) []models.Event {
}
// parseSELTimestamp parses MM/DD/YYYY and HH:MM:SS into time.Time
func parseSELTimestamp(dateStr, timeStr string) time.Time {
func parseSELTimestamp(dateStr, timeStr string, location *time.Location) time.Time {
// Combine date and time: MM/DD/YYYY HH:MM:SS
timestampStr := dateStr + " " + timeStr
if location == nil {
location = parser.DefaultArchiveLocation()
}
// Try parsing with MM/DD/YYYY format
t, err := parser.ParseInDefaultArchiveLocation("01/02/2006 15:04:05", timestampStr)
t, err := time.ParseInLocation("01/02/2006 15:04:05", timestampStr, location)
if err != nil {
// Fallback to current time
return time.Now()

View File

@@ -0,0 +1,33 @@
package inspur
import (
"testing"
"time"
)
// TestParseSELListWithLocation_UsesProvidedTimezone checks that a timestamp
// without an offset is interpreted in the supplied location.
func TestParseSELListWithLocation_UsesProvidedTimezone(t *testing.T) {
	input := []byte("sel elist:\n1,02/28/2026,04:18:18,Sensor X,Event,Asserted\n")
	loc, err := time.LoadLocation("Asia/Shanghai")
	if err != nil {
		t.Fatalf("load location: %v", err)
	}
	got := ParseSELListWithLocation(input, loc)
	if len(got) != 1 {
		t.Fatalf("expected 1 event, got %d", len(got))
	}
	// 04:18:18 +08:00 == 20:18:18Z (previous day)
	expected := time.Date(2026, 2, 27, 20, 18, 18, 0, time.UTC)
	if !got[0].Timestamp.UTC().Equal(expected) {
		t.Fatalf("unexpected timestamp: got %s want %s", got[0].Timestamp.UTC(), expected)
	}
}
// TestParseTimezoneConfigLocation checks extraction of the timezone value
// from an ini-style config body.
func TestParseTimezoneConfigLocation(t *testing.T) {
	conf := []byte("[TimeZoneConfig]\ntimezone=Asia/Shanghai\n")
	if got := parseTimezoneConfigLocation(conf); got != "Asia/Shanghai" {
		t.Fatalf("unexpected timezone: %q", got)
	}
}

View File

@@ -3,14 +3,33 @@
package nvidia_bug_report
import (
"fmt"
"regexp"
"strings"
"time"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// parserVersion - version of this parser module.
// Fix: the stale pre-change declaration (`"1.0.0"`) left over alongside its
// replacement was a duplicate const definition; only the new value remains.
const parserVersion = "1.1.0"

// bugReportDateLineRegex matches the "Date: ..." header line emitted near the
// top of nvidia-bug-report.sh output, capturing everything after "Date:".
var bugReportDateLineRegex = regexp.MustCompile(`(?m)^Date:\s+(.+?)\s*$`)

// dateWithTZAbbrevRegex captures "Mon Jan 2 15:04:05 TZ 2006"-shaped dates:
// group 1 = weekday/date/time, group 2 = zone abbreviation, group 3 = year.
var dateWithTZAbbrevRegex = regexp.MustCompile(`^([A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+([A-Za-z]{2,5})\s+(\d{4})$`)

// timezoneAbbrevToOffset maps common UTC/US zone abbreviations to fixed
// numeric offsets so abbreviation-only dates can be pinned to an exact
// instant (Go's parser does not resolve bare abbreviations reliably).
var timezoneAbbrevToOffset = map[string]string{
	"UTC": "+00:00",
	"GMT": "+00:00",
	"EST": "-05:00",
	"EDT": "-04:00",
	"CST": "-06:00",
	"CDT": "-05:00",
	"MST": "-07:00",
	"MDT": "-06:00",
	"PST": "-08:00",
	"PDT": "-07:00",
}
func init() {
parser.Register(&Parser{})
@@ -81,6 +100,10 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
}
content := string(files[0].Content)
if collectedAt, tzOffset, ok := parseBugReportCollectedAt(content); ok {
result.CollectedAt = collectedAt.UTC()
result.SourceTimezone = tzOffset
}
// Parse system information
parseSystemInfo(content, result)
@@ -105,3 +128,49 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
return result, nil
}
// parseBugReportCollectedAt extracts the "Date: ..." header from bug report
// content and returns the parsed timestamp, its UTC offset formatted as
// ±HH:MM, and whether a parsable date line was found.
func parseBugReportCollectedAt(content string) (time.Time, string, bool) {
	m := bugReportDateLineRegex.FindStringSubmatch(content)
	if len(m) != 2 {
		return time.Time{}, "", false
	}
	dateLine := strings.TrimSpace(m[1])
	if dateLine == "" {
		return time.Time{}, "", false
	}
	// First try the "Fri Dec 12 10:14:49 EST 2025" shape, substituting a
	// fixed numeric offset for the abbreviation so the instant is unambiguous.
	if parts := dateWithTZAbbrevRegex.FindStringSubmatch(dateLine); len(parts) == 4 {
		abbrev := strings.ToUpper(strings.TrimSpace(parts[2]))
		if offset, known := timezoneAbbrevToOffset[abbrev]; known {
			candidate := strings.TrimSpace(parts[1]) + " " + offset + " " + strings.TrimSpace(parts[3])
			if ts, err := time.Parse("Mon Jan 2 15:04:05 -07:00 2006", candidate); err == nil {
				return ts, offset, true
			}
		}
	}
	// Fall back to layouts Go can parse directly (abbreviations it cannot
	// resolve end up with a zero offset, which formatOffset then reports).
	for _, layout := range []string{
		"Mon Jan 2 15:04:05 MST 2006",
		"Mon Jan 2 15:04:05 2006",
	} {
		if ts, err := time.Parse(layout, dateLine); err == nil {
			return ts, formatOffset(ts), true
		}
	}
	return time.Time{}, "", false
}
func formatOffset(t time.Time) string {
_, sec := t.Zone()
sign := '+'
if sec < 0 {
sign = '-'
sec = -sec
}
h := sec / 3600
m := (sec % 3600) / 60
return fmt.Sprintf("%c%02d:%02d", sign, h, m)
}

View File

@@ -0,0 +1,54 @@
package nvidia_bug_report
import (
"testing"
"time"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// TestParseBugReportCollectedAt checks that the Date header with an EST
// abbreviation is resolved to the correct UTC instant and offset.
func TestParseBugReportCollectedAt(t *testing.T) {
	input := `
Start of NVIDIA bug report log file
Date: Fri Dec 12 10:14:49 EST 2025
`
	ts, offset, ok := parseBugReportCollectedAt(input)
	if !ok {
		t.Fatalf("expected collected_at to be parsed")
	}
	if offset != "-05:00" {
		t.Fatalf("expected tz offset -05:00, got %q", offset)
	}
	expected := time.Date(2025, 12, 12, 15, 14, 49, 0, time.UTC)
	if !ts.UTC().Equal(expected) {
		t.Fatalf("expected %s, got %s", expected, ts.UTC())
	}
}
func TestNvidiaBugReportParser_SetsCollectedAtAndTimezone(t *testing.T) {
p := &Parser{}
files := []parser.ExtractedFile{
{
Path: "nvidia-bug-report-1653925023938.log",
Content: []byte(`
Start of NVIDIA bug report log file
nvidia-bug-report.sh Version: 34275561
Date: Fri Dec 12 10:14:49 EST 2025
`),
},
}
result, err := p.Parse(files)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if result.SourceTimezone != "-05:00" {
t.Fatalf("expected source timezone -05:00, got %q", result.SourceTimezone)
}
wantUTC := time.Date(2025, 12, 12, 15, 14, 49, 0, time.UTC)
if !result.CollectedAt.Equal(wantUTC) {
t.Fatalf("expected collected_at %s, got %s", wantUTC, result.CollectedAt)
}
}