sync file-type support across upload/convert and fix collected_at timezone handling

This commit is contained in:
2026-02-28 23:27:49 +03:00
parent 736b77f055
commit 4940cd9645
20 changed files with 931 additions and 49 deletions

View File

@@ -9,23 +9,38 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// Size ceilings, in bytes, used by the extractor. The roles described below
// are read off the constant names; the enforcing code lives in the
// individual extraction functions.
const (
	// maxSingleFileSize is 10 MiB: by its name, the cap for a standalone file.
	maxSingleFileSize = 10 << 20
	// maxZipArchiveSize is 50 MiB: by its name, the cap for a zip archive.
	maxZipArchiveSize = 50 << 20
	// maxGzipDecompressedSize is 50 MiB: by its name, the cap on gzip output
	// after decompression.
	maxGzipDecompressedSize = 50 << 20
)
// supportedArchiveExt is the set of lower-case filename extensions (leading
// dot included) that the extractor accepts. It is used purely as a set, so
// the values carry no data.
var supportedArchiveExt = map[string]struct{}{
	// Archive and compressed containers.
	".tar": {},
	".tgz": {},
	".gz":  {},
	".zip": {},

	// Plain text files.
	".log": {},
	".txt": {},
}
// ExtractedFile represents a single file produced by the extractor: either an
// entry pulled out of an archive or a standalone file returned as-is.
type ExtractedFile struct {
// Path is the name of the entry as reported by its source (for example the
// tar header name or the zip entry name).
Path string
// Content holds the file's bytes.
Content []byte
// ModTime is the modification time taken from the source's metadata (tar or
// zip entry header, gzip header, or the file's on-disk stat info).
ModTime time.Time
// Truncated is set when the content was cut short, e.g. when gzip
// decompression was truncated.
Truncated bool
// TruncatedMessage presumably carries a human-readable explanation that
// accompanies Truncated — confirm against the code that sets it.
TruncatedMessage string
}
// ExtractArchive extracts tar.gz or zip archive and returns file contents
func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
if !IsSupportedArchiveFilename(archivePath) {
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(archivePath)))
}
ext := strings.ToLower(filepath.Ext(archivePath))
switch ext {
@@ -44,6 +59,9 @@ func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
// ExtractArchiveFromReader extracts archive from reader
func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
if !IsSupportedArchiveFilename(filename) {
return nil, fmt.Errorf("unsupported archive format: %s", strings.ToLower(filepath.Ext(filename)))
}
ext := strings.ToLower(filepath.Ext(filename))
switch ext {
@@ -60,6 +78,27 @@ func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, er
}
}
// IsSupportedArchiveFilename reports whether the extension of filename is one
// the archive extractor accepts. The extension is compared case-insensitively
// and with surrounding whitespace removed; a filename with no extension is
// never supported.
func IsSupportedArchiveFilename(filename string) bool {
	suffix := filepath.Ext(filename)
	suffix = strings.TrimSpace(suffix)
	suffix = strings.ToLower(suffix)

	_, known := supportedArchiveExt[suffix]
	return suffix != "" && known
}
// SupportedArchiveExtensions lists every extension the archive extractor
// accepts, in ascending lexical order. A fresh slice is built on each call,
// so callers may modify the result freely.
func SupportedArchiveExtensions() []string {
	exts := make([]string, len(supportedArchiveExt))
	i := 0
	for name := range supportedArchiveExt {
		exts[i] = name
		i++
	}
	// Map iteration order is random; sorting makes the output deterministic.
	sort.Strings(exts)
	return exts
}
func extractTarGz(archivePath string) ([]ExtractedFile, error) {
f, err := os.Open(archivePath)
if err != nil {
@@ -111,6 +150,7 @@ func extractTarFromReader(r io.Reader) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: header.Name,
Content: content,
ModTime: header.ModTime,
})
}
@@ -152,6 +192,7 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
file := ExtractedFile{
Path: baseName,
Content: decompressed,
ModTime: gzr.ModTime,
}
if gzipTruncated {
file.Truncated = true
@@ -180,6 +221,7 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
files = append(files, ExtractedFile{
Path: header.Name,
Content: content,
ModTime: header.ModTime,
})
}
}
@@ -230,6 +272,7 @@ func extractZip(archivePath string) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: f.Name,
Content: content,
ModTime: f.Modified,
})
}
@@ -281,6 +324,7 @@ func extractZipFromReader(r io.Reader) ([]ExtractedFile, error) {
files = append(files, ExtractedFile{
Path: f.Name,
Content: content,
ModTime: f.Modified,
})
}
@@ -288,13 +332,24 @@ func extractZipFromReader(r io.Reader) ([]ExtractedFile, error) {
}
// extractSingleFile reads the standalone file at path and returns the result
// of extractSingleFileFromReader, with the first entry's ModTime set to the
// file's on-disk modification time.
//
// It returns an error if the file cannot be stat'ed or opened, or if the
// reader-based extraction fails; in those cases the returned slice is nil.
func extractSingleFile(path string) ([]ExtractedFile, error) {
	info, err := os.Stat(path)
	if err != nil {
		return nil, fmt.Errorf("stat file: %w", err)
	}

	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file: %w", err)
	}
	defer f.Close()

	files, err := extractSingleFileFromReader(f, filepath.Base(path))
	if err != nil {
		return nil, err
	}

	// The reader-based helper only receives a stream and a name, so the
	// filesystem modification time has to be filled in here.
	if len(files) > 0 {
		files[0].ModTime = info.ModTime()
	}
	return files, nil
}
func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {