Update parser and project changes

This commit is contained in:
2026-02-15 22:02:07 +03:00
parent c7b2a7ab29
commit 5e49adaf05
23 changed files with 959 additions and 292 deletions

View File

@@ -14,11 +14,14 @@ import (
const maxSingleFileSize = 10 * 1024 * 1024
const maxZipArchiveSize = 50 * 1024 * 1024
const maxGzipDecompressedSize = 50 * 1024 * 1024
// ExtractedFile represents a file extracted from archive
type ExtractedFile struct {
Path string
Content []byte
Path string
Content []byte
Truncated bool
TruncatedMessage string
}
// ExtractArchive extracts tar.gz or zip archive and returns file contents
@@ -121,12 +124,16 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
}
defer gzr.Close()
// Read all decompressed content into buffer
// Limit to 50MB for plain gzip files, 10MB per file for tar.gz
decompressed, err := io.ReadAll(io.LimitReader(gzr, 50*1024*1024))
// Read decompressed content with a hard cap.
// When the payload exceeds the cap, keep the first chunk and mark it as truncated.
decompressed, err := io.ReadAll(io.LimitReader(gzr, maxGzipDecompressedSize+1))
if err != nil {
return nil, fmt.Errorf("read gzip content: %w", err)
}
gzipTruncated := len(decompressed) > maxGzipDecompressedSize
if gzipTruncated {
decompressed = decompressed[:maxGzipDecompressedSize]
}
// Try to read as tar archive
tr := tar.NewReader(bytes.NewReader(decompressed))
@@ -142,12 +149,19 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
baseName = gzr.Name
}
return []ExtractedFile{
{
Path: baseName,
Content: decompressed,
},
}, nil
file := ExtractedFile{
Path: baseName,
Content: decompressed,
}
if gzipTruncated {
file.Truncated = true
file.TruncatedMessage = fmt.Sprintf(
"decompressed gzip content exceeded %d bytes and was truncated",
maxGzipDecompressedSize,
)
}
return []ExtractedFile{file}, nil
}
return nil, fmt.Errorf("tar read: %w", err)
}
@@ -288,16 +302,24 @@ func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile,
if err != nil {
return nil, fmt.Errorf("read file content: %w", err)
}
if len(content) > maxSingleFileSize {
return nil, fmt.Errorf("file too large: max %d bytes", maxSingleFileSize)
truncated := len(content) > maxSingleFileSize
if truncated {
content = content[:maxSingleFileSize]
}
return []ExtractedFile{
{
Path: filepath.Base(filename),
Content: content,
},
}, nil
file := ExtractedFile{
Path: filepath.Base(filename),
Content: content,
}
if truncated {
file.Truncated = true
file.TruncatedMessage = fmt.Sprintf(
"file exceeded %d bytes and was truncated",
maxSingleFileSize,
)
}
return []ExtractedFile{file}, nil
}
// FindFileByPattern finds files matching pattern in extracted files