Update parser and project changes
This commit is contained in:
@@ -14,11 +14,14 @@ import (
|
||||
|
||||
const maxSingleFileSize = 10 * 1024 * 1024
|
||||
const maxZipArchiveSize = 50 * 1024 * 1024
|
||||
const maxGzipDecompressedSize = 50 * 1024 * 1024
|
||||
|
||||
// ExtractedFile represents a file extracted from an archive.
|
||||
type ExtractedFile struct {
|
||||
Path string
|
||||
Content []byte
|
||||
Path string
|
||||
Content []byte
|
||||
Truncated bool
|
||||
TruncatedMessage string
|
||||
}
|
||||
|
||||
// ExtractArchive extracts a tar.gz or zip archive and returns the file contents.
|
||||
@@ -121,12 +124,16 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
|
||||
}
|
||||
defer gzr.Close()
|
||||
|
||||
// Read all decompressed content into buffer
|
||||
// Limit to 50MB for plain gzip files, 10MB per file for tar.gz
|
||||
decompressed, err := io.ReadAll(io.LimitReader(gzr, 50*1024*1024))
|
||||
// Read decompressed content with a hard cap.
|
||||
// When the payload exceeds the cap, keep the first chunk and mark it as truncated.
|
||||
decompressed, err := io.ReadAll(io.LimitReader(gzr, maxGzipDecompressedSize+1))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read gzip content: %w", err)
|
||||
}
|
||||
gzipTruncated := len(decompressed) > maxGzipDecompressedSize
|
||||
if gzipTruncated {
|
||||
decompressed = decompressed[:maxGzipDecompressedSize]
|
||||
}
|
||||
|
||||
// Try to read as tar archive
|
||||
tr := tar.NewReader(bytes.NewReader(decompressed))
|
||||
@@ -142,12 +149,19 @@ func extractTarGzFromReader(r io.Reader, filename string) ([]ExtractedFile, erro
|
||||
baseName = gzr.Name
|
||||
}
|
||||
|
||||
return []ExtractedFile{
|
||||
{
|
||||
Path: baseName,
|
||||
Content: decompressed,
|
||||
},
|
||||
}, nil
|
||||
file := ExtractedFile{
|
||||
Path: baseName,
|
||||
Content: decompressed,
|
||||
}
|
||||
if gzipTruncated {
|
||||
file.Truncated = true
|
||||
file.TruncatedMessage = fmt.Sprintf(
|
||||
"decompressed gzip content exceeded %d bytes and was truncated",
|
||||
maxGzipDecompressedSize,
|
||||
)
|
||||
}
|
||||
|
||||
return []ExtractedFile{file}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("tar read: %w", err)
|
||||
}
|
||||
@@ -288,16 +302,24 @@ func extractSingleFileFromReader(r io.Reader, filename string) ([]ExtractedFile,
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read file content: %w", err)
|
||||
}
|
||||
if len(content) > maxSingleFileSize {
|
||||
return nil, fmt.Errorf("file too large: max %d bytes", maxSingleFileSize)
|
||||
truncated := len(content) > maxSingleFileSize
|
||||
if truncated {
|
||||
content = content[:maxSingleFileSize]
|
||||
}
|
||||
|
||||
return []ExtractedFile{
|
||||
{
|
||||
Path: filepath.Base(filename),
|
||||
Content: content,
|
||||
},
|
||||
}, nil
|
||||
file := ExtractedFile{
|
||||
Path: filepath.Base(filename),
|
||||
Content: content,
|
||||
}
|
||||
if truncated {
|
||||
file.Truncated = true
|
||||
file.TruncatedMessage = fmt.Sprintf(
|
||||
"file exceeded %d bytes and was truncated",
|
||||
maxSingleFileSize,
|
||||
)
|
||||
}
|
||||
|
||||
return []ExtractedFile{file}, nil
|
||||
}
|
||||
|
||||
// FindFileByPattern finds files matching a pattern in the extracted files.
|
||||
|
||||
Reference in New Issue
Block a user