package parser

import (
	"archive/tar"
	"archive/zip"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

// maxExtractedFileSize is the largest archive entry, in bytes (10MB), that the
// extractors load into memory. Entries whose declared size exceeds it are
// silently skipped rather than reported as errors.
const maxExtractedFileSize = 10 * 1024 * 1024

// ExtractedFile represents a file extracted from an archive.
type ExtractedFile struct {
	// Path is the entry name exactly as stored in the archive.
	Path string
	// Content holds the full, decompressed bytes of the entry.
	Content []byte
}

// ExtractArchive extracts a tar.gz or zip archive located at archivePath and
// returns the contents of its files.
//
// The format is chosen from the lower-cased file extension: ".gz" and ".tgz"
// are read as gzip-compressed tar, ".zip" as zip. Any other extension yields
// an "unsupported archive format" error.
func ExtractArchive(archivePath string) ([]ExtractedFile, error) {
	ext := strings.ToLower(filepath.Ext(archivePath))
	switch ext {
	case ".gz", ".tgz":
		return extractTarGz(archivePath)
	case ".zip":
		return extractZip(archivePath)
	default:
		return nil, fmt.Errorf("unsupported archive format: %s", ext)
	}
}

// ExtractArchiveFromReader extracts an archive streamed from r. filename is
// used only to determine the format from its extension.
//
// Only gzip-compressed tar (".gz", ".tgz") is supported here; unlike
// ExtractArchive, a ".zip" filename is rejected as unsupported.
func ExtractArchiveFromReader(r io.Reader, filename string) ([]ExtractedFile, error) {
	ext := strings.ToLower(filepath.Ext(filename))
	switch ext {
	case ".gz", ".tgz":
		return extractTarGzFromReader(r)
	default:
		return nil, fmt.Errorf("unsupported archive format: %s", ext)
	}
}

// extractTarGz opens the gzip-compressed tar archive at archivePath and
// extracts it with extractTarGzFromReader.
func extractTarGz(archivePath string) ([]ExtractedFile, error) {
	f, err := os.Open(archivePath)
	if err != nil {
		return nil, fmt.Errorf("open archive: %w", err)
	}
	defer f.Close()

	return extractTarGzFromReader(f)
}

// skipTarEntry reports whether a tar entry should be left out of the result:
// either it is not a data-carrying file (directory, link, device node, FIFO,
// PAX global header) or its declared size exceeds maxExtractedFileSize.
func skipTarEntry(h *tar.Header) bool {
	switch h.Typeflag {
	case tar.TypeDir, tar.TypeSymlink, tar.TypeLink,
		tar.TypeChar, tar.TypeBlock, tar.TypeFifo,
		tar.TypeXGlobalHeader:
		// These entries have no file body; returning them would produce
		// misleading ExtractedFile values with empty Content.
		return true
	}
	return h.Size > maxExtractedFileSize
}

// extractTarGzFromReader reads a gzip-compressed tar stream from r and returns
// every file entry that is not filtered out by skipTarEntry. On any read error
// it returns nil and the wrapped error; no partial result is returned.
func extractTarGzFromReader(r io.Reader) ([]ExtractedFile, error) {
	gzr, err := gzip.NewReader(r)
	if err != nil {
		return nil, fmt.Errorf("gzip reader: %w", err)
	}
	defer gzr.Close()

	tr := tar.NewReader(gzr)
	var files []ExtractedFile

	for {
		header, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("tar read: %w", err)
		}

		if skipTarEntry(header) {
			continue
		}

		// The tar reader stops at the end of the current entry, so this reads
		// exactly one file's data.
		content, err := io.ReadAll(tr)
		if err != nil {
			return nil, fmt.Errorf("read file %s: %w", header.Name, err)
		}

		files = append(files, ExtractedFile{
			Path:    header.Name,
			Content: content,
		})
	}

	return files, nil
}

// extractZip opens the zip archive at archivePath and returns the contents of
// every non-directory entry whose uncompressed size does not exceed
// maxExtractedFileSize. On any error it returns nil and the wrapped error.
func extractZip(archivePath string) ([]ExtractedFile, error) {
	r, err := zip.OpenReader(archivePath)
	if err != nil {
		return nil, fmt.Errorf("open zip: %w", err)
	}
	defer r.Close()

	var files []ExtractedFile

	for _, f := range r.File {
		info := f.FileInfo()
		if info.IsDir() {
			continue
		}
		// Skip large files (>10MB).
		if info.Size() > maxExtractedFileSize {
			continue
		}

		rc, err := f.Open()
		if err != nil {
			return nil, fmt.Errorf("open file %s: %w", f.Name, err)
		}

		// Close immediately instead of deferring: a defer inside the loop
		// would keep every entry open until the function returns.
		content, err := io.ReadAll(rc)
		rc.Close()
		if err != nil {
			return nil, fmt.Errorf("read file %s: %w", f.Name, err)
		}

		files = append(files, ExtractedFile{
			Path:    f.Name,
			Content: content,
		})
	}

	return files, nil
}

// FindFileByPattern returns the files whose path contains at least one of the
// given patterns as a substring, compared case-insensitively. Each file
// appears at most once, in its original order. The result is nil when nothing
// matches.
func FindFileByPattern(files []ExtractedFile, patterns ...string) []ExtractedFile {
	// Lower-case the patterns once instead of once per file.
	lowered := make([]string, len(patterns))
	for i, p := range patterns {
		lowered[i] = strings.ToLower(p)
	}

	var result []ExtractedFile
	for _, f := range files {
		lowerPath := strings.ToLower(f.Path)
		for _, p := range lowered {
			if strings.Contains(lowerPath, p) {
				result = append(result, f)
				break
			}
		}
	}
	return result
}

// FindFileByName returns the first file whose base name equals name,
// compared case-insensitively, or nil when there is no such file.
//
// The returned pointer refers to a copy of the matching element, so assigning
// to its fields does not modify the files slice. Content still shares its
// backing array with the original element.
func FindFileByName(files []ExtractedFile, name string) *ExtractedFile {
	for i := range files {
		if strings.EqualFold(filepath.Base(files[i].Path), name) {
			match := files[i]
			return &match
		}
	}
	return nil
}