package services import ( "archive/zip" "bytes" "encoding/xml" "fmt" "io" "path/filepath" "regexp" "sort" "strconv" "strings" "time" "git.mchus.pro/mchus/priceforge/internal/lotmatch" ) var ( reISODate = regexp.MustCompile(`\b(20\d{2})-(\d{2})-(\d{2})\b`) reRuDate = regexp.MustCompile(`\b([0-3]\d)\.([01]\d)\.(20\d{2})\b`) mxlCellRe = regexp.MustCompile(`(?s)\{16,\d+,\s*\{1,\d+,\s*\{\s*"ru","((?:""|[^"])*)"\s*\}\s*\},0\},(\d+),`) ) func parseStockRows(filename string, content []byte) ([]stockImportRow, error) { switch strings.ToLower(filepath.Ext(filename)) { case ".mxl": return parseMXLRows(content) case ".xlsx": return parseXLSXRows(content) default: return nil, fmt.Errorf("unsupported file format: %s", filepath.Ext(filename)) } } func parseMXLRows(content []byte) ([]stockImportRow, error) { text := string(content) matches := mxlCellRe.FindAllStringSubmatch(text, -1) if len(matches) == 0 { return nil, fmt.Errorf("mxl parsing failed: no cells found") } rows := make([]map[int]string, 0, 128) current := map[int]string{} for _, m := range matches { val := strings.ReplaceAll(m[1], `""`, `"`) col, err := strconv.Atoi(m[2]) if err != nil { continue } if col == 1 && len(current) > 0 { rows = append(rows, current) current = map[int]string{} } current[col] = strings.TrimSpace(val) } if len(current) > 0 { rows = append(rows, current) } return parseGridRows(rows, "mxl") } func parseXLSXRows(content []byte) ([]stockImportRow, error) { zr, err := zip.NewReader(bytes.NewReader(content), int64(len(content))) if err != nil { return nil, fmt.Errorf("opening xlsx: %w", err) } sharedStrings, _ := readSharedStrings(zr) sheetPath := firstWorksheetPath(zr) if sheetPath == "" { return nil, fmt.Errorf("xlsx parsing failed: worksheet not found") } sheetData, err := readZipFile(zr, sheetPath) if err != nil { return nil, err } type xlsxInline struct { T string `xml:"t"` } type xlsxCell struct { R string `xml:"r,attr"` T string `xml:"t,attr"` V string `xml:"v"` IS *xlsxInline `xml:"is"` } type xlsxRow struct { C []xlsxCell `xml:"c"` } type xlsxSheet struct { Rows []xlsxRow `xml:"sheetData>row"` } var ws xlsxSheet if err := xml.Unmarshal(sheetData, &ws); err != nil { return nil, fmt.Errorf("decode worksheet: %w", err) } grid := make([]map[int]string, 0, len(ws.Rows)) for _, r := range ws.Rows { rowMap := make(map[int]string, len(r.C)) for _, c := range r.C { colIdx := excelRefColumn(c.R) if colIdx < 0 { continue } inlineText := "" if c.IS != nil { inlineText = c.IS.T } rowMap[colIdx] = decodeXLSXCell(c.T, c.V, inlineText, sharedStrings) } grid = append(grid, rowMap) } return parseGridRows(grid, "xlsx") } func parseGridRows(grid []map[int]string, format string) ([]stockImportRow, error) { headerRow := -1 headers := map[string]int{} for i, row := range grid { rowHeaders := map[string]int{} for idx, val := range row { norm := normalizeHeader(val) switch norm { case "папка", "артикул", "описание", "вендор", "стоимость", "свободно": rowHeaders[norm] = idx } } if _, hasArticle := rowHeaders["артикул"]; !hasArticle { continue } if _, hasPrice := rowHeaders["стоимость"]; !hasPrice { continue } headers = rowHeaders headerRow = i break } if headerRow < 0 { return nil, fmt.Errorf("%s parsing failed: header row not found", format) } result := make([]stockImportRow, 0, len(grid)-headerRow-1) idxFolder, hasFolder := headers["папка"] idxArticle := headers["артикул"] idxDesc, hasDesc := headers["описание"] idxVendor, hasVendor := headers["вендор"] idxPrice := headers["стоимость"] idxQty, hasQty := headers["свободно"] if !hasQty { return nil, fmt.Errorf("%s parsing failed: qty column 'Свободно' not found", format) } for i := headerRow + 1; i < len(grid); i++ { row := grid[i] article := strings.TrimSpace(row[idxArticle]) if article == "" { continue } price, err := parseLocalizedFloat(row[idxPrice]) if err != nil { continue } qtyRaw := strings.TrimSpace(row[idxQty]) qty, err := parseLocalizedQty(qtyRaw) qtyInvalid := false if err != nil { qty = 0 qtyInvalid = true } folder := "" if hasFolder { folder = strings.TrimSpace(row[idxFolder]) } description := "" if hasDesc { description = strings.TrimSpace(row[idxDesc]) } vendor := "" if hasVendor { vendor = strings.TrimSpace(row[idxVendor]) } result = append(result, stockImportRow{ Folder: folder, Article: article, Description: description, Vendor: vendor, Price: price, Qty: qty, QtyRaw: qtyRaw, QtyInvalid: qtyInvalid, }) } return result, nil } func parseLocalizedFloat(value string) (float64, error) { clean := strings.TrimSpace(value) clean = strings.ReplaceAll(clean, "\u00a0", "") clean = strings.ReplaceAll(clean, " ", "") clean = strings.ReplaceAll(clean, ",", ".") if clean == "" { return 0, fmt.Errorf("empty number") } return strconv.ParseFloat(clean, 64) } func parseLocalizedQty(value string) (float64, error) { clean := strings.TrimSpace(value) if clean == "" { return 0, fmt.Errorf("empty qty") } if v, err := parseLocalizedFloat(clean); err == nil { return v, nil } // Tolerate strings like "1 200 шт" by extracting the first numeric token. re := regexp.MustCompile(`[-+]?\d[\d\s\u00a0]*(?:[.,]\d+)?`) match := re.FindString(clean) if strings.TrimSpace(match) == "" { return 0, fmt.Errorf("invalid qty: %s", value) } return parseLocalizedFloat(match) } func detectImportDate(content []byte, filename string, fileModTime time.Time) time.Time { if d, ok := extractDateFromText(string(content)); ok { return d } if d, ok := extractDateFromFilename(filename); ok { return d } if !fileModTime.IsZero() { return normalizeDate(fileModTime) } return normalizeDate(time.Now()) } func extractDateFromText(text string) (time.Time, bool) { if m := reISODate.FindStringSubmatch(text); len(m) == 4 { d, err := time.Parse("2006-01-02", m[0]) if err == nil { return normalizeDate(d), true } } if m := reRuDate.FindStringSubmatch(text); len(m) == 4 { d, err := time.Parse("02.01.2006", m[0]) if err == nil { return normalizeDate(d), true } } return time.Time{}, false } func extractDateFromFilename(filename string) (time.Time, bool) { base := filepath.Base(filename) if m := reISODate.FindStringSubmatch(base); len(m) == 4 { d, err := time.Parse("2006-01-02", m[0]) if err == nil { return normalizeDate(d), true } } if m := reRuDate.FindStringSubmatch(base); len(m) == 4 { d, err := time.Parse("02.01.2006", m[0]) if err == nil { return normalizeDate(d), true } } return time.Time{}, false } func normalizeDate(t time.Time) time.Time { y, m, d := t.Date() return time.Date(y, m, d, 0, 0, 0, 0, time.Local) } func median(values []float64) float64 { if len(values) == 0 { return 0 } c := append([]float64(nil), values...) sort.Float64s(c) n := len(c) if n%2 == 0 { return (c[n/2-1] + c[n/2]) / 2 } return c[n/2] } func weightedMedian(values []weightedPricePoint) float64 { if len(values) == 0 { return 0 } type pair struct { price float64 weight float64 } items := make([]pair, 0, len(values)) totalWeight := 0.0 prices := make([]float64, 0, len(values)) for _, v := range values { if v.price <= 0 { continue } prices = append(prices, v.price) w := v.weight if w > 0 { items = append(items, pair{price: v.price, weight: w}) totalWeight += w } } // Fallback for rows without positive weights. if totalWeight <= 0 { return median(prices) } sort.Slice(items, func(i, j int) bool { if items[i].price == items[j].price { return items[i].weight < items[j].weight } return items[i].price < items[j].price }) threshold := totalWeight / 2.0 acc := 0.0 for _, it := range items { acc += it.weight if acc >= threshold { return it.price } } return items[len(items)-1].price } func normalizeKey(v string) string { return lotmatch.NormalizeKey(v) } func readZipFile(zr *zip.Reader, name string) ([]byte, error) { for _, f := range zr.File { if f.Name != name { continue } rc, err := f.Open() if err != nil { return nil, err } defer rc.Close() return io.ReadAll(rc) } return nil, fmt.Errorf("zip entry not found: %s", name) } func firstWorksheetPath(zr *zip.Reader) string { candidates := make([]string, 0, 4) for _, f := range zr.File { if strings.HasPrefix(f.Name, "xl/worksheets/") && strings.HasSuffix(f.Name, ".xml") { candidates = append(candidates, f.Name) } } if len(candidates) == 0 { return "" } sort.Strings(candidates) for _, c := range candidates { if strings.HasSuffix(c, "sheet1.xml") { return c } } return candidates[0] } func readSharedStrings(zr *zip.Reader) ([]string, error) { data, err := readZipFile(zr, "xl/sharedStrings.xml") if err != nil { return nil, err } type richRun struct { Text string `xml:"t"` } type si struct { Text string `xml:"t"` Runs []richRun `xml:"r"` } type sst struct { Items []si `xml:"si"` } var parsed sst if err := xml.Unmarshal(data, &parsed); err != nil { return nil, err } values := make([]string, 0, len(parsed.Items)) for _, item := range parsed.Items { if item.Text != "" { values = append(values, item.Text) continue } var b strings.Builder for _, run := range item.Runs { b.WriteString(run.Text) } values = append(values, b.String()) } return values, nil } func decodeXLSXCell(cellType, value, inlineText string, sharedStrings []string) string { switch cellType { case "s": idx, err := strconv.Atoi(strings.TrimSpace(value)) if err == nil && idx >= 0 && idx < len(sharedStrings) { return strings.TrimSpace(sharedStrings[idx]) } case "inlineStr": return strings.TrimSpace(inlineText) default: return strings.TrimSpace(value) } return strings.TrimSpace(value) } func excelRefColumn(ref string) int { if ref == "" { return -1 } var letters []rune for _, r := range ref { if r >= 'A' && r <= 'Z' { letters = append(letters, r) } else if r >= 'a' && r <= 'z' { letters = append(letters, r-'a'+'A') } else { break } } if len(letters) == 0 { return -1 } col := 0 for _, r := range letters { col = col*26 + int(r-'A'+1) } return col - 1 } func normalizeHeader(v string) string { return strings.ToLower(strings.TrimSpace(strings.ReplaceAll(v, "\u00a0", " "))) }