ui: deduplicate files by name and SHA-256 hash before batch convert

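On folder selection, filter out duplicate files before conversion:
- First pass: same basename → keep the first occurrence, skip the rest (same filename in different subdirs)
- Second pass (over files that survived the first pass): same SHA-256 hash → keep the first occurrence, skip the rest (identical content, different name)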

Duplicates are excluded from the convert queue and shown as a warning
in the summary, together with the reason (same name / same content).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-06 12:45:09 +03:00
parent f09344e288
commit 62d6ad6f66

View File

@@ -14,6 +14,7 @@ document.addEventListener('DOMContentLoaded', () => {
let sourceType = 'archive';
let convertFiles = [];
let isConvertRunning = false;
let convertDuplicates = [];
const CONVERT_MAX_FILES_PER_BATCH = 1000;
let supportedUploadExtensions = null;
let supportedConvertExtensions = null;
@@ -623,8 +624,16 @@ function initConvertMode() {
return;
}
folderInput.addEventListener('change', () => {
convertFiles = Array.from(folderInput.files || []).filter(file => file && file.name);
folderInput.addEventListener('change', async () => {
const raw = Array.from(folderInput.files || []).filter(file => file && file.name);
const summary = document.getElementById('convert-folder-summary');
if (summary) {
summary.textContent = 'Проверка дубликатов…';
summary.className = 'api-connect-status';
}
const { unique, duplicates } = await deduplicateConvertFiles(raw);
convertFiles = unique;
convertDuplicates = duplicates;
renderConvertSummary();
});
@@ -657,7 +666,14 @@ function renderConvertSummary() {
const batchCount = Math.ceil(supportedFiles.length / CONVERT_MAX_FILES_PER_BATCH);
const batchesText = batchCount > 1 ? ` Будет ${batchCount} прохода(ов) по ${CONVERT_MAX_FILES_PER_BATCH} файлов.` : '';
summary.innerHTML = `<strong>${supportedFiles.length}</strong> файлов готовы к конвертации.${previewText ? ` ${previewText}` : ''}${remaining > 0 ? ` и ещё ${remaining}` : ''}.${skippedText}${batchesText}`;
let dupText = '';
if (convertDuplicates.length > 0) {
const names = convertDuplicates.map(d => escapeHtml(d.name)).join(', ');
const reasons = convertDuplicates.map(d => d.reason === 'hash' ? 'одинаковое содержимое' : 'одинаковое имя');
const uniqueReasons = [...new Set(reasons)].join(', ');
dupText = ` <span style="color:#c0392b">⚠ Пропущено дубликатов: ${convertDuplicates.length} (${uniqueReasons}): ${names}.</span>`;
}
summary.innerHTML = `<strong>${supportedFiles.length}</strong> файлов готовы к конвертации.${previewText ? ` ${previewText}` : ''}${remaining > 0 ? ` и ещё ${remaining}` : ''}.${skippedText}${batchesText}${dupText}`;
summary.className = 'api-connect-status';
}
@@ -860,6 +876,40 @@ function parseConvertErrorPayload(bodyText) {
}
}
/**
 * Removes duplicate files from a selection before batch conversion.
 *
 * Two passes are applied, and in both the first occurrence (in input order)
 * is the one that is kept:
 *   1. Files whose `name` was already seen are dropped (reason `'name'`).
 *   2. Among the survivors, files whose SHA-256 content hash was already seen
 *      are dropped (reason `'hash'`).
 *
 * A file that cannot be read or hashed is kept rather than dropped.
 *
 * @param {Iterable<File>} files - Candidate files; each must expose `name`,
 *   `size`, `arrayBuffer()` and optionally `webkitRelativePath`.
 * @returns {Promise<{unique: File[], duplicates: Array<{name: string, reason: 'name'|'hash'}>}>}
 *   `unique` preserves input order; `duplicates` lists name duplicates first,
 *   then content duplicates, each labelled with its relative path when available.
 */
async function deduplicateConvertFiles(files) {
    const duplicates = [];
    // Prefer the relative path in reports so same-named files are distinguishable.
    const describe = (file) => file.webkitRelativePath || file.name;

    // First pass: deduplicate by basename.
    const seenNames = new Set();
    const uniqueByName = [];
    for (const file of files) {
        if (seenNames.has(file.name)) {
            duplicates.push({ name: describe(file), reason: 'name' });
            continue;
        }
        seenNames.add(file.name);
        uniqueByName.push(file);
    }

    // Two files can only have identical content if they have the same byte
    // size, so count sizes up front and skip reading/hashing any file whose
    // size is unique in the selection.
    const sizeCounts = new Map();
    for (const file of uniqueByName) {
        sizeCounts.set(file.size, (sizeCounts.get(file.size) || 0) + 1);
    }

    // Second pass: deduplicate by SHA-256 hash.
    const seenHashes = new Set();
    const unique = [];
    for (const file of uniqueByName) {
        if (sizeCounts.get(file.size) === 1) {
            unique.push(file);
            continue;
        }
        try {
            const contents = await file.arrayBuffer();
            const digest = await crypto.subtle.digest('SHA-256', contents);
            const hash = Array.from(new Uint8Array(digest))
                .map((byte) => byte.toString(16).padStart(2, '0'))
                .join('');
            if (seenHashes.has(hash)) {
                duplicates.push({ name: describe(file), reason: 'hash' });
            } else {
                seenHashes.add(hash);
                unique.push(file);
            }
        } catch (err) {
            // Best effort: an unreadable file (or an environment without
            // crypto.subtle) must not block conversion, so keep the file.
            console.warn('deduplicateConvertFiles: could not hash file, keeping it:', describe(file), err);
            unique.push(file);
        }
    }

    return { unique, duplicates };
}
function isSupportedConvertFileName(filename) {
const name = String(filename || '').trim().toLowerCase();
if (!name) {