ui: deduplicate files by name and SHA-256 hash before batch convert
On folder selection, filter out duplicate files before conversion:
- First pass: same basename → skip (same filename in different subdirs)
- Second pass: same SHA-256 hash → skip (identical content, different path)

Duplicates are excluded from the convert queue and shown as a warning in the summary, with the reason (same name / same content).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
let sourceType = 'archive';
|
||||
let convertFiles = [];
|
||||
let isConvertRunning = false;
|
||||
let convertDuplicates = [];
|
||||
const CONVERT_MAX_FILES_PER_BATCH = 1000;
|
||||
let supportedUploadExtensions = null;
|
||||
let supportedConvertExtensions = null;
|
||||
@@ -623,8 +624,16 @@ function initConvertMode() {
|
||||
return;
|
||||
}
|
||||
|
||||
folderInput.addEventListener('change', () => {
|
||||
convertFiles = Array.from(folderInput.files || []).filter(file => file && file.name);
|
||||
folderInput.addEventListener('change', async () => {
|
||||
const raw = Array.from(folderInput.files || []).filter(file => file && file.name);
|
||||
const summary = document.getElementById('convert-folder-summary');
|
||||
if (summary) {
|
||||
summary.textContent = 'Проверка дубликатов…';
|
||||
summary.className = 'api-connect-status';
|
||||
}
|
||||
const { unique, duplicates } = await deduplicateConvertFiles(raw);
|
||||
convertFiles = unique;
|
||||
convertDuplicates = duplicates;
|
||||
renderConvertSummary();
|
||||
});
|
||||
|
||||
@@ -657,7 +666,14 @@ function renderConvertSummary() {
|
||||
const batchCount = Math.ceil(supportedFiles.length / CONVERT_MAX_FILES_PER_BATCH);
|
||||
const batchesText = batchCount > 1 ? ` Будет ${batchCount} прохода(ов) по ${CONVERT_MAX_FILES_PER_BATCH} файлов.` : '';
|
||||
|
||||
summary.innerHTML = `<strong>${supportedFiles.length}</strong> файлов готовы к конвертации.${previewText ? ` ${previewText}` : ''}${remaining > 0 ? ` и ещё ${remaining}` : ''}.${skippedText}${batchesText}`;
|
||||
let dupText = '';
|
||||
if (convertDuplicates.length > 0) {
|
||||
const names = convertDuplicates.map(d => escapeHtml(d.name)).join(', ');
|
||||
const reasons = convertDuplicates.map(d => d.reason === 'hash' ? 'одинаковое содержимое' : 'одинаковое имя');
|
||||
const uniqueReasons = [...new Set(reasons)].join(', ');
|
||||
dupText = ` <span style="color:#c0392b">⚠ Пропущено дубликатов: ${convertDuplicates.length} (${uniqueReasons}): ${names}.</span>`;
|
||||
}
|
||||
summary.innerHTML = `<strong>${supportedFiles.length}</strong> файлов готовы к конвертации.${previewText ? ` ${previewText}` : ''}${remaining > 0 ? ` и ещё ${remaining}` : ''}.${skippedText}${batchesText}${dupText}`;
|
||||
summary.className = 'api-connect-status';
|
||||
}
|
||||
|
||||
@@ -860,6 +876,40 @@ function parseConvertErrorPayload(bodyText) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Removes duplicate files from a selection before it is queued for conversion.
 *
 * Pass 1 keeps the first file seen for each basename (`file.name`); later
 * files with the same basename are reported with reason `'name'`.
 * Pass 2 keeps the first file seen for each SHA-256 content hash; later files
 * with the same hash are reported with reason `'hash'`.
 *
 * Hashing is best-effort: a file whose content cannot be read or hashed is
 * kept rather than dropped, and when Web Crypto is unavailable the second
 * pass is skipped entirely.
 *
 * @param {Iterable<File>|null|undefined} files - Candidate files, in selection order.
 * @returns {Promise<{unique: File[], duplicates: Array<{name: string, reason: string}>}>}
 *   `unique` preserves the input order; each `duplicates` entry carries the
 *   file's `webkitRelativePath` (or `name` when the path is empty) and the
 *   reason it was skipped (`'name'` or `'hash'`).
 */
async function deduplicateConvertFiles(files) {
    const duplicates = [];

    // First pass: deduplicate by basename.
    const seenNames = new Set();
    const nameUnique = [];
    for (const file of files || []) {
        if (seenNames.has(file.name)) {
            duplicates.push({ name: file.webkitRelativePath || file.name, reason: 'name' });
        } else {
            seenNames.add(file.name);
            nameUnique.push(file);
        }
    }

    // crypto.subtle is only exposed in secure contexts. Without it no hash can
    // be computed, so avoid reading every file just to fail on digest().
    const canHash = typeof crypto !== 'undefined' && Boolean(crypto.subtle);
    if (!canHash) {
        return { unique: nameUnique, duplicates };
    }

    // Files with different byte sizes cannot have identical content, so only
    // files that share a size with another candidate need to be read and hashed.
    const sizeCounts = new Map();
    for (const file of nameUnique) {
        sizeCounts.set(file.size, (sizeCounts.get(file.size) || 0) + 1);
    }

    // Second pass: deduplicate by SHA-256 hash.
    // Files are processed one at a time so that only a single file's content
    // is held in memory at once.
    const seenHashes = new Set();
    const hashUnique = [];
    for (const file of nameUnique) {
        if (sizeCounts.get(file.size) === 1) {
            hashUnique.push(file);
            continue;
        }
        try {
            const buf = await file.arrayBuffer();
            const hashBuf = await crypto.subtle.digest('SHA-256', buf);
            const hash = Array.from(new Uint8Array(hashBuf), b => b.toString(16).padStart(2, '0')).join('');
            if (seenHashes.has(hash)) {
                duplicates.push({ name: file.webkitRelativePath || file.name, reason: 'hash' });
            } else {
                seenHashes.add(hash);
                hashUnique.push(file);
            }
        } catch (err) {
            // Best effort: an unreadable/unhashable file is still converted,
            // but the failure is surfaced instead of being silently dropped.
            console.warn(`deduplicateConvertFiles: could not hash "${file.name}", keeping it`, err);
            hashUnique.push(file);
        }
    }

    return { unique: hashUnique, duplicates };
}
|
||||
|
||||
function isSupportedConvertFileName(filename) {
|
||||
const name = String(filename || '').trim().toLowerCase();
|
||||
if (!name) {
|
||||
|
||||
Reference in New Issue
Block a user