Files
PriceForge/migrations/025_dedup_vendor_seen_by_partnumber.sql

133 lines
4.4 KiB
SQL

-- Deduplicate seen registry by partnumber only and enforce unique partnumber key.
-- Note: MariaDB unique index on VARCHAR can treat trailing spaces as equal, so normalize first.
-- Strip trailing spaces from stored part numbers.
-- The rows are detected by comparing character lengths rather than with
-- `partnumber <> RTRIM(partnumber)`: under a PAD SPACE collation a string
-- comparison ignores trailing spaces, so that predicate is never true for the
-- very rows this statement is meant to fix and the UPDATE would match nothing.
-- CHAR_LENGTH counts trailing spaces, so it works under any collation.
UPDATE qt_vendor_partnumber_seen
SET partnumber = RTRIM(partnumber)
WHERE CHAR_LENGTH(partnumber) <> CHAR_LENGTH(RTRIM(partnumber));
-- Build one summary row per normalized part number (LOWER(TRIM(partnumber))).
-- Rows whose part number is NULL or blank are excluded here.
--
-- Columns produced:
--   partnumber_key    normalized part number the group was built on
--   keep_id           id of the row that survives deduplication
--   has_stock         1 when any row in the group has source_type = 'stock'
--   any_ignored       MAX(is_ignored) over the group
--   max_last_seen     latest last_seen_at in the group
--   max_ignored_at    latest ignored_at among ignored rows
--   keep_ignored_by   ignored_by of the preferred ignored row (NULL if none)
--   keep_description  trimmed description of the preferred row (NULL if none)
--   keep_vendor       trimmed vendor of the preferred row (NULL if none)
--
-- Each "preferred row" is chosen by ranking ids with GROUP_CONCAT(... ORDER BY)
-- and taking the first list element; GROUP_CONCAT skips NULLs, so rows that do
-- not qualify for an attribute are ignored for that attribute. The text values
-- are then fetched by id through a join instead of being concatenated
-- themselves: concatenating the text and splitting on '\n' truncated any value
-- that contained a line break, and exposed long values to group_concat_max_len.
-- NOTE(review): only the first element of each id list is used, so a list cut
-- by group_concat_max_len still yields the right id; confirm the server does
-- not escalate the truncation warning to an error under strict SQL mode.

-- Allow the script to be re-run in a session where an earlier attempt failed.
DROP TEMPORARY TABLE IF EXISTS tmp_qt_seen_keep;

CREATE TEMPORARY TABLE tmp_qt_seen_keep AS
SELECT
    g.partnumber_key,
    g.keep_id,
    g.has_stock,
    g.any_ignored,
    g.max_last_seen,
    g.max_ignored_at,
    ign.ignored_by AS keep_ignored_by,
    NULLIF(TRIM(dsc.description), '') AS keep_description,
    NULLIF(TRIM(vnd.vendor), '') AS keep_vendor
FROM (
    SELECT
        LOWER(TRIM(partnumber)) AS partnumber_key,
        -- Survivor: prefer a row with a vendor, then a stock row, then an
        -- ignored row, then the most recently seen, then the highest id.
        CAST(SUBSTRING_INDEX(
            GROUP_CONCAT(
                id
                ORDER BY
                    (TRIM(COALESCE(vendor, '')) <> '') DESC,
                    (source_type = 'stock') DESC,
                    (is_ignored = 1) DESC,
                    last_seen_at DESC,
                    id DESC
                SEPARATOR ','
            ),
            ',',
            1
        ) AS UNSIGNED) AS keep_id,
        MAX(source_type = 'stock') AS has_stock,
        MAX(is_ignored) AS any_ignored,
        MAX(last_seen_at) AS max_last_seen,
        MAX(CASE WHEN is_ignored = 1 THEN ignored_at END) AS max_ignored_at,
        -- Row supplying ignored_by: ignored rows with a non-blank ignored_by,
        -- most recently ignored first.
        CAST(SUBSTRING_INDEX(
            GROUP_CONCAT(
                CASE
                    WHEN is_ignored = 1 AND NULLIF(TRIM(ignored_by), '') IS NOT NULL THEN id
                    ELSE NULL
                END
                ORDER BY ignored_at DESC, updated_at DESC, id DESC
                SEPARATOR ','
            ),
            ',',
            1
        ) AS UNSIGNED) AS ignored_by_id,
        -- Row supplying the description: non-blank descriptions only,
        -- stock rows first, then rows with a vendor, then most recently seen.
        CAST(SUBSTRING_INDEX(
            GROUP_CONCAT(
                CASE
                    WHEN NULLIF(TRIM(description), '') IS NOT NULL THEN id
                    ELSE NULL
                END
                ORDER BY
                    (source_type = 'stock') DESC,
                    (TRIM(COALESCE(vendor, '')) <> '') DESC,
                    last_seen_at DESC,
                    id DESC
                SEPARATOR ','
            ),
            ',',
            1
        ) AS UNSIGNED) AS description_id,
        -- Row supplying the vendor: non-blank vendors only,
        -- stock rows first, then most recently seen.
        CAST(SUBSTRING_INDEX(
            GROUP_CONCAT(
                CASE
                    WHEN NULLIF(TRIM(vendor), '') IS NOT NULL THEN id
                    ELSE NULL
                END
                ORDER BY
                    (source_type = 'stock') DESC,
                    last_seen_at DESC,
                    id DESC
                SEPARATOR ','
            ),
            ',',
            1
        ) AS UNSIGNED) AS vendor_id
    FROM qt_vendor_partnumber_seen
    WHERE TRIM(COALESCE(partnumber, '')) <> ''
    GROUP BY LOWER(TRIM(partnumber))
) AS g
-- LEFT JOINs: a group with no qualifying row has a NULL id and yields NULL.
LEFT JOIN qt_vendor_partnumber_seen AS ign
    ON ign.id = g.ignored_by_id
LEFT JOIN qt_vendor_partnumber_seen AS dsc
    ON dsc.id = g.description_id
LEFT JOIN qt_vendor_partnumber_seen AS vnd
    ON vnd.id = g.vendor_id;
-- Step 1: delete every row that is not the chosen survivor of its group.
-- Rows with no matching group (NULL or blank part number, which the temp table
-- excludes) have k.keep_id IS NULL and are deleted as well.
-- This runs BEFORE the survivor is rewritten: the rewrite changes source_type,
-- vendor and partnumber, and the legacy unique key
-- uq_qt_vendor_partnumber_seen_source_key is only dropped later in this script.
-- NOTE(review): that key's columns are not visible here; if it covers any of
-- the rewritten columns, updating first could collide with a doomed duplicate.
-- The temp table already holds every aggregate the update needs, so the final
-- data is the same in either order.
DELETE s
FROM qt_vendor_partnumber_seen s
LEFT JOIN tmp_qt_seen_keep k ON LOWER(TRIM(s.partnumber)) = k.partnumber_key
WHERE k.keep_id IS NULL OR s.id <> k.keep_id;

-- Step 2: fold the group's aggregated values into the surviving row.
UPDATE qt_vendor_partnumber_seen s
JOIN tmp_qt_seen_keep k ON s.id = k.keep_id
SET
    -- 'stock' wins when any row in the group was a stock row; every other
    -- group is written as 'manual'.
    s.source_type = CASE WHEN k.has_stock = 1 THEN 'stock' ELSE 'manual' END,
    -- Keep the survivor's own non-blank value, else the group's best, else ''.
    s.vendor = COALESCE(NULLIF(TRIM(s.vendor), ''), NULLIF(TRIM(k.keep_vendor), ''), ''),
    s.partnumber = TRIM(s.partnumber),
    s.description = COALESCE(NULLIF(TRIM(s.description), ''), NULLIF(TRIM(k.keep_description), ''), s.description),
    -- GREATEST() yields NULL when either argument is NULL, which would throw
    -- away the group's maximum if the survivor's own last_seen_at is NULL.
    -- Fall back to whichever side is non-NULL (no effect if the column is
    -- declared NOT NULL).
    s.last_seen_at = COALESCE(GREATEST(s.last_seen_at, k.max_last_seen), k.max_last_seen, s.last_seen_at),
    s.is_ignored = k.any_ignored,
    -- Ignore metadata is kept only when the merged row ends up ignored.
    s.ignored_at = CASE
        WHEN k.any_ignored = 1 THEN COALESCE(s.ignored_at, k.max_ignored_at)
        ELSE NULL
    END,
    s.ignored_by = CASE
        WHEN k.any_ignored = 1 THEN COALESCE(NULLIF(TRIM(s.ignored_by), ''), NULLIF(TRIM(k.keep_ignored_by), ''))
        ELSE NULL
    END;

-- Step 3: the summary table is no longer needed.
DROP TEMPORARY TABLE IF EXISTS tmp_qt_seen_keep;
-- Safety net: if two rows still share the same normalized part number
-- (LOWER(TRIM(partnumber))), delete every one that has a higher-id twin,
-- leaving only the row with the greatest id in each group.
DELETE older
FROM qt_vendor_partnumber_seen AS older
INNER JOIN qt_vendor_partnumber_seen AS newer
    ON older.id < newer.id
    AND LOWER(TRIM(older.partnumber)) = LOWER(TRIM(newer.partnumber));
-- Drop the legacy unique index uq_qt_vendor_partnumber_seen_source_key, but
-- only when it exists in the current schema. The statement text is chosen at
-- run time and executed through PREPARE; when the index is absent a harmless
-- 'SELECT 1' is executed instead.
SELECT COUNT(*)
INTO @seen_idx_old_exists
FROM information_schema.statistics
WHERE index_name = 'uq_qt_vendor_partnumber_seen_source_key'
    AND table_name = 'qt_vendor_partnumber_seen'
    AND table_schema = DATABASE();

SET @seen_idx_old_sql := CASE
    WHEN @seen_idx_old_exists > 0
        THEN 'ALTER TABLE qt_vendor_partnumber_seen DROP INDEX uq_qt_vendor_partnumber_seen_source_key'
    ELSE 'SELECT 1'
END;

PREPARE stmt_seen_idx_old FROM @seen_idx_old_sql;
EXECUTE stmt_seen_idx_old;
DEALLOCATE PREPARE stmt_seen_idx_old;
-- Create the unique index uq_qt_vendor_partnumber_seen_partnumber on
-- (partnumber) unless an index with that name already exists in the current
-- schema. The statement text is chosen at run time and executed through
-- PREPARE; when the index is already present a harmless 'SELECT 1' is executed.
SELECT COUNT(*)
INTO @seen_idx_new_exists
FROM information_schema.statistics
WHERE index_name = 'uq_qt_vendor_partnumber_seen_partnumber'
    AND table_name = 'qt_vendor_partnumber_seen'
    AND table_schema = DATABASE();

SET @seen_idx_new_sql := CASE
    WHEN @seen_idx_new_exists > 0
        THEN 'SELECT 1'
    ELSE 'ALTER TABLE qt_vendor_partnumber_seen ADD UNIQUE INDEX uq_qt_vendor_partnumber_seen_partnumber (partnumber)'
END;

PREPARE stmt_seen_idx_new FROM @seen_idx_new_sql;
EXECUTE stmt_seen_idx_new;
DEALLOCATE PREPARE stmt_seen_idx_new;