Async ingest, deferred history, batch delete, vendor normalization, CI identifiers

- history/worker: fix deadlock by moving stale job requeue out of claimNextJob
  into dedicated staleJobRequeuer goroutine (runs every 2 min)
- history/service,tx_apply,cross_entity: add deferred=true mode — write events
  and snapshots but skip projection updates; queue recompute after commit
- ingest/service: IngestHardwareDeferred uses deferred mode; CSV workers up to 8
  (INGEST_CSV_WORKERS env); serial/prefetch lookups use normalize.SerialKey
- api/ingest: JSON /ingest/hardware now async (202 + job_id); new GET
  /ingest/hardware/jobs/{id} endpoint; CSV already async
- history/admin_cancel: replace per-event softDelete loop with batchSoftDeleteEvents
  using IN-clause chunks of 500 to prevent request timeout on large deletes
- normalize: new internal/normalize package with VendorKey, VendorDisplay,
  VendorDisplayPtr, SerialKey, FirmwareKey
- ingest/parser_hardware: vendor fields use normalize.VendorDisplayPtr
- migrations/0021_ci_identifiers: change identifier columns to utf8mb4_unicode_ci
  (case-insensitive) in parts, machines, observations, machine_firmware_states
- bible submodule: update to add identifier-normalization contract

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 22:23:17 +03:00
parent abb3f10f3c
commit 4c284505a8
16 changed files with 780 additions and 150 deletions

View File

@@ -0,0 +1,22 @@
-- Revert to the default collation of utf8mb4 (utf8mb4_0900_ai_ci on MySQL 8, utf8mb4_general_ci on older versions).
-- If your server default differs, add an explicit COLLATE clause to each column below.
-- parts: re-declare the identifier columns without a COLLATE clause, so each
-- one takes the default collation of the utf8mb4 character set.
-- NOTE(review): MODIFY does not carry over attributes that are not restated
-- (e.g. DEFAULT, COMMENT) — confirm these columns have none.
ALTER TABLE parts
    MODIFY COLUMN vendor_serial    VARCHAR(255) CHARACTER SET utf8mb4 NOT NULL,
    MODIFY COLUMN vendor           VARCHAR(255) CHARACTER SET utf8mb4 NULL,
    MODIFY COLUMN model            VARCHAR(255) CHARACTER SET utf8mb4 NULL,
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 NULL,
    MODIFY COLUMN component_type   VARCHAR(128) CHARACTER SET utf8mb4 NULL;
-- machines: re-declare the identifier columns without a COLLATE clause, so
-- each one takes the default collation of the utf8mb4 character set.
ALTER TABLE machines
    MODIFY COLUMN vendor_serial VARCHAR(255) CHARACTER SET utf8mb4 NOT NULL,
    MODIFY COLUMN vendor        VARCHAR(255) CHARACTER SET utf8mb4 NULL,
    MODIFY COLUMN model         VARCHAR(255) CHARACTER SET utf8mb4 NULL,
    MODIFY COLUMN cpu_model     VARCHAR(255) CHARACTER SET utf8mb4 NULL;
-- observations: re-declare firmware_version without a COLLATE clause, so it
-- takes the default collation of the utf8mb4 character set.
ALTER TABLE observations
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 NULL;
-- machine_firmware_states: re-declare both NOT NULL columns without a COLLATE
-- clause, so each takes the default collation of the utf8mb4 character set.
ALTER TABLE machine_firmware_states
    MODIFY COLUMN device_name      VARCHAR(255) CHARACTER SET utf8mb4 NOT NULL,
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 NOT NULL;

View File

@@ -0,0 +1,34 @@
-- Migration 0021: case-insensitive identifiers
--
-- Per identifier-normalization contract: original values are stored as-is,
-- all comparisons (including UNIQUE constraints) are case-insensitive.
--
-- WARNING: if the database already contains rows that differ only in case
-- (e.g. "SN-001" and "sn-001"), this migration will fail with a duplicate-key
-- error. Deduplicate the data first.
--
-- We use utf8mb4_unicode_ci, which is case-insensitive and accent-insensitive,
-- so values that differ only in accents are also treated as equal.
-- ── parts ────────────────────────────────────────────────────────────────────
-- Re-declare each identifier column with COLLATE utf8mb4_unicode_ci.
-- NOTE(review): MODIFY does not carry over attributes that are not restated
-- (e.g. DEFAULT, COMMENT) — confirm these columns have none.
ALTER TABLE parts
    MODIFY COLUMN vendor_serial    VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
    MODIFY COLUMN vendor           VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
    MODIFY COLUMN model            VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
    MODIFY COLUMN component_type   VARCHAR(128) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL;
-- ── machines ─────────────────────────────────────────────────────────────────
-- Re-declare each identifier column with COLLATE utf8mb4_unicode_ci.
ALTER TABLE machines
    MODIFY COLUMN vendor_serial VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
    MODIFY COLUMN vendor        VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
    MODIFY COLUMN model         VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
    MODIFY COLUMN cpu_model     VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL;
-- ── observations ─────────────────────────────────────────────────────────────
-- Re-declare firmware_version with COLLATE utf8mb4_unicode_ci.
ALTER TABLE observations
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL;
-- ── machine_firmware_states ───────────────────────────────────────────────────
-- Re-declare both NOT NULL columns with COLLATE utf8mb4_unicode_ci.
ALTER TABLE machine_firmware_states
    MODIFY COLUMN device_name      VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
    MODIFY COLUMN firmware_version VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL;