Async ingest, deferred history, batch delete, vendor normalization, CI identifiers

- history/worker: fix deadlock by moving stale job requeue out of claimNextJob
  into dedicated staleJobRequeuer goroutine (runs every 2 min)
- history/service,tx_apply,cross_entity: add deferred=true mode — write events+
  snapshots but skip projection updates; queue recompute after commit
- ingest/service: IngestHardwareDeferred uses deferred mode; CSV workers up to 8
  (INGEST_CSV_WORKERS env); serial/prefetch lookups use normalize.SerialKey
- api/ingest: JSON /ingest/hardware now async (202 + job_id); new GET
  /ingest/hardware/jobs/{id} endpoint; CSV already async
- history/admin_cancel: replace per-event softDelete loop with batchSoftDeleteEvents
  using IN-clause chunks of 500 to prevent request timeout on large deletes
- normalize: new internal/normalize package with VendorKey, VendorDisplay,
  VendorDisplayPtr, SerialKey, FirmwareKey
- ingest/parser_hardware: vendor fields use normalize.VendorDisplayPtr
- migrations/0021_ci_identifiers: change identifier columns to utf8mb4_unicode_ci
  (case-insensitive) in parts, machines, observations, machine_firmware_states
- bible submodule: update to add identifier-normalization contract

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 22:23:17 +03:00
parent abb3f10f3c
commit 4c284505a8
16 changed files with 780 additions and 150 deletions

View File

@@ -0,0 +1,104 @@
// Package normalize provides identifier normalization per the bible contract:
// original values are stored as-is; comparisons use canonical keys.
package normalize
import "strings"
// vendorAliases maps a lowercase vendor spelling to the vendor's normalized
// display name. Every known spelling of one vendor points at the same display
// name; the lowercase form of that display name is the vendor's canonical key.
// Add entries here as new vendor variants are discovered.
var vendorAliases = map[string]string{
	"intel":              "Intel",
	"intel corp":         "Intel",
	"intel corporation":  "Intel",
	"emulex":             "Emulex",
	"emulex corp":        "Emulex",
	"emulex corporation": "Emulex",
}

// corporateSuffixes are stripped when building a vendor key so that
// "Intel Corp" and "Intel" resolve to the same canonical key. Each entry
// starts with a space so only a whole trailing word is removed.
var corporateSuffixes = []string{
	" corporation",
	" corp",
	" incorporated",
	" inc",
	" limited",
	" ltd",
	" llc",
	" co.",
	" co",
	" gmbh",
	" s.a.",
	" ag",
}

// VendorKey returns a lowercase canonical key for vendor deduplication.
// Two vendor strings with the same key are considered the same vendor.
//
// The input is trimmed and lowercased. If the result is a known alias, the
// key is the lowercase form of that alias's display name, so every listed
// spelling of a vendor collapses to one key. Otherwise at most one corporate
// suffix is stripped, and the stripped form is checked against the alias
// table once more before being returned.
//
//	VendorKey("Intel Corp")        → "intel"
//	VendorKey("INTEL CORPORATION") → "intel"
//	VendorKey("intel")             → "intel"
//	VendorKey("Acme Ltd")          → "acme"
func VendorKey(v string) string {
	key := strings.ToLower(strings.TrimSpace(v))

	// Returning the alias spelling itself would keep "intel corp" and "intel"
	// apart; resolve through the display name so they share a key.
	if display, ok := vendorAliases[key]; ok {
		return strings.ToLower(display)
	}

	// Strip the first matching corporate suffix only.
	for _, suffix := range corporateSuffixes {
		if strings.HasSuffix(key, suffix) {
			key = strings.TrimSpace(strings.TrimSuffix(key, suffix))
			break
		}
	}

	// The stripped form may itself be a known alias (e.g. "intel inc" → "intel").
	if display, ok := vendorAliases[key]; ok {
		return strings.ToLower(display)
	}
	return key
}
// VendorDisplay returns the canonical display name for a vendor string.
// If the vendor is in the alias map it is normalized to the canonical name;
// otherwise the original trimmed value is returned unchanged (preserving case).
//
// VendorDisplay("intel corp") → "Intel"
// VendorDisplay("Samsung") → "Samsung"
func VendorDisplay(v string) string {
if v == "" {
return v
}
key := strings.ToLower(strings.TrimSpace(v))
if display, ok := vendorAliases[key]; ok {
return display
}
// Check key with suffixes stripped.
stripped := VendorKey(v)
if display, ok := vendorAliases[stripped]; ok {
return display
}
return strings.TrimSpace(v)
}
// VendorDisplayPtr applies VendorDisplay to an optional vendor string.
// It returns nil when v is nil or when the normalized value is empty;
// otherwise it returns a pointer to a new string holding the display name.
func VendorDisplayPtr(v *string) *string {
	if v != nil {
		if name := VendorDisplay(*v); name != "" {
			return &name
		}
	}
	return nil
}
// SerialKey builds the comparison key for a serial number: surrounding
// whitespace is removed and the remainder is lowercased, so serials that
// differ only in case or padding compare equal.
//
//	SerialKey("SN-001-ABC") → "sn-001-abc"
func SerialKey(v string) string {
	trimmed := strings.TrimSpace(v)
	return strings.ToLower(trimmed)
}
// FirmwareKey builds the comparison key for a firmware version string:
// surrounding whitespace is removed and the remainder is lowercased, so
// versions that differ only in case or padding compare equal.
//
//	FirmwareKey("v1.2.3-RC1") → "v1.2.3-rc1"
func FirmwareKey(v string) string {
	version := strings.TrimSpace(v)
	version = strings.ToLower(version)
	return version
}