Async ingest, deferred history, batch delete, vendor normalization, CI identifiers
- history/worker: fix deadlock by moving stale job requeue out of claimNextJob
into dedicated staleJobRequeuer goroutine (runs every 2 min)
- history/service,tx_apply,cross_entity: add deferred=true mode — write events
and snapshots but skip projection updates; queue a recompute after commit
- ingest/service: IngestHardwareDeferred uses deferred mode; CSV workers up to 8
(INGEST_CSV_WORKERS env); serial/prefetch lookups use normalize.SerialKey
- api/ingest: JSON /ingest/hardware now async (202 + job_id); new GET
/ingest/hardware/jobs/{id} endpoint; CSV already async
- history/admin_cancel: replace per-event softDelete loop with batchSoftDeleteEvents
using IN-clause chunks of 500 to prevent request timeout on large deletes
- normalize: new internal/normalize package with VendorKey, VendorDisplay,
VendorDisplayPtr, SerialKey, FirmwareKey
- ingest/parser_hardware: vendor fields use normalize.VendorDisplayPtr
- migrations/0021_ci_identifiers: change identifier columns to utf8mb4_unicode_ci
(case-insensitive) in parts, machines, observations, machine_firmware_states
- bible submodule: update to add identifier-normalization contract
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
104
internal/normalize/vendor.go
Normal file
104
internal/normalize/vendor.go
Normal file
@@ -0,0 +1,104 @@
|
||||
// Package normalize provides identifier normalization per the bible contract:
|
||||
// original values are stored as-is; comparisons use canonical keys.
|
||||
package normalize
|
||||
|
||||
import "strings"
|
||||
|
||||
// vendorAliases maps a known vendor spelling to the display name it should be
// normalized to. Keys are the lowercased, whitespace-trimmed spellings that
// callers look up; several keys may share one display name.
//
// Add entries here as new vendor variants are discovered.
var vendorAliases = map[string]string{
	"intel":             "Intel",
	"intel corp":        "Intel",
	"intel corporation": "Intel",

	"emulex":             "Emulex",
	"emulex corp":        "Emulex",
	"emulex corporation": "Emulex",
}
|
||||
|
||||
// corporateSuffixes lists the lowercase legal-entity suffixes that are removed
// from the end of a vendor string when building a vendor key, so that
// "Intel Corp" and "Intel" resolve to the same canonical key.
//
// Every entry starts with a space so that only a whole trailing word matches
// ("cisco" does not end in " co"). Abbreviations are listed both with and
// without the trailing period because inventory sources commonly report
// spellings such as "Intel Corp." and "Dell Inc.".
var corporateSuffixes = []string{
	" corporation",
	" corp.",
	" corp",
	" incorporated",
	" inc.",
	" inc",
	" limited",
	" ltd.",
	" ltd",
	" llc",
	" co.",
	" co",
	" gmbh",
	" s.a.",
	" ag",
}
|
||||
|
||||
// VendorKey returns a lowercase canonical key for vendor deduplication.
|
||||
// Two vendor strings with the same key are considered the same vendor.
|
||||
//
|
||||
// VendorKey("Intel Corp") → "intel"
|
||||
// VendorKey("INTEL CORPORATION") → "intel"
|
||||
// VendorKey("intel") → "intel"
|
||||
func VendorKey(v string) string {
|
||||
key := strings.ToLower(strings.TrimSpace(v))
|
||||
// Check exact alias match first.
|
||||
if _, ok := vendorAliases[key]; ok {
|
||||
return key
|
||||
}
|
||||
// Strip known corporate suffixes.
|
||||
for _, suffix := range corporateSuffixes {
|
||||
if strings.HasSuffix(key, suffix) {
|
||||
key = strings.TrimSpace(key[:len(key)-len(suffix)])
|
||||
break
|
||||
}
|
||||
}
|
||||
return key
|
||||
}
|
||||
|
||||
// VendorDisplay returns the canonical display name for a vendor string.
|
||||
// If the vendor is in the alias map it is normalized to the canonical name;
|
||||
// otherwise the original trimmed value is returned unchanged (preserving case).
|
||||
//
|
||||
// VendorDisplay("intel corp") → "Intel"
|
||||
// VendorDisplay("Samsung") → "Samsung"
|
||||
func VendorDisplay(v string) string {
|
||||
if v == "" {
|
||||
return v
|
||||
}
|
||||
key := strings.ToLower(strings.TrimSpace(v))
|
||||
if display, ok := vendorAliases[key]; ok {
|
||||
return display
|
||||
}
|
||||
// Check key with suffixes stripped.
|
||||
stripped := VendorKey(v)
|
||||
if display, ok := vendorAliases[stripped]; ok {
|
||||
return display
|
||||
}
|
||||
return strings.TrimSpace(v)
|
||||
}
|
||||
|
||||
// VendorDisplayPtr is VendorDisplay for a pointer; returns nil unchanged.
|
||||
func VendorDisplayPtr(v *string) *string {
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
out := VendorDisplay(*v)
|
||||
if out == "" {
|
||||
return nil
|
||||
}
|
||||
return &out
|
||||
}
|
||||
|
||||
// SerialKey returns a lowercase key for case-insensitive serial deduplication.
// Leading and trailing whitespace is removed before lowercasing; interior
// characters are otherwise left untouched.
//
//	SerialKey("SN-001-ABC") → "sn-001-abc"
func SerialKey(v string) string {
	return strings.ToLower(strings.TrimSpace(v))
}
|
||||
|
||||
// FirmwareKey returns a lowercase key for case-insensitive firmware comparison.
// Leading and trailing whitespace is removed before lowercasing; the version
// string is not parsed or otherwise rewritten.
//
//	FirmwareKey("v1.2.3-RC1") → "v1.2.3-rc1"
func FirmwareKey(v string) string {
	return strings.ToLower(strings.TrimSpace(v))
}
|
||||
Reference in New Issue
Block a user