@@ -13,6 +13,7 @@ import { seeders } from './seeders';
1313
1414import dotenv from 'dotenv' ;
1515import { installSqlitePragmas } from './sqlite-pragmas.js' ;
16+ import { matchingUpdatedAtAttr , isMirrorInSync } from './mirror-sync-gate.js' ;
1617dotenv . config ( { quiet : true } ) ;
1718
1819// possible values: local, test
@@ -428,132 +429,6 @@ export const initMirrorModel = (initFn) => {
428429 */
429430const BACKFILL_BATCH_SIZE = 1000 ;
430431
// Sequelize attribute names for the auto-managed updatedAt column. V1
// models use the default 'updatedAt'; V2 models declare
// `updatedAt: 'updated_at'` with `underscored: true`, which makes
// 'updated_at' the rawAttribute key. Probe both candidates so the same
// gate works against either convention without depending on Sequelize
// internals (_timestampAttributes is undocumented).
// Frozen so the probe order (and therefore which name wins when a model
// exposes both) cannot be altered at runtime.
const UPDATED_AT_ATTR_CANDIDATES = Object.freeze(['updatedAt', 'updated_at']);

/**
 * Find a Sequelize attribute name for the updatedAt column that is
 * present on BOTH source and mirror models.
 *
 * Candidates are probed in the order listed in
 * UPDATED_AT_ATTR_CANDIDATES; the first name that is an own key of both
 * models' `rawAttributes` wins.
 *
 * Disagreement is intentionally treated as "no gate" rather than picking
 * one side: a mismatched name would force one side's MAX() call to
 * reference a non-existent column and throw, which would mask a real
 * drift behind a swallowed error.
 *
 * @param {object|null|undefined} source - Model-like object; only its
 *   `rawAttributes` map is read.
 * @param {object|null|undefined} mirror - Model-like object; only its
 *   `rawAttributes` map is read.
 * @returns {string|null} The shared attribute name, or null when no
 *   consistent name exists (a model is missing, a model lacks
 *   `rawAttributes`, a model lacks the column, or the two models
 *   disagree on naming).
 */
const matchingUpdatedAtAttr = (source, mirror) => {
  // A missing model is treated the same as a model without the column:
  // there is nothing to gate on, so report null instead of throwing.
  const sourceAttrs = (source && source.rawAttributes) || {};
  const mirrorAttrs = (mirror && mirror.rawAttributes) || {};

  // Own-property check only: keys inherited through the prototype chain
  // are not declared attributes and must not count as a match.
  const declares = (attrs, key) =>
    Object.prototype.hasOwnProperty.call(attrs, key);

  for (const attr of UPDATED_AT_ATTR_CANDIDATES) {
    if (declares(sourceAttrs, attr) && declares(mirrorAttrs, attr)) {
      return attr;
    }
  }
  return null;
};
460-
/**
 * Cheap "is the mirror table already caught up?" check used by
 * backfillMirror to short-circuit the bulk-upsert pass when there is
 * nothing to do. The orphan sweep runs unconditionally BEFORE this
 * helper so PK-swap drift (count and MAX preserved, row identities
 * differ) is exposed as a post-sweep count mismatch and falls through
 * to the upsert. See the call site for the full ordering rationale.
 *
 * Returns true ONLY when BOTH conditions hold:
 *   1. count(source) === count(mirror)
 *   2. Either both counts are 0 (truly empty on both sides), OR
 *      max(mirror[updatedAtAttr]) >= max(source[updatedAtAttr])
 *
 * Returns false in every other case (including any error), so the
 * caller falls through to the existing full upsert. Crucially, false
 * is the safe default: a false negative just causes the existing
 * (correct) full sync to run, while a false positive would silently
 * leave the mirror stale. We deliberately bias toward the
 * cheap-but-fully-correct fall-through.
 *
 * Known limitation - non-max-row UPDATE drift: if a row is updated in
 * source via raw SQL with an updatedAt that's strictly below the
 * table's existing MAX(updatedAt), the gate can't see the change.
 * Sequelize-driven UPDATEs auto-bump updatedAt to NOW() (necessarily
 * greater than any prior MAX), so this only happens via raw SQL that
 * bypasses the ORM or via clock-skew adjustments. No such code path
 * exists in CADT today. If composite drift patterns become a concern,
 * replace this with a SUM(UNIX_TIMESTAMP(updatedAt)) checksum or a
 * per-table hash digest.
 *
 * @param {object} source - Model exposing async `count()` and `max(attr)`.
 * @param {object} mirror - Model exposing async `count()` and `max(attr)`.
 * @param {string} name - Table/model label interpolated into debug lines.
 * @param {string} updatedAtAttr - Attribute name handed to `max()` on
 *   both models.
 * @param {{ debug: function(string): void }} [log=logger] - Sink for
 *   debug lines. Optional so four-argument callers keep using the
 *   module-level logger, while callers that pass a logger explicitly
 *   are honoured instead of having the argument silently ignored.
 * @returns {Promise<boolean>} true when the upsert pass can be skipped;
 *   false otherwise. Query failures are reported as false rather than
 *   rethrown.
 */
const isMirrorInSync = async (
  source,
  mirror,
  name,
  updatedAtAttr,
  log = logger,
) => {
  // Every line emitted by the gate shares this prefix.
  const debug = (detail) => log.debug(`Mirror backfill: ${name} - ${detail}`);

  try {
    // The four aggregate queries are independent, so issue them together.
    const [sourceCount, mirrorCount, sourceMax, mirrorMax] = await Promise.all([
      source.count(),
      mirror.count(),
      source.max(updatedAtAttr),
      mirror.max(updatedAtAttr),
    ]);

    if (sourceCount !== mirrorCount) {
      debug(
        `gate failed (count mismatch: source=${sourceCount} mirror=${mirrorCount})`,
      );
      return false;
    }

    if (sourceCount === 0) {
      debug('in sync, skipping (empty on both sides)');
      return true;
    }

    // Counts agree and are non-zero. A null MAX(updatedAt) at this
    // point means rows exist with null timestamps - we can't compare
    // freshness, so fall through to the full sync rather than skip.
    if (sourceMax == null || mirrorMax == null) {
      debug(`gate failed (max(updatedAt) null with ${sourceCount} rows)`);
      return false;
    }

    // Sequelize.max returns either a Date (for DATE columns) or whatever
    // raw value the dialect returned. Coerce through Date so SQLite string
    // timestamps and MySQL Date instances compare consistently.
    const sourceMs = new Date(sourceMax).getTime();
    const mirrorMs = new Date(mirrorMax).getTime();
    if (Number.isNaN(sourceMs) || Number.isNaN(mirrorMs)) {
      debug('gate failed (non-parseable max(updatedAt))');
      return false;
    }

    if (mirrorMs >= sourceMs) {
      debug(
        `in sync, skipping (${sourceCount} rows, max(updatedAt) mirror=${mirrorMs} >= source=${sourceMs})`,
      );
      return true;
    }

    debug(
      `gate failed (mirror max(updatedAt)=${mirrorMs} < source=${sourceMs})`,
    );
    return false;
  } catch (error) {
    // Never block the existing sync path on a gate error - just fall
    // through to the full upsert. A thrown value is not guaranteed to be
    // an Error (it can even be null), so read `.message` defensively;
    // otherwise this handler would itself throw and reject the promise.
    const reason =
      error != null && error.message !== undefined
        ? error.message
        : String(error);
    debug(`gate check failed (${reason}), falling through to full sync`);
    return false;
  }
};
556-
557432// NOTE: when this early-returns for composite PKs, the in-sync gate
558433// that runs after it in backfillMirror operates on raw (un-swept)
559434// state. mirror-model-init.spec.js currently enforces single-column
@@ -809,6 +684,7 @@ const runBackfillMirror = async () => {
809684 mirror ,
810685 name ,
811686 updatedAtAttr ,
687+ logger ,
812688 ) ;
813689 if ( inSync ) {
814690 totalGateSkipped += 1 ;
0 commit comments