Skip to content

Commit 81428f1

Browse files
committed
perf: defer FTS triggers, bulk-write audit, index audit on sync
Reduces sync write cost on large catch-ups, primarily for Pi-class hardware: - Add composite indexes on the `audit` table so the per-tick last- generation lookup is served by an index in both V1 (registryId, generation) + (orgUid, generation) and V2 (registry_id, generation) + (org_uid, generation). The V1 mostRecentOrgAuditRecord lookup in sync-registries.js now orders by generation DESC so the new composite index serves it (the old createdAt DESC ordering would not). - Replace per-row Audit.create with a single Audit.bulkCreate per generation in V1 and V2 sync. V1 grows a new Audit.bulkCreate static that threads mirrorTransaction through to the mirror bulkCreate call. Bulk inserts cap at batchSize 500 to stay under SQLite's SQLITE_MAX_VARIABLE_NUMBER limit on very large generations. - Defer projects_fts / units_fts (V1) and projects_v2_fts / units_v2_fts (V2) maintenance triggers while any subscribed org is mid-catch-up. New helpers in src/utils/fts5-deferral.js and src/utils/fts5-deferral-v2.js snapshot the currently-installed trigger DDL into `meta` via sqlite_master introspection, drop the triggers, and recreate them from the snapshot once all orgs are caught up. Snapshotting at defer time instead of hardcoding DDL removes a silent schema-drift class entirely. Persist the deferral state in `meta` so a crash mid-catch-up is recovered on the next boot. The post-loop restore decision runs in a `finally` block so a thrown sync error doesn't leave triggers permanently dropped, and boot recovery is non-fatal so a transient FTS rebuild failure can't brick startup. The deferral path is gated on NODE_ENV !== 'test' so the existing integration-test fixtures (which seed many subscribed-but-unsynced orgs alongside live FTS-dependent specs) keep their FTS triggers installed; the new sync-write-perf integration specs exercise the deferral helpers directly. New integration tests cover: - audit composite indexes + EXPLAIN-plan checks for both registry and org-uid lookups - per-generation Audit.bulkCreate row shape, including a real-DB test of the V1 mirror branch and isolation between source and mirror transactions - FTS5 trigger drop / recreate / rebuild helpers, the ensure/restore state machine, and the boot-recovery path
1 parent e88ac04 commit 81428f1

13 files changed

Lines changed: 2418 additions & 161 deletions

src/database/index.js

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,20 @@ const dbConfigKey = nodeEnv;
2121
// Safety check: In test mode, ensure we're using test database configuration
2222
if (nodeEnv === 'test') {
2323
const testConfig = config[dbConfigKey];
24-
if (!testConfig || !testConfig.storage || !testConfig.storage.includes('test')) {
24+
if (
25+
!testConfig ||
26+
!testConfig.storage ||
27+
!testConfig.storage.includes('test')
28+
) {
2529
const errorMsg = `SAFETY CHECK FAILED: Test mode detected but database config '${dbConfigKey}' does not appear to be a test database. Storage: ${testConfig?.storage || 'undefined'}. This prevents accidental production database access.`;
2630
console.error(errorMsg);
2731
throw new Error(errorMsg);
2832
}
2933
// Additional check: test database should be under tests/test-dbs/, not in home directory
30-
if (testConfig.storage.includes('~') || testConfig.storage.includes(os.homedir())) {
34+
if (
35+
testConfig.storage.includes('~') ||
36+
testConfig.storage.includes(os.homedir())
37+
) {
3138
const errorMsg = `SAFETY CHECK FAILED: Test database path appears to be in home directory: ${testConfig.storage}. Test databases must be under tests/test-dbs/.`;
3239
console.error(errorMsg);
3340
throw new Error(errorMsg);
@@ -206,9 +213,7 @@ export const prepareMysqlMirror = async () => {
206213
mirrorSetupSucceeded = true;
207214
return true;
208215
} catch (error) {
209-
logger.error(
210-
`Error setting up MySQL mirror database: ${error.message}`,
211-
);
216+
logger.error(`Error setting up MySQL mirror database: ${error.message}`);
212217
return false;
213218
}
214219
})().finally(() => {
@@ -318,10 +323,7 @@ export const safeMirrorDbHandler = (callback) => {
318323
// below. Skip quietly - the next authenticate success will
319324
// re-enter startReconnectBackfill and retry setup. Symmetric
320325
// with safeMirrorDbHandlerV2.
321-
if (
322-
isMysqlMirrorConfiguredForReconnect() &&
323-
!mirrorSetupSucceeded
324-
) {
326+
if (isMysqlMirrorConfiguredForReconnect() && !mirrorSetupSucceeded) {
325327
return;
326328
}
327329

@@ -523,13 +525,25 @@ export const backfillMirror = async () => {
523525
const mirrorPairs = [
524526
{ source: models.Project, mirror: ProjectMirror, name: 'project' },
525527
{ source: models.CoBenefit, mirror: CoBenefitMirror, name: 'co_benefit' },
526-
{ source: models.ProjectLocation, mirror: ProjectLocationMirror, name: 'location' },
528+
{
529+
source: models.ProjectLocation,
530+
mirror: ProjectLocationMirror,
531+
name: 'location',
532+
},
527533
{ source: models.Label, mirror: LabelMirror, name: 'label' },
528534
{ source: models.Rating, mirror: RatingMirror, name: 'rating' },
529-
{ source: models.RelatedProject, mirror: RelatedProjectMirror, name: 'related_project' },
535+
{
536+
source: models.RelatedProject,
537+
mirror: RelatedProjectMirror,
538+
name: 'related_project',
539+
},
530540
{ source: models.Unit, mirror: UnitMirror, name: 'unit' },
531541
{ source: models.Issuance, mirror: IssuanceMirror, name: 'issuance' },
532-
{ source: models.Estimation, mirror: EstimationMirror, name: 'estimation' },
542+
{
543+
source: models.Estimation,
544+
mirror: EstimationMirror,
545+
name: 'estimation',
546+
},
533547
{ source: models.LabelUnit, mirror: LabelUnitMirror, name: 'label_unit' },
534548
{ source: models.Audit, mirror: AuditMirror, name: 'audit' },
535549
];
@@ -539,8 +553,13 @@ export const backfillMirror = async () => {
539553

540554
for (const { source, mirror, name } of mirrorPairs) {
541555
try {
542-
if (!mirror.rawAttributes || Object.keys(mirror.rawAttributes).length === 0) {
543-
logger.warn(`Mirror backfill: ${name} - mirror model not initialized, skipping`);
556+
if (
557+
!mirror.rawAttributes ||
558+
Object.keys(mirror.rawAttributes).length === 0
559+
) {
560+
logger.warn(
561+
`Mirror backfill: ${name} - mirror model not initialized, skipping`,
562+
);
544563
continue;
545564
}
546565

@@ -581,7 +600,9 @@ export const backfillMirror = async () => {
581600
// eslint-disable-next-line no-constant-condition
582601
while (true) {
583602
const where =
584-
lastPk === null ? undefined : { [pk]: { [Sequelize.Op.gt]: lastPk } };
603+
lastPk === null
604+
? undefined
605+
: { [pk]: { [Sequelize.Op.gt]: lastPk } };
585606
// NOTE: Do NOT pass `raw: true` to findAll. With raw:true
586607
// Sequelize returns SQLite values as-stored (strings),
587608
// including DATE columns as "YYYY-MM-DD HH:mm:ss.SSS +00:00".
@@ -746,6 +767,31 @@ export const prepareDb = async () => {
746767

747768
await checkForMigrations(sequelize);
748769

770+
// FTS5 deferral crash-recovery (SQLite only). If the previous run
771+
// crashed mid-catch-up while the project/unit FTS triggers were
772+
// dropped, the deferral flag in `meta` is still set and the FTS tables
773+
// are stale. Restore the triggers + rebuild FTS content from
774+
// projects/units before any reads can observe stale data.
775+
//
776+
// Failure here is logged and swallowed: stale FTS reads are vastly
777+
// preferable to a dead app on boot, and the next sync tick re-tries the
778+
// restore via the same helper once all subscribed orgs are caught up.
779+
//
780+
// Lazy import so this file doesn't pull in src/models/index.js (which
781+
// imports from this file) at module load.
782+
if (sequelize.getDialect() === 'sqlite') {
783+
try {
784+
const { restoreV1FtsTriggersAndRebuildIfDeferred } =
785+
// eslint-disable-next-line no-restricted-syntax -- circular dep guard
786+
await import('../utils/fts5-deferral.js');
787+
await restoreV1FtsTriggersAndRebuildIfDeferred();
788+
} catch (error) {
789+
logger.error(
790+
`[v1]: FTS5 deferral recovery on boot failed; FTS reads may be stale until the next caught-up sync tick re-runs the restore: ${error?.message || error}`,
791+
);
792+
}
793+
}
794+
749795
// Run the mirror backfill after main migrations so all source and
750796
// mirror tables exist. This catches up rows that were inserted/
751797
// updated/deleted while the mirror was unavailable on a previous run.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict';
2+
3+
// Composite indexes on the V1 audit table to make the per-tick "find last
4+
// processed generation for this org" query usable by the SQLite planner.
5+
//
6+
// Hot-path query in src/tasks/sync-registries.js:
7+
//
8+
// Audit.findOne({
9+
// where: { registryId: organization.registryId },
10+
// order: [['generation', 'DESC']],
11+
// })
12+
//
13+
// The legacy schema has only the implicit `id` PK, so this query degrades to
14+
// a full table scan + sort once the audit table is large. The same pattern
15+
// applies to orgUid lookups elsewhere in the codebase. Indexes are
16+
// idempotent (IF NOT EXISTS via Sequelize's migration meta) so this is safe
17+
// to re-run.
18+
19+
const INDEXES = [
20+
{
21+
fields: ['registryId', 'generation'],
22+
name: 'audit_registry_id_generation',
23+
},
24+
{ fields: ['orgUid', 'generation'], name: 'audit_org_uid_generation' },
25+
];
26+
27+
const isDuplicateIndexError = (error) => {
28+
const message = error?.message || '';
29+
return /already exists/i.test(message) || /duplicate key name/i.test(message);
30+
};
31+
32+
export default {
33+
async up(queryInterface) {
34+
for (const { fields, name } of INDEXES) {
35+
try {
36+
await queryInterface.addIndex('audit', fields, { name });
37+
} catch (error) {
38+
// Idempotent: ignore "already exists" so re-running the migration
39+
// (e.g. after a partial run) doesn't fail.
40+
if (!isDuplicateIndexError(error)) {
41+
throw error;
42+
}
43+
}
44+
}
45+
},
46+
47+
async down(queryInterface) {
48+
for (const { name } of INDEXES) {
49+
try {
50+
await queryInterface.removeIndex('audit', name);
51+
} catch {
52+
// Best-effort drop on rollback.
53+
}
54+
}
55+
},
56+
};

src/database/migrations/index.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import OrgSyncStatus from './20231020201652-OrgSyncStatus';
3434
import OrgSyncRemaining from './20231020214357-OrgSyncRemainingCount';
3535
import AddGenerationIndexToAudit from './20231207142225-AddGenerationIndexToAudit';
3636
import AddDataModelVersionStoreToOrganizationTable from './20241211153456-add-data-model-version-store-to-organization-table.js';
37+
import AddAuditSyncIndexes from './20260301120000-add-audit-sync-indexes.js';
3738

3839
export const migrations = [
3940
{
@@ -184,4 +185,8 @@ export const migrations = [
184185
migration: AddDataModelVersionStoreToOrganizationTable,
185186
name: '20241211153456-add-data-model-version-store-to-organization-table',
186187
},
188+
{
189+
migration: AddAuditSyncIndexes,
190+
name: '20260301120000-add-audit-sync-indexes',
191+
},
187192
];

0 commit comments

Comments
 (0)