Skip to content

Commit 8550c8f

Browse files
committed
fix: persist v1/v2 org-creation progress incrementally and fix v2 helper endpoint
The V1 and V2 _createStoresInParallel functions only persisted store-id state via a single saveCreationState call after Promise.all resolved. A slow 4th store (e.g. blocked on per-store waitForNewStoreToBeConfirmed polling) hid the other 3 successful creations from GET /v{1,2}/organizations/creation-status, which made the live-api "stuck state" detector trigger at 5 minutes even though the server was actively making progress. Introduce createIncrementalStateWriter (mutex-protected, snapshot-then- persist) so each successful store creation immediately updates the persisted state. A successful on-chain createDataLayerStoreWithRetry followed by a transient saveCreationState failure is no longer treated as a creation failure: the storeId is retained in the per-store result and reconciled in a final save before _createStoresInParallel returns, so we never throw "Failed to create stores" for an actually-created on-chain store and we never leak it on retry. Two test-side fixes accompany the server change: 1. Bump stuckStateThresholdMs from 5 min to 10 min in V1/V2 waitForOrganizationReady. Realistic V1/V2 org creation can take 5+ min when a transient wallet desync triggers 30s retry backoffs and per-store blockchain-confirmation polling. With the new incremental persistence the threshold only fires when no store has progressed for the full window. 2. Fix waitForV2OrganizationReady to poll /v2/organizations/creation-status (returns {state, stores, inProgress, error}) instead of /v2/organizations/status (which is homeOrgSyncStatus, returning {ready, status, success}). The wrong endpoint silently disabled the FAILED-fast-fail and stuck-state detector entirely for V2. Adds focused unit tests for the writer's incremental save, concurrent Promise.all behavior, V1 Meta-model variant, and rollback on persist failure.
1 parent 9e7897c commit 8550c8f

5 files changed

Lines changed: 299 additions & 24 deletions

File tree

src/models/organizations/organizations.model.js

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ import {
5151
loadCreationState,
5252
clearCreationState,
5353
hasInProgressCreation,
54+
createIncrementalStateWriter,
5455
} from '../../utils/organization-creation-state.js';
5556
import { mirrorOrgStores } from '../../tasks/mirror-check.js';
5657
import { updateOrgLockStatus } from '../../utils/org-operation-lock.js';
@@ -497,18 +498,39 @@ class Organization extends Model {
497498
const maxRetries = 10;
498499
const retryDelayMs = 30000;
499500

500-
// Create all stores in parallel, each with independent retry logic
501+
// Persist each successful store creation immediately so the
502+
// /v1/organizations/creation-status endpoint reflects partial progress.
503+
// Without this, all 4 stores stay marked pending until the slowest
504+
// promise resolves, which makes the live-api "stuck state" detector
505+
// fire while the server is still actively making progress.
506+
const stateWriter = createIncrementalStateWriter(state, Meta);
507+
508+
// Create all stores in parallel, each with independent retry logic.
509+
// The persist call is wrapped in its own try so that a transient
510+
// saveCreationState failure (DB busy, sequelize hiccup) does NOT
511+
// mask a successful on-chain store creation as a creation failure;
512+
// any missing persist will be reconciled in a final save below.
501513
const createPromises = storesToCreate.map(async (storeType) => {
502514
for (let attempt = 1; attempt <= maxRetries; attempt++) {
503515
try {
504-
logState(state, `Creating ${storeType} store (attempt ${attempt}/${maxRetries})`);
516+
logState(stateWriter.getCurrent(), `Creating ${storeType} store (attempt ${attempt}/${maxRetries})`);
505517
const storeId = await datalayer.createDataLayerStoreWithRetry();
506-
logState(state, `Created ${storeType} store: ${storeId}`);
518+
logState(stateWriter.getCurrent(), `Created ${storeType} store: ${storeId}`);
519+
try {
520+
await stateWriter.persistStoreCreated(storeType, storeId);
521+
} catch (persistError) {
522+
logState(
523+
stateWriter.getCurrent(),
524+
`Created ${storeType} store ${storeId} but failed to persist incremental progress: ` +
525+
`${persistError.message}. Store id retained in result; final save will reconcile.`,
526+
'warn',
527+
);
528+
}
507529
return { storeType, storeId, success: true };
508530
} catch (error) {
509531
if (isTransientWalletError(error) && attempt < maxRetries) {
510532
logState(
511-
state,
533+
stateWriter.getCurrent(),
512534
`Transient error creating ${storeType} store ` +
513535
`(attempt ${attempt}/${maxRetries}): ${error.message}. ` +
514536
`Retrying in ${retryDelayMs / 1000}s...`,
@@ -517,7 +539,7 @@ class Organization extends Model {
517539
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
518540
continue;
519541
}
520-
logState(state, `Failed to create ${storeType} store: ${error.message}`, 'error');
542+
logState(stateWriter.getCurrent(), `Failed to create ${storeType} store: ${error.message}`, 'error');
521543
return { storeType, storeId: null, success: false, error: error.message };
522544
}
523545
}
@@ -526,13 +548,21 @@ class Organization extends Model {
526548

527549
const results = await Promise.all(createPromises);
528550

529-
// Update state with created store IDs
551+
// Reconcile: pick up incremental updates that already landed, then
552+
// re-apply any successful storeIds whose persist failed mid-flight.
553+
// A single final saveCreationState makes sure the persisted view
554+
// matches in-memory state before returning.
555+
state = stateWriter.getCurrent();
556+
let reconcileNeeded = false;
530557
for (const result of results) {
531-
if (result.success) {
558+
if (result.success && state.stores[result.storeType].id !== result.storeId) {
532559
state = markStoreCreated(state, result.storeType, result.storeId);
560+
reconcileNeeded = true;
533561
}
534562
}
535-
await saveCreationState(state, Meta);
563+
if (reconcileNeeded) {
564+
await saveCreationState(state, Meta);
565+
}
536566

537567
// Check if all stores were created
538568
const failed = results.filter((r) => !r.success);

src/models/v2/organizations-v2.model.js

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ import {
7070
loadCreationState,
7171
clearCreationState,
7272
hasInProgressCreation,
73+
createIncrementalStateWriter,
7374
} from '../../utils/organization-creation-state.js';
7475

7576
import ModelTypes from './organizations-v2.modeltypes.js';
@@ -523,18 +524,39 @@ class OrganizationsV2 extends Model {
523524
const maxRetries = 10;
524525
const retryDelayMs = 30000;
525526

526-
// Create all stores in parallel, each with independent retry logic
527+
// Persist each successful store creation immediately so the
528+
// /v2/organizations/creation-status endpoint reflects partial progress.
529+
// Without this, all 4 stores stay marked pending until the slowest
530+
// promise resolves, which makes the live-api "stuck state" detector
531+
// fire while the server is still actively making progress.
532+
const stateWriter = createIncrementalStateWriter(state, MetaV2);
533+
534+
// Create all stores in parallel, each with independent retry logic.
535+
// The persist call is wrapped in its own try so that a transient
536+
// saveCreationState failure (DB busy, sequelize hiccup) does NOT
537+
// mask a successful on-chain store creation as a creation failure;
538+
// any missing persist will be reconciled in a final save below.
527539
const createPromises = storesToCreate.map(async (storeType) => {
528540
for (let attempt = 1; attempt <= maxRetries; attempt++) {
529541
try {
530-
logState(state, `Creating ${storeType} store (attempt ${attempt}/${maxRetries})`);
542+
logState(stateWriter.getCurrent(), `Creating ${storeType} store (attempt ${attempt}/${maxRetries})`);
531543
const storeId = await datalayer.createDataLayerStoreWithRetry();
532-
logState(state, `Created ${storeType} store: ${storeId}`);
544+
logState(stateWriter.getCurrent(), `Created ${storeType} store: ${storeId}`);
545+
try {
546+
await stateWriter.persistStoreCreated(storeType, storeId);
547+
} catch (persistError) {
548+
logState(
549+
stateWriter.getCurrent(),
550+
`Created ${storeType} store ${storeId} but failed to persist incremental progress: ` +
551+
`${persistError.message}. Store id retained in result; final save will reconcile.`,
552+
'warn',
553+
);
554+
}
533555
return { storeType, storeId, success: true };
534556
} catch (error) {
535557
if (isTransientWalletError(error) && attempt < maxRetries) {
536558
logState(
537-
state,
559+
stateWriter.getCurrent(),
538560
`Transient error creating ${storeType} store ` +
539561
`(attempt ${attempt}/${maxRetries}): ${error.message}. ` +
540562
`Retrying in ${retryDelayMs / 1000}s...`,
@@ -543,7 +565,7 @@ class OrganizationsV2 extends Model {
543565
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
544566
continue;
545567
}
546-
logState(state, `Failed to create ${storeType} store: ${error.message}`, 'error');
568+
logState(stateWriter.getCurrent(), `Failed to create ${storeType} store: ${error.message}`, 'error');
547569
return { storeType, storeId: null, success: false, error: error.message };
548570
}
549571
}
@@ -552,13 +574,21 @@ class OrganizationsV2 extends Model {
552574

553575
const results = await Promise.all(createPromises);
554576

555-
// Update state with created store IDs
577+
// Reconcile: pick up incremental updates that already landed, then
578+
// re-apply any successful storeIds whose persist failed mid-flight.
579+
// A single final saveCreationState makes sure the persisted view
580+
// matches in-memory state before returning.
581+
state = stateWriter.getCurrent();
582+
let reconcileNeeded = false;
556583
for (const result of results) {
557-
if (result.success) {
584+
if (result.success && state.stores[result.storeType].id !== result.storeId) {
558585
state = markStoreCreated(state, result.storeType, result.storeId);
586+
reconcileNeeded = true;
559587
}
560588
}
561-
await saveCreationState(state, MetaV2);
589+
if (reconcileNeeded) {
590+
await saveCreationState(state, MetaV2);
591+
}
562592

563593
// Check if all stores were created
564594
const failed = results.filter((r) => !r.success);

src/utils/organization-creation-state.js

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* - FAILED: Organization creation failed after max retries
1515
*/
1616

17+
import { Mutex } from 'async-mutex';
18+
1719
import { logger, loggerV2 } from '../config/logger.js';
1820

1921
// State constants
@@ -478,3 +480,45 @@ export const hasInProgressCreation = async (MetaModel, apiVersion) => {
478480
}
479481
return true;
480482
};
483+
484+
/**
485+
* Creates a mutex-protected state writer that lets multiple concurrent
486+
* store-creation promises in `_createStoresInParallel` mark each
487+
* successful creation immediately and persist it to the Meta table
488+
* without clobbering each other's updates.
489+
*
490+
* Without this helper, `state` is a closure variable that each promise
491+
* would read-then-overwrite at the end of `Promise.all`, so the
492+
* /v{1,2}/organizations/creation-status endpoint cannot reflect partial
493+
* progress until the slowest store finishes. That made the live-api
494+
* "stuck state" detector fire in cases where 1-3 of the 4 stores were
495+
* actually completing on the server side.
496+
*
497+
* @param {Object} initialState - Starting state object
498+
* @param {Object} MetaModel - The Meta model to persist into (Meta or MetaV2)
499+
* @returns {{ persistStoreCreated: Function, getCurrent: Function }}
500+
*/
501+
export const createIncrementalStateWriter = (initialState, MetaModel) => {
502+
const stateRef = { current: initialState };
503+
const stateMutex = new Mutex();
504+
505+
const persistStoreCreated = async (storeType, storeId) => {
506+
const release = await stateMutex.acquire();
507+
try {
508+
// Compute the next state, persist it first, and only update the
509+
// shared reference if the write succeeds. If saveCreationState
510+
// throws, stateRef.current still matches what is on disk, which
511+
// keeps the in-memory and persisted views consistent for any
512+
// other reader that picks up the writer mid-flight.
513+
const next = markStoreCreated(stateRef.current, storeType, storeId);
514+
await saveCreationState(next, MetaModel);
515+
stateRef.current = next;
516+
} finally {
517+
release();
518+
}
519+
};
520+
521+
const getCurrent = () => stateRef.current;
522+
523+
return { persistStoreCreated, getCurrent };
524+
};

tests/v2/integration/organization-creation-status-v2.spec.js

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
markStoreConfirmed,
2020
markStoreDataWritten,
2121
STORE_TYPES,
22+
createIncrementalStateWriter,
2223
} from '../../../src/utils/organization-creation-state.js';
2324

2425
const { USE_SIMULATOR } = getConfig().APP;
@@ -178,6 +179,160 @@ describe('Organization Creation Status Tests', function () {
178179
});
179180
});
180181

182+
describe('createIncrementalStateWriter', function () {
183+
it('should persist each store creation immediately so partial progress is visible', async function () {
184+
// Simulates the production race: 4 parallel store-creation promises
185+
// resolve at different times, and the live-api stuck-state detector
186+
// polls the creation-status endpoint between them. With the writer,
187+
// each completion makes the next poll see new progress and reset
188+
// the detector's timer.
189+
const initialState = createInitialState('Incremental V2 Org', '', 'v2', 'v2');
190+
initialState.state = ORG_CREATION_STATES.STORES_CREATING;
191+
await saveCreationState(initialState, MetaV2);
192+
193+
const writer = createIncrementalStateWriter(initialState, MetaV2);
194+
195+
const storeIds = {
196+
[STORE_TYPES.ORG_UID]: 'orgUid-id',
197+
[STORE_TYPES.REGISTRY]: 'registry-id',
198+
[STORE_TYPES.DATA_MODEL_VERSION]: 'dataModelVersion-id',
199+
[STORE_TYPES.FILE_STORE]: 'fileStore-id',
200+
};
201+
const orderOfCompletion = [
202+
STORE_TYPES.REGISTRY,
203+
STORE_TYPES.DATA_MODEL_VERSION,
204+
STORE_TYPES.ORG_UID,
205+
STORE_TYPES.FILE_STORE,
206+
];
207+
208+
const persistedAfterEach = [];
209+
for (const storeType of orderOfCompletion) {
210+
await writer.persistStoreCreated(storeType, storeIds[storeType]);
211+
const persisted = await loadCreationState(MetaV2, 'v2');
212+
persistedAfterEach.push(persisted);
213+
}
214+
215+
// After 1st write the registry is set and the rest are still null.
216+
expect(persistedAfterEach[0].stores[STORE_TYPES.REGISTRY].id).to.equal('registry-id');
217+
expect(persistedAfterEach[0].stores[STORE_TYPES.ORG_UID].id).to.be.null;
218+
expect(persistedAfterEach[0].stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.be.null;
219+
expect(persistedAfterEach[0].stores[STORE_TYPES.FILE_STORE].id).to.be.null;
220+
221+
// After 2nd write registry + dataModelVersion are set.
222+
expect(persistedAfterEach[1].stores[STORE_TYPES.REGISTRY].id).to.equal('registry-id');
223+
expect(persistedAfterEach[1].stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.equal('dataModelVersion-id');
224+
expect(persistedAfterEach[1].stores[STORE_TYPES.ORG_UID].id).to.be.null;
225+
expect(persistedAfterEach[1].stores[STORE_TYPES.FILE_STORE].id).to.be.null;
226+
227+
// After 3rd write three of four are set.
228+
expect(persistedAfterEach[2].stores[STORE_TYPES.ORG_UID].id).to.equal('orgUid-id');
229+
expect(persistedAfterEach[2].stores[STORE_TYPES.FILE_STORE].id).to.be.null;
230+
231+
// Final state: all four set, in-memory and persisted match.
232+
const final = persistedAfterEach[3];
233+
expect(final.stores[STORE_TYPES.ORG_UID].id).to.equal('orgUid-id');
234+
expect(final.stores[STORE_TYPES.REGISTRY].id).to.equal('registry-id');
235+
expect(final.stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.equal('dataModelVersion-id');
236+
expect(final.stores[STORE_TYPES.FILE_STORE].id).to.equal('fileStore-id');
237+
238+
const current = writer.getCurrent();
239+
expect(current.stores[STORE_TYPES.ORG_UID].id).to.equal('orgUid-id');
240+
expect(current.stores[STORE_TYPES.REGISTRY].id).to.equal('registry-id');
241+
expect(current.stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.equal('dataModelVersion-id');
242+
expect(current.stores[STORE_TYPES.FILE_STORE].id).to.equal('fileStore-id');
243+
});
244+
245+
it('should preserve all updates when persistStoreCreated calls are awaited in Promise.all', async function () {
246+
// Sanity check that running the four writes in parallel via
247+
// Promise.all does not lose any of them and that both the
248+
// in-memory and persisted views agree at the end. This isn't a
249+
// strict regression guard against removing the mutex (single-
250+
// threaded JS plus serialised SQLite writes would still produce
251+
// the same final state in many cases), but it does protect
252+
// against any future change that reorders the snapshot/persist
253+
// steps such that an early failure would leave a fake storeId
254+
// in the in-memory state.
255+
const initialState = createInitialState('Concurrent V2 Org', '', 'v2', 'v2');
256+
initialState.state = ORG_CREATION_STATES.STORES_CREATING;
257+
await saveCreationState(initialState, MetaV2);
258+
259+
const writer = createIncrementalStateWriter(initialState, MetaV2);
260+
261+
await Promise.all([
262+
writer.persistStoreCreated(STORE_TYPES.ORG_UID, 'concurrent-org-uid'),
263+
writer.persistStoreCreated(STORE_TYPES.REGISTRY, 'concurrent-registry'),
264+
writer.persistStoreCreated(STORE_TYPES.DATA_MODEL_VERSION, 'concurrent-dmv'),
265+
writer.persistStoreCreated(STORE_TYPES.FILE_STORE, 'concurrent-filestore'),
266+
]);
267+
268+
const current = writer.getCurrent();
269+
expect(current.stores[STORE_TYPES.ORG_UID].id).to.equal('concurrent-org-uid');
270+
expect(current.stores[STORE_TYPES.REGISTRY].id).to.equal('concurrent-registry');
271+
expect(current.stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.equal('concurrent-dmv');
272+
expect(current.stores[STORE_TYPES.FILE_STORE].id).to.equal('concurrent-filestore');
273+
274+
const persisted = await loadCreationState(MetaV2, 'v2');
275+
expect(persisted.stores[STORE_TYPES.ORG_UID].id).to.equal('concurrent-org-uid');
276+
expect(persisted.stores[STORE_TYPES.REGISTRY].id).to.equal('concurrent-registry');
277+
expect(persisted.stores[STORE_TYPES.DATA_MODEL_VERSION].id).to.equal('concurrent-dmv');
278+
expect(persisted.stores[STORE_TYPES.FILE_STORE].id).to.equal('concurrent-filestore');
279+
});
280+
281+
it('should NOT update stateRef.current when saveCreationState fails', async function () {
282+
// Ensures the writer keeps its in-memory state in sync with what
283+
// is on disk: if persistence throws, the storeId we just tried to
284+
// record is NOT silently kept in stateRef.current. This is what
285+
// lets a caller treat a failed persist as "store creation failed"
286+
// without leaking a half-applied update.
287+
const initialState = createInitialState('Failing Persist Org', '', 'v2', 'v2');
288+
initialState.state = ORG_CREATION_STATES.STORES_CREATING;
289+
await saveCreationState(initialState, MetaV2);
290+
291+
const writer = createIncrementalStateWriter(initialState, MetaV2);
292+
293+
const failingMeta = {
294+
findOne: async () => { throw new Error('simulated DB outage'); },
295+
update: async () => { throw new Error('simulated DB outage'); },
296+
create: async () => { throw new Error('simulated DB outage'); },
297+
};
298+
const failingWriter = createIncrementalStateWriter(initialState, failingMeta);
299+
300+
try {
301+
await failingWriter.persistStoreCreated(STORE_TYPES.ORG_UID, 'should-not-leak');
302+
expect.fail('persistStoreCreated should have thrown');
303+
} catch (e) {
304+
expect(e.message).to.include('simulated DB outage');
305+
}
306+
307+
// In-memory state must not contain the un-persisted storeId.
308+
expect(failingWriter.getCurrent().stores[STORE_TYPES.ORG_UID].id).to.be.null;
309+
310+
// Original successful writer is unaffected.
311+
await writer.persistStoreCreated(STORE_TYPES.ORG_UID, 'good-id');
312+
expect(writer.getCurrent().stores[STORE_TYPES.ORG_UID].id).to.equal('good-id');
313+
const persisted = await loadCreationState(MetaV2, 'v2');
314+
expect(persisted.stores[STORE_TYPES.ORG_UID].id).to.equal('good-id');
315+
});
316+
317+
it('should also work for V1 (Meta model)', async function () {
318+
const initialState = createInitialState('Incremental V1 Org', '', 'v1', 'v1');
319+
initialState.state = ORG_CREATION_STATES.STORES_CREATING;
320+
await saveCreationState(initialState, Meta);
321+
322+
const writer = createIncrementalStateWriter(initialState, Meta);
323+
324+
await writer.persistStoreCreated(STORE_TYPES.ORG_UID, 'v1-org-uid');
325+
let persisted = await loadCreationState(Meta, 'v1');
326+
expect(persisted.stores[STORE_TYPES.ORG_UID].id).to.equal('v1-org-uid');
327+
expect(persisted.stores[STORE_TYPES.REGISTRY].id).to.be.null;
328+
329+
await writer.persistStoreCreated(STORE_TYPES.REGISTRY, 'v1-registry');
330+
persisted = await loadCreationState(Meta, 'v1');
331+
expect(persisted.stores[STORE_TYPES.ORG_UID].id).to.equal('v1-org-uid');
332+
expect(persisted.stores[STORE_TYPES.REGISTRY].id).to.equal('v1-registry');
333+
});
334+
});
335+
181336
describe('V2 Creation Status API Endpoint', function () {
182337
it('should return no creation in progress when no state exists', async function () {
183338
const response = await supertest(app)

0 commit comments

Comments
 (0)