Skip to content

Commit 026afdf

Browse files
committed
fix(V2): address bugbot review of CSV batch staging
Fix two findings flagged by Cursor Bugbot on the CSV batch upload consolidation path: 1. Keep the staging row's uuid column in sync with the entity PK when stageConsolidatedCsvRecord updates an existing pending row. The staging convention is uuid === entity PK, and downstream commit logic uses it as the datalayer changelist key; a stale uuid from a prior staging row (e.g. one written by a different code path) would produce the wrong changelist entry. 2. Collapse the per-CSV-row staging scans from three to one. buildPendingCsvMergeBase now issues a single query covering DELETE, INSERT, and UPDATE actions and buckets results client-side. The INSERT/UPDATE rows are passed through to stageConsolidatedCsvRecord via a new pendingRows option so it no longer re-scans the staging table. For N CSV rows with M pending staging records this drops the work from O(3*N*M) full scans + JSON parses to O(N*M). Also add regression assertions to the existing "merge CSV updates into an already-staged ... update" integration tests that verify the staging row's uuid column equals the entity PK after consolidation, so a silent revert of the uuid fix would fail CI.
1 parent 446f268 commit 026afdf

5 files changed

Lines changed: 75 additions & 22 deletions

File tree

src/models/v2/project-v2.model.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,15 +419,21 @@ class ProjectV2 extends Model {
419419
let action;
420420
let mergedRecord;
421421

422+
let pendingRows = [];
422423
if (projectId) {
423424
const existing = await ProjectV2.findByPk(projectId);
425+
const mergeResult = await buildPendingCsvMergeBase(
426+
ProjectV2,
427+
projectId,
428+
existing,
429+
{ transaction },
430+
);
424431
const {
425432
mergedBase,
426433
hasPendingDelete,
427434
hasMultiRecordPendingRow,
428-
} = await buildPendingCsvMergeBase(ProjectV2, projectId, existing, {
429-
transaction,
430-
});
435+
} = mergeResult;
436+
pendingRows = mergeResult.pendingRows;
431437

432438
if (hasPendingDelete) {
433439
errors.push({
@@ -499,6 +505,7 @@ class ProjectV2 extends Model {
499505
action,
500506
cleaned,
501507
transaction,
508+
{ pendingRows },
502509
);
503510
stagedCount++;
504511
} catch (err) {

src/models/v2/unit-v2.model.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -484,15 +484,21 @@ class UnitV2 extends Model {
484484
let action;
485485
let mergedRecord;
486486

487+
let pendingRows = [];
487488
if (unitId) {
488489
const existing = await UnitV2.findByPk(unitId);
490+
const mergeResult = await buildPendingCsvMergeBase(
491+
UnitV2,
492+
unitId,
493+
existing,
494+
{ transaction },
495+
);
489496
const {
490497
mergedBase,
491498
hasPendingDelete,
492499
hasMultiRecordPendingRow,
493-
} = await buildPendingCsvMergeBase(UnitV2, unitId, existing, {
494-
transaction,
495-
});
500+
} = mergeResult;
501+
pendingRows = mergeResult.pendingRows;
496502

497503
if (hasPendingDelete) {
498504
errors.push({
@@ -572,6 +578,7 @@ class UnitV2 extends Model {
572578
action,
573579
cleaned,
574580
transaction,
581+
{ pendingRows },
575582
);
576583
stagedCount++;
577584
} catch (err) {

src/utils/v2-xls.js

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -383,27 +383,38 @@ export async function getPendingStagedRowsForPk(
383383
* rows on top of the committed DB row. This lets CSV batch uploads reconcile
384384
* with already-staged edits instead of clobbering them.
385385
*
386+
* Performs a single staging table scan and buckets results by action client
387+
* side, then returns the INSERT/UPDATE pending rows so the caller can hand
388+
* them to stageConsolidatedCsvRecord without scanning the staging table a
389+
* second time.
390+
*
386391
* @param {import('sequelize').Model} modelClass
387392
* @param {string} pk
388393
* @param {Object|null} persistedRecord - Sequelize instance or null
389394
* @param {{ transaction?: import('sequelize').Transaction }} [options]
390-
* @returns {Promise<{ mergedBase: Object, pendingRows: Array, hasPendingDelete: boolean }>}
395+
* @returns {Promise<{ mergedBase: Object, pendingRows: Array, hasPendingDelete: boolean, hasMultiRecordPendingRow: boolean }>}
391396
*/
392397
export async function buildPendingCsvMergeBase(
393398
modelClass,
394399
pk,
395400
persistedRecord,
396401
{ transaction } = {},
397402
) {
398-
const pendingDeleteRows = await getPendingStagedRowsForPk(modelClass, pk, {
399-
transaction,
400-
actions: ['DELETE'],
401-
});
402-
const pendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
403+
const allPendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
403404
transaction,
404-
actions: ['INSERT', 'UPDATE'],
405+
actions: ['DELETE', 'INSERT', 'UPDATE'],
405406
});
406407

408+
const pendingRows = [];
409+
let hasPendingDelete = false;
410+
for (const entry of allPendingRows) {
411+
if (entry.stagingRecord.action === 'DELETE') {
412+
hasPendingDelete = true;
413+
} else {
414+
pendingRows.push(entry);
415+
}
416+
}
417+
407418
let mergedBase = persistedRecord ? persistedRecord.toJSON() : {};
408419
const hasMultiRecordPendingRow = pendingRows.some(
409420
({ recordCount }) => recordCount > 1,
@@ -418,7 +429,7 @@ export async function buildPendingCsvMergeBase(
418429
return {
419430
mergedBase,
420431
pendingRows,
421-
hasPendingDelete: pendingDeleteRows.length > 0,
432+
hasPendingDelete,
422433
hasMultiRecordPendingRow,
423434
};
424435
}
@@ -432,11 +443,16 @@ export async function buildPendingCsvMergeBase(
432443
* If any existing pending row is an INSERT, the consolidated row remains an
433444
* INSERT because the record has not been committed yet.
434445
*
446+
* Callers that already have the pending INSERT/UPDATE rows in hand (e.g. from
447+
* buildPendingCsvMergeBase during the same row of a CSV batch) may pass them
448+
* via options.pendingRows to avoid re-scanning the staging table.
449+
*
435450
* @param {import('sequelize').Model} modelClass
436451
* @param {string} pk
437452
* @param {'INSERT'|'UPDATE'} action
438453
* @param {Object} cleanedRecord - DB-field (snake_case) row
439454
* @param {import('sequelize').Transaction} transaction
455+
* @param {{ pendingRows?: Array<{ stagingRecord: Object }> }} [options]
440456
* @returns {Promise<void>}
441457
*/
442458
export async function stageConsolidatedCsvRecord(
@@ -445,26 +461,39 @@ export async function stageConsolidatedCsvRecord(
445461
action,
446462
cleanedRecord,
447463
transaction,
464+
{ pendingRows } = {},
448465
) {
449-
const pendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
450-
transaction,
451-
actions: ['INSERT', 'UPDATE'],
452-
});
466+
// Callers pass an Array (possibly empty) when they already know the
467+
// pending INSERT/UPDATE rows. Anything else — undefined/null — means
468+
// "unknown, please scan". Guarding on Array.isArray avoids accidentally
469+
// re-scanning when a caller explicitly passes `[]` (known-empty).
470+
const resolvedPendingRows = Array.isArray(pendingRows)
471+
? pendingRows
472+
: await getPendingStagedRowsForPk(modelClass, pk, {
473+
transaction,
474+
actions: ['INSERT', 'UPDATE'],
475+
});
453476

454-
const effectiveAction = pendingRows.some(
477+
const effectiveAction = resolvedPendingRows.some(
455478
({ stagingRecord }) => stagingRecord.action === 'INSERT',
456479
)
457480
? 'INSERT'
458481
: action;
459482

460-
if (pendingRows.length > 0) {
461-
const targetRow = pendingRows[pendingRows.length - 1].stagingRecord;
462-
const duplicateIds = pendingRows
483+
if (resolvedPendingRows.length > 0) {
484+
const targetRow = resolvedPendingRows[resolvedPendingRows.length - 1].stagingRecord;
485+
const duplicateIds = resolvedPendingRows
463486
.slice(0, -1)
464487
.map(({ stagingRecord }) => stagingRecord.id);
465488

489+
// Keep the staging row's uuid column in sync with the entity PK. The
490+
// staging table convention is that uuid === entity primary key, and
491+
// downstream commit logic uses it as the datalayer changelist key; a
492+
// stale uuid from a prior staging row would produce the wrong changelist
493+
// entry.
466494
await StagingV2.update(
467495
{
496+
uuid: pk,
468497
action: effectiveAction,
469498
data: JSON.stringify([cleanedRecord]),
470499
},

tests/v2/integration/project-v2.spec.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,6 +1831,11 @@ ${project.cadTrustProjectId},CSV Updated Name`;
18311831
expect(merged.project_name).to.equal('CSV Updated Name');
18321832
expect(merged.project_description).to.equal('Already staged description');
18331833
expect(merged.project_registry_name).to.equal('Test Registry');
1834+
// Staging row's uuid column must equal the entity PK; downstream
1835+
// commit logic uses it as the datalayer changelist key. The pre-
1836+
// seeded row above had a deliberately-mismatched random uuid, so
1837+
// this asserts the consolidation code rewrote it to the PK.
1838+
expect(staged[0].uuid).to.equal(project.cadTrustProjectId);
18341839
});
18351840

18361841
it('should merge CSV updates into an already-staged project INSERT and keep it as INSERT', async function () {

tests/v2/integration/unit-v2.spec.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,6 +1778,11 @@ ${unit.cadTrustUnitId},75`;
17781778
expect(merged.unit_count).to.equal('75');
17791779
expect(merged.unit_link).to.equal('https://example.com/already-staged-unit');
17801780
expect(merged.unit_serial_id).to.equal('STAGED-MERGE-UNIT');
1781+
// Staging row's uuid column must equal the entity PK; downstream
1782+
// commit logic uses it as the datalayer changelist key. The pre-
1783+
// seeded row above had a deliberately-mismatched random uuid, so
1784+
// this asserts the consolidation code rewrote it to the PK.
1785+
expect(staged[0].uuid).to.equal(unit.cadTrustUnitId);
17811786
});
17821787

17831788
it('should merge CSV updates into an already-staged unit INSERT and keep it as INSERT', async function () {

0 commit comments

Comments
 (0)