fix(V2): address bugbot review of CSV batch staging

TheLastCicada · TheLastCicada · commit 026afdfe41b6 · 2026-04-23T14:54:40.000-07:00
Fix two findings flagged by Cursor Bugbot on the CSV batch upload
consolidation path:

1. Keep the staging row's uuid column in sync with the entity PK when
   stageConsolidatedCsvRecord updates an existing pending row. The
   staging convention is uuid === entity PK, and downstream commit
   logic uses it as the datalayer changelist key; a stale uuid from a
   prior staging row (e.g. one written by a different code path) would
   produce the wrong changelist entry.

2. Collapse the per-CSV-row staging scans from three to one.
   buildPendingCsvMergeBase now issues a single query covering DELETE,
   INSERT, and UPDATE actions and buckets results client-side. The
   INSERT/UPDATE rows are passed through to stageConsolidatedCsvRecord
   via a new pendingRows option so it no longer re-scans the staging
   table. For N CSV rows with M pending staging records this cuts the
   work from roughly 3*N*M staging-row reads + JSON parses to N*M: a
   3x constant-factor saving (the complexity class is still O(N*M)).

Also add regression assertions to the existing "merge CSV updates into
an already-staged ... update" integration tests that verify the
staging row's uuid column equals the entity PK after consolidation,
so a silent revert of the uuid fix would fail CI.
diff --git a/src/models/v2/project-v2.model.js b/src/models/v2/project-v2.model.js
@@ -419,15 +419,21 @@ class ProjectV2 extends Model {
           let action;
           let mergedRecord;
 
+          let pendingRows = [];
           if (projectId) {
             const existing = await ProjectV2.findByPk(projectId);
+            const mergeResult = await buildPendingCsvMergeBase(
+              ProjectV2,
+              projectId,
+              existing,
+              { transaction },
+            );
             const {
               mergedBase,
               hasPendingDelete,
               hasMultiRecordPendingRow,
-            } = await buildPendingCsvMergeBase(ProjectV2, projectId, existing, {
-              transaction,
-            });
+            } = mergeResult;
+            pendingRows = mergeResult.pendingRows;
 
             if (hasPendingDelete) {
               errors.push({
@@ -499,6 +505,7 @@ class ProjectV2 extends Model {
             action,
             cleaned,
             transaction,
+            { pendingRows },
           );
           stagedCount++;
         } catch (err) {
diff --git a/src/models/v2/unit-v2.model.js b/src/models/v2/unit-v2.model.js
@@ -484,15 +484,21 @@ class UnitV2 extends Model {
           let action;
           let mergedRecord;
 
+          let pendingRows = [];
           if (unitId) {
             const existing = await UnitV2.findByPk(unitId);
+            const mergeResult = await buildPendingCsvMergeBase(
+              UnitV2,
+              unitId,
+              existing,
+              { transaction },
+            );
             const {
               mergedBase,
               hasPendingDelete,
               hasMultiRecordPendingRow,
-            } = await buildPendingCsvMergeBase(UnitV2, unitId, existing, {
-              transaction,
-            });
+            } = mergeResult;
+            pendingRows = mergeResult.pendingRows;
 
             if (hasPendingDelete) {
               errors.push({
@@ -572,6 +578,7 @@ class UnitV2 extends Model {
             action,
             cleaned,
             transaction,
+            { pendingRows },
           );
           stagedCount++;
         } catch (err) {
diff --git a/src/utils/v2-xls.js b/src/utils/v2-xls.js
@@ -383,27 +383,38 @@ export async function getPendingStagedRowsForPk(
  * rows on top of the committed DB row. This lets CSV batch uploads reconcile
  * with already-staged edits instead of clobbering them.
  *
+ * Performs a single staging table scan and buckets results by action client
+ * side, then returns the INSERT/UPDATE pending rows so the caller can hand
+ * them to stageConsolidatedCsvRecord without scanning the staging table a
+ * second time.
+ *
  * @param {import('sequelize').Model} modelClass
  * @param {string} pk
  * @param {Object|null} persistedRecord - Sequelize instance or null
  * @param {{ transaction?: import('sequelize').Transaction }} [options]
- * @returns {Promise<{ mergedBase: Object, pendingRows: Array, hasPendingDelete: boolean }>}
+ * @returns {Promise<{ mergedBase: Object, pendingRows: Array, hasPendingDelete: boolean, hasMultiRecordPendingRow: boolean }>}
  */
 export async function buildPendingCsvMergeBase(
   modelClass,
   pk,
   persistedRecord,
   { transaction } = {},
 ) {
-  const pendingDeleteRows = await getPendingStagedRowsForPk(modelClass, pk, {
-    transaction,
-    actions: ['DELETE'],
-  });
-  const pendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
+  const allPendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
     transaction,
-    actions: ['INSERT', 'UPDATE'],
+    actions: ['DELETE', 'INSERT', 'UPDATE'],
   });
 
+  const pendingRows = [];
+  let hasPendingDelete = false;
+  for (const entry of allPendingRows) {
+    if (entry.stagingRecord.action === 'DELETE') {
+      hasPendingDelete = true;
+    } else {
+      pendingRows.push(entry);
+    }
+  }
+
   let mergedBase = persistedRecord ? persistedRecord.toJSON() : {};
   const hasMultiRecordPendingRow = pendingRows.some(
     ({ recordCount }) => recordCount > 1,
@@ -418,7 +429,7 @@ export async function buildPendingCsvMergeBase(
   return {
     mergedBase,
     pendingRows,
-    hasPendingDelete: pendingDeleteRows.length > 0,
+    hasPendingDelete,
     hasMultiRecordPendingRow,
   };
 }
@@ -432,11 +443,16 @@ export async function buildPendingCsvMergeBase(
  * If any existing pending row is an INSERT, the consolidated row remains an
  * INSERT because the record has not been committed yet.
  *
+ * Callers that already have the pending INSERT/UPDATE rows in hand (e.g. from
+ * buildPendingCsvMergeBase during the same row of a CSV batch) may pass them
+ * via options.pendingRows to avoid re-scanning the staging table.
+ *
  * @param {import('sequelize').Model} modelClass
  * @param {string} pk
  * @param {'INSERT'|'UPDATE'} action
  * @param {Object} cleanedRecord - DB-field (snake_case) row
  * @param {import('sequelize').Transaction} transaction
+ * @param {{ pendingRows?: Array<{ stagingRecord: Object }> }} [options]
  * @returns {Promise<void>}
  */
 export async function stageConsolidatedCsvRecord(
@@ -445,26 +461,39 @@ export async function stageConsolidatedCsvRecord(
   action,
   cleanedRecord,
   transaction,
+  { pendingRows } = {},
 ) {
-  const pendingRows = await getPendingStagedRowsForPk(modelClass, pk, {
-    transaction,
-    actions: ['INSERT', 'UPDATE'],
-  });
+  // Callers pass an Array (possibly empty) when they already know the
+  // pending INSERT/UPDATE rows. Anything else — undefined/null — means
+  // "unknown, please scan". Guarding on Array.isArray avoids accidentally
+  // re-scanning when a caller explicitly passes `[]` (known-empty).
+  const resolvedPendingRows = Array.isArray(pendingRows)
+    ? pendingRows
+    : await getPendingStagedRowsForPk(modelClass, pk, {
+        transaction,
+        actions: ['INSERT', 'UPDATE'],
+      });
 
-  const effectiveAction = pendingRows.some(
+  const effectiveAction = resolvedPendingRows.some(
     ({ stagingRecord }) => stagingRecord.action === 'INSERT',
   )
     ? 'INSERT'
     : action;
 
-  if (pendingRows.length > 0) {
-    const targetRow = pendingRows[pendingRows.length - 1].stagingRecord;
-    const duplicateIds = pendingRows
+  if (resolvedPendingRows.length > 0) {
+    const targetRow = resolvedPendingRows[resolvedPendingRows.length - 1].stagingRecord;
+    const duplicateIds = resolvedPendingRows
       .slice(0, -1)
       .map(({ stagingRecord }) => stagingRecord.id);
 
+    // Keep the staging row's uuid column in sync with the entity PK. The
+    // staging table convention is that uuid === entity primary key, and
+    // downstream commit logic uses it as the datalayer changelist key; a
+    // stale uuid from a prior staging row would produce the wrong changelist
+    // entry.
     await StagingV2.update(
       {
+        uuid: pk,
         action: effectiveAction,
         data: JSON.stringify([cleanedRecord]),
       },
diff --git a/tests/v2/integration/project-v2.spec.js b/tests/v2/integration/project-v2.spec.js
@@ -1831,6 +1831,11 @@ ${project.cadTrustProjectId},CSV Updated Name`;
         expect(merged.project_name).to.equal('CSV Updated Name');
         expect(merged.project_description).to.equal('Already staged description');
         expect(merged.project_registry_name).to.equal('Test Registry');
+        // Staging row's uuid column must equal the entity PK; downstream
+        // commit logic uses it as the datalayer changelist key. The pre-
+        // seeded row above had a deliberately-mismatched random uuid, so
+        // this asserts the consolidation code rewrote it to the PK.
+        expect(staged[0].uuid).to.equal(project.cadTrustProjectId);
       });
 
       it('should merge CSV updates into an already-staged project INSERT and keep it as INSERT', async function () {
diff --git a/tests/v2/integration/unit-v2.spec.js b/tests/v2/integration/unit-v2.spec.js
@@ -1778,6 +1778,11 @@ ${unit.cadTrustUnitId},75`;
         expect(merged.unit_count).to.equal('75');
         expect(merged.unit_link).to.equal('https://example.com/already-staged-unit');
         expect(merged.unit_serial_id).to.equal('STAGED-MERGE-UNIT');
+        // Staging row's uuid column must equal the entity PK; downstream
+        // commit logic uses it as the datalayer changelist key. The pre-
+        // seeded row above had a deliberately-mismatched random uuid, so
+        // this asserts the consolidation code rewrote it to the PK.
+        expect(staged[0].uuid).to.equal(unit.cadTrustUnitId);
       });
 
       it('should merge CSV updates into an already-staged unit INSERT and keep it as INSERT', async function () {