perf(linter/plugins): use >> not >>> in binary search loops (#21129)

overlookmotel · overlookmotel · commit 43482c7bb13d · 2026-04-07T17:10:32.000Z
`>>` is cheaper than `>>>` because `>>` produces a 32-bit _signed_ integer, which is V8's native number type (SMI). `>>>` produces a 32-bit _unsigned_ integer, which needs to be boxed and stored on the heap.

Source text in raw transfer is limited to 1 GiB, and therefore source offsets, number of lines, number of tokens, and number of comments are all less than `1 << 30`. Therefore even the sum of 2 of them cannot reach `1 << 31`, so it always fits in a non-negative signed 32-bit integer. Therefore it's safe to use `>>` in these binary search loops.
diff --git a/apps/oxlint/src-js/plugins/location.ts b/apps/oxlint/src-js/plugins/location.ts
@@ -163,11 +163,14 @@ export function getLineColumnFromOffset(offset: number): LineColumn {
   // This is also the 1-indexed line number of the line containing `offset`.
   // e.g. if `offset` is on the 3rd line, `low` = 3, and `lineStartIndices[2]` is that line's start.
   // `do...while` is safe because `lineStartIndices` always has at least one entry, so `low < high` at start of loop.
+  //
+  // Note: Source text is limited to 1 GiB max, so offsets cannot exceed 2^30.
+  // This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
   let low = 0,
     high = lineStartIndices.length,
     mid: number;
   do {
-    mid = (low + high) >>> 1;
+    mid = (low + high) >> 1;
     if (offset < lineStartIndices[mid]) {
       high = mid;
     } else {
@@ -350,11 +353,14 @@ export function computeLoc(start: number, end: number): Location {
   // This is also the 1-indexed line number of the line containing `start`.
   // e.g. if `start` is on the 3rd line, `line` = 3, and `lineStartIndices[2]` is that line's start.
   // `do...while` is safe because `lineStartIndices` always has at least one entry, so `line < high` at start of loop.
+  //
+  // Note: Source text is limited to 1 GiB max, so number of lines cannot exceed 2^30.
+  // This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
   let line = 0,
     high = linesLen,
     mid: number;
   do {
-    mid = (line + high) >>> 1;
+    mid = (line + high) >> 1;
     if (start < lineStartIndices[mid]) {
       high = mid;
     } else {
@@ -387,7 +393,7 @@ export function computeLoc(start: number, end: number): Location {
     line++;
     high = linesLen;
     while (line < high) {
-      mid = (line + high) >>> 1;
+      mid = (line + high) >> 1;
       if (end < lineStartIndices[mid]) {
         high = mid;
       } else {
diff --git a/apps/oxlint/src-js/plugins/source_code.ts b/apps/oxlint/src-js/plugins/source_code.ts
@@ -64,6 +64,13 @@ export function initSourceText(): void {
     programPos = uint32[DATA_POINTER_POS_32];
   sourceStartPos = uint32[(programPos + SOURCE_START_OFFSET) >> 2];
   sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
+
+  // This will throw an error "Cannot create a string longer than 0x1fffffe8 characters"
+  // if `sourceByteLen > (2 ** 29 - 24)` (slightly less than 512 MiB).
+  // This is a useful invariant as it means source text offsets, number of lines, and number of tokens are limited
+  // in range so they're always valid SMIs.
+  // This makes it safe to use `>>` for division on these numbers without risking turning them into negative numbers.
+  // So we can use the cheaper `>>` operator instead of `>>>` in various places.
   sourceText = utf8Slice.call(buffer, sourceStartPos, sourceStartPos + sourceByteLen);
 }
 
diff --git a/apps/oxlint/src-js/plugins/tokens_methods.ts b/apps/oxlint/src-js/plugins/tokens_methods.ts
@@ -1284,9 +1284,12 @@ export function getTokenByRangeStart<Options extends RangeOptions | null | undef
     len = tokensAndCommentsLen;
   }
 
-  // Binary search for token starting at the given index
+  // Binary search for token starting at the given index.
+  //
+  // Note: Source text is limited to 1 GiB max, so offsets cannot exceed 2^30.
+  // This makes it safe to use `>> 1` for division by 2 (which is faster than `>>> 1`).
   for (let lo = 0, hi = len; lo < hi; ) {
-    const mid = (lo + hi) >>> 1;
+    const mid = (lo + hi) >> 1;
     const tokenStart = uint32[mid << 2];
     if (tokenStart < offset) {
       lo = mid + 1;
@@ -1528,6 +1531,9 @@ function collectEntries(
  *
  * Returns `length` if all entries have `start` < `offset`.
  *
+ * Note: Source text is limited to 1 GiB max, so number of tokens cannot exceed 2^30.
+ * This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
+ *
  * @param u32 - Uint32Array buffer (tokens, comments, or tokensAndComments)
  * @param offset - Source offset to search for
  * @param startIndex - Starting entry index for the search
@@ -1541,7 +1547,7 @@ export function firstTokenAtOrAfter(
   length: number,
 ): number {
   for (let endIndex = length; startIndex < endIndex; ) {
-    const mid = (startIndex + endIndex) >>> 1;
+    const mid = (startIndex + endIndex) >> 1;
     if (uint32[mid << 2] < offset) {
       startIndex = mid + 1;
     } else {
diff --git a/napi/parser/src/raw_transfer.rs b/napi/parser/src/raw_transfer.rs
@@ -32,11 +32,11 @@ use crate::{
 // This is advantageous for 2 reasons:
 //
 // 1. V8 stores small integers ("SMI"s) inline, rather than on heap, which is more performant.
-//    But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
-//    So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
+//    But 31 bits is the max positive integer considered an SMI.
+//
 // 2. JS bitwise operators work only on signed 32-bit integers, with 32nd bit as sign bit.
-//    So avoiding the 32nd bit being set enables using `>>` bitshift operator, which may be cheaper
-//    than `>>>`, without offsets being interpreted as negative.
+//    So avoiding the 32nd bit being set enables using `>>` bitshift operator,
+//    which is cheaper than `>>>`, and does not risk offsets being interpreted as negative.
 
 const BUMP_ALIGN: usize = 16;