Skip to content

Commit 43482c7

Browse files
committed
perf(linter/plugins): use >> not >>> in binary search loops (#21129)
`>>` is cheaper than `>>>` because `>>` produces a 32-bit _signed_ integer which is V8's native number type (SMI). `>>>` produces a 32-bit _unsigned_ integer, which needs to be boxed and stored on the heap. Source text in raw transfer is limited to 1 GiB, and therefore source offsets, number of lines, number of tokens, and number of comments after all less than `1 << 30`. Therefore even the sum of 2 of them cannot reach `1 << 31` (the maximum positive integer which can be stored as a positive signed 31-bit int. Therefore it's safe to use `>>` in these binary loops.
1 parent 990b73a commit 43482c7

4 files changed

Lines changed: 29 additions & 10 deletions

File tree

apps/oxlint/src-js/plugins/location.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,14 @@ export function getLineColumnFromOffset(offset: number): LineColumn {
163163
// This is also the 1-indexed line number of the line containing `offset`.
164164
// e.g. if `offset` is on the 3rd line, `low` = 3, and `lineStartIndices[2]` is that line's start.
165165
// `do...while` is safe because `lineStartIndices` always has at least one entry, so `low < high` at start of loop.
166+
//
167+
// Note: Source text is limited to 1 GiB max, so offsets cannot exceed 2^30.
168+
// This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
166169
let low = 0,
167170
high = lineStartIndices.length,
168171
mid: number;
169172
do {
170-
mid = (low + high) >>> 1;
173+
mid = (low + high) >> 1;
171174
if (offset < lineStartIndices[mid]) {
172175
high = mid;
173176
} else {
@@ -350,11 +353,14 @@ export function computeLoc(start: number, end: number): Location {
350353
// This is also the 1-indexed line number of the line containing `start`.
351354
// e.g. if `start` is on the 3rd line, `line` = 3, and `lineStartIndices[2]` is that line's start.
352355
// `do...while` is safe because `lineStartIndices` always has at least one entry, so `line < high` at start of loop.
356+
//
357+
// Note: Source text is limited to 1 GiB max, so number of lines cannot exceed 2^30.
358+
// This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
353359
let line = 0,
354360
high = linesLen,
355361
mid: number;
356362
do {
357-
mid = (line + high) >>> 1;
363+
mid = (line + high) >> 1;
358364
if (start < lineStartIndices[mid]) {
359365
high = mid;
360366
} else {
@@ -387,7 +393,7 @@ export function computeLoc(start: number, end: number): Location {
387393
line++;
388394
high = linesLen;
389395
while (line < high) {
390-
mid = (line + high) >>> 1;
396+
mid = (line + high) >> 1;
391397
if (end < lineStartIndices[mid]) {
392398
high = mid;
393399
} else {

apps/oxlint/src-js/plugins/source_code.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ export function initSourceText(): void {
6464
programPos = uint32[DATA_POINTER_POS_32];
6565
sourceStartPos = uint32[(programPos + SOURCE_START_OFFSET) >> 2];
6666
sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
67+
68+
// This will throw an error "Cannot create a string longer than 0x1fffffe8 characters"
69+
// if `sourceByteLen > (2 ** 29 - 24)` (slightly less than 512 MiB).
70+
// This is a useful invariant as it means source text offsets, number of lines, and number of tokens are limited
71+
// in range so they're always valid SMIs.
72+
// This makes it safe to use `>>` for division on these numbers without risking turning them into negative numbers.
73+
// So we can use the cheaper `>>` operator instead of `>>>` in various places.
6774
sourceText = utf8Slice.call(buffer, sourceStartPos, sourceStartPos + sourceByteLen);
6875
}
6976

apps/oxlint/src-js/plugins/tokens_methods.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,9 +1284,12 @@ export function getTokenByRangeStart<Options extends RangeOptions | null | undef
12841284
len = tokensAndCommentsLen;
12851285
}
12861286

1287-
// Binary search for token starting at the given index
1287+
// Binary search for token starting at the given index.
1288+
//
1289+
// Note: Source text is limited to 1 GiB max, so offsets cannot exceed 2^30.
1290+
// This makes it safe to use `>> 1` for division by 2 (which is faster than `>>> 1`).
12881291
for (let lo = 0, hi = len; lo < hi; ) {
1289-
const mid = (lo + hi) >>> 1;
1292+
const mid = (lo + hi) >> 1;
12901293
const tokenStart = uint32[mid << 2];
12911294
if (tokenStart < offset) {
12921295
lo = mid + 1;
@@ -1528,6 +1531,9 @@ function collectEntries(
15281531
*
15291532
* Returns `length` if all entries have `start` < `offset`.
15301533
*
1534+
* Note: Source text is limited to 1 GiB max, so number of tokens cannot exceed 2^30.
1535+
* This makes it safe to use `>> 1` for division by 2 below (which is faster than `>>> 1`).
1536+
*
15311537
* @param u32 - Uint32Array buffer (tokens, comments, or tokensAndComments)
15321538
* @param offset - Source offset to search for
15331539
* @param startIndex - Starting entry index for the search
@@ -1541,7 +1547,7 @@ export function firstTokenAtOrAfter(
15411547
length: number,
15421548
): number {
15431549
for (let endIndex = length; startIndex < endIndex; ) {
1544-
const mid = (startIndex + endIndex) >>> 1;
1550+
const mid = (startIndex + endIndex) >> 1;
15451551
if (uint32[mid << 2] < offset) {
15461552
startIndex = mid + 1;
15471553
} else {

napi/parser/src/raw_transfer.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ use crate::{
3232
// This is advantageous for 2 reasons:
3333
//
3434
// 1. V8 stores small integers ("SMI"s) inline, rather than on heap, which is more performant.
35-
// But when V8 pointer compression is enabled, 31 bits is the max integer considered an SMI.
36-
// So using 32 bits for offsets would be a large perf hit when pointer compression is enabled.
35+
// But 31 bits is the max positive integer considered an SMI.
36+
//
3737
// 2. JS bitwise operators work only on signed 32-bit integers, with 32nd bit as sign bit.
38-
// So avoiding the 32nd bit being set enables using `>>` bitshift operator, which may be cheaper
39-
// than `>>>`, without offsets being interpreted as negative.
38+
// So avoiding the 32nd bit being set enables using `>>` bitshift operator,
39+
// which is cheaper than `>>>`, and does not risk offsets being interpreted as negative.
4040

4141
const BUMP_ALIGN: usize = 16;
4242

0 commit comments

Comments
 (0)