Skip to content

Commit 5b35a53

Browse files
committed
perf(napi/parser): deserializing tokens use only int32 array (#21138)
Small perf optimization. When deserializing tokens in `oxc-parser`, use only the `Int32Array` view of the buffer, instead of both `Int32Array` and `Uint8Array`. Fewer values to keep in registers, and `int32` doesn't need to be obtained with a property lookup on each call.
1 parent f163d10 commit 5b35a53

1 file changed

Lines changed: 18 additions & 13 deletions

File tree

napi/parser/src-js/raw-transfer/eager.js

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,11 @@ const TOKEN_TYPES = [
162162
"JSXIdentifier",
163163
];
164164

165+
// Mask for active bits in `ESTreeKind` discriminants
166+
const TOKEN_KIND_MASK = 15;
167+
165168
// Details of Rust `Token` type
166169
const TOKEN_SIZE = 16;
167-
const KIND_FIELD_OFFSET = 8;
168-
const IS_ESCAPED_FIELD_OFFSET = 10;
169170

170171
/**
171172
* Deserialize tokens from buffer.
@@ -183,7 +184,7 @@ function deserializeTokens(buffer, sourceText, isJs) {
183184

184185
const tokens = [];
185186
while (pos < endPos) {
186-
tokens.push(deserializeToken(pos, buffer, sourceText, isJs));
187+
tokens.push(deserializeToken(pos, int32, sourceText, isJs));
187188
pos += TOKEN_SIZE;
188189
}
189190
return tokens;
@@ -192,21 +193,24 @@ function deserializeTokens(buffer, sourceText, isJs) {
192193
/**
193194
* Deserialize a token from buffer at position `pos`.
194195
* @param {number} pos - Position in buffer containing Rust `Token` type
195-
* @param {Uint8Array} buffer - Buffer containing AST in raw form
196+
* @param {Int32Array} int32 - Buffer containing AST in raw form as an `Int32Array`
196197
* @param {string} sourceText - Source for the file
197198
* @param {boolean} isJs - `true` if parsing in JS mode
198199
* @returns {Object} - Token object
199200
*/
200-
function deserializeToken(pos, buffer, sourceText, isJs) {
201-
const { int32 } = buffer;
202-
203-
const pos32 = pos >> 2;
204-
const start = int32[pos32],
205-
end = int32[pos32 + 1];
201+
function deserializeToken(pos, int32, sourceText, isJs) {
202+
const pos32 = pos >> 2,
203+
start = int32[pos32],
204+
end = int32[pos32 + 1],
205+
kindAndFlags = int32[pos32 + 2];
206206

207207
let value = sourceText.slice(start, end);
208208

209-
const kind = buffer[pos + KIND_FIELD_OFFSET];
209+
// `Kind` is byte at index 8 in `Token`.
210+
// `Kind` has 12 variants numbered from 0 to 11.
211+
// We have to mask the bottom byte (`& 0xFF`), so may as well mask off bits which can't be set in `Kind` at same time.
212+
// This may allow V8 to generate more efficient code for `TOKEN_TYPES[kind]`.
213+
const kind = kindAndFlags & TOKEN_KIND_MASK;
210214

211215
if (kind === REGEXP_KIND) {
212216
const patternEnd = value.lastIndexOf("/");
@@ -225,8 +229,9 @@ function deserializeToken(pos, buffer, sourceText, isJs) {
225229
// Strip leading `#` from private identifiers
226230
if (kind === PRIVATE_IDENTIFIER_KIND) value = value.slice(1);
227231

228-
// Unescape identifiers, keywords, and private identifiers in JS mode
229-
if (isJs && kind <= PRIVATE_IDENTIFIER_KIND && buffer[pos + IS_ESCAPED_FIELD_OFFSET] === 1) {
232+
// Unescape identifiers, keywords, and private identifiers in JS mode.
233+
// `is_escaped` flag is in byte 10 of `Token`, and is a `bool`.
234+
if (isJs && kind <= PRIVATE_IDENTIFIER_KIND && (kindAndFlags & 0x10000) !== 0) {
230235
value = unescapeIdentifier(value);
231236
}
232237

0 commit comments

Comments
 (0)