Skip to content

Commit 05ccf9f

Browse files
committed
perf(linter/plugins): transfer tokens via raw transfer (#19893)
Transfer tokens via raw transfer in Oxlint JS plugins. The deserializer is written by hand: `Token` is not a normal struct, so its deserializer cannot be auto-generated.
1 parent c2a42f6 commit 05ccf9f

File tree

8 files changed

+165
-80
lines changed

8 files changed

+165
-80
lines changed

apps/oxlint/src-js/plugins/source_code.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import type { BufferWithArrays, Comment, Node } from "./types.ts";
2929
import type { ScopeManager } from "./scope.ts";
3030

3131
// Text decoder, for decoding source text from buffer
32-
export const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
32+
const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
3333

3434
// Buffer containing AST. Set before linting a file by `setupSourceForFile`.
3535
export let buffer: BufferWithArrays | null = null;
@@ -245,10 +245,7 @@ export const SOURCE_CODE = Object.freeze({
245245
// This property is present in ESLint's `SourceCode`, but is undocumented
246246
get tokensAndComments(): (Token | Comment)[] {
247247
if (tokensAndComments === null) {
248-
if (tokens === null) {
249-
if (sourceText === null) initSourceText();
250-
initTokens();
251-
}
248+
if (tokens === null) initTokens();
252249
initTokensAndComments();
253250
}
254251
debugAssertIsNonNull(tokensAndComments);

apps/oxlint/src-js/plugins/tokens.ts

Lines changed: 111 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
* `SourceCode` methods related to tokens.
33
*/
44

5-
import { ast, initAst } from "./source_code.ts";
6-
import { buffer, textDecoder } from "./source_code.ts";
5+
import { ast, buffer, initAst, initSourceText, sourceText } from "./source_code.ts";
76
import { getNodeLoc } from "./location.ts";
87
import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.ts";
98
import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts";
@@ -108,8 +107,8 @@ export interface PunctuatorToken extends BaseToken {
108107
export interface RegularExpressionToken extends BaseToken {
109108
type: "RegularExpression";
110109
regex: {
111-
flags: string;
112110
pattern: string;
111+
flags: string;
113112
};
114113
}
115114

@@ -138,51 +137,133 @@ const TokenProto = Object.create(Object.prototype, {
138137
},
139138
});
140139

141-
// Tokens for the current file parsed by TS-ESLint.
140+
// Tokens for the current file.
142141
// Created lazily only when needed.
143142
export let tokens: Token[] | null = null;
144143
let comments: Comment[] | null = null;
145144
export let tokensAndComments: TokenOrComment[] | null = null;
146145

146+
let uint32: Uint32Array | null = null;
147+
148+
// `ESTreeKind` discriminants (set by Rust side)
149+
const PRIVATE_IDENTIFIER_KIND = 2;
150+
const REGEXP_KIND = 8;
151+
152+
// Indexed by `ESTreeKind` discriminant (matches `ESTreeKind` enum in `estree_kind.rs`)
153+
const TOKEN_TYPES: Token["type"][] = [
154+
"Identifier",
155+
"Keyword",
156+
"PrivateIdentifier",
157+
"Punctuator",
158+
"Numeric",
159+
"String",
160+
"Boolean",
161+
"Null",
162+
"RegularExpression",
163+
"Template",
164+
"JSXText",
165+
"JSXIdentifier",
166+
];
167+
168+
// Details of Rust `Token` type
169+
const TOKEN_SIZE = 16;
170+
const KIND_FIELD_OFFSET = 8;
171+
const IS_ESCAPED_FIELD_OFFSET = 10;
172+
147173
/**
148-
* Initialize TS-ESLint tokens for current file.
174+
* Initialize tokens for current file.
149175
*/
150176
export function initTokens() {
151177
debugAssert(tokens === null, "Tokens already initialized");
152178

153-
// Get tokens JSON from buffer, and deserialize it
154-
debugAssertIsNonNull(buffer);
155-
156-
const { uint32 } = buffer;
157-
const tokensJsonLen = uint32[TOKENS_LEN_POS_32];
158-
if (tokensJsonLen === 0) {
159-
tokens = [];
160-
return;
161-
}
179+
// Deserialize tokens from buffer
180+
if (sourceText === null) initSourceText();
181+
debugAssertIsNonNull(sourceText);
162182

163-
const tokensJsonOffset = uint32[TOKENS_OFFSET_POS_32];
164-
const tokensJson = textDecoder.decode(
165-
buffer.subarray(tokensJsonOffset, tokensJsonOffset + tokensJsonLen),
166-
);
167-
tokens = JSON.parse(tokensJson) as Token[];
183+
debugAssertIsNonNull(buffer);
184+
uint32 = buffer.uint32;
168185

169-
// Add `range` property to each token, and set prototype of each to `TokenProto` which provides getter for `loc`
170-
for (const token of tokens) {
171-
const { start, end } = token;
172-
debugAssert(
173-
typeof start === "number" && typeof end === "number",
174-
"Precomputed tokens should include `start` and `end`",
175-
);
186+
let pos = uint32[TOKENS_OFFSET_POS_32];
187+
const len = uint32[TOKENS_LEN_POS_32];
188+
const endPos = pos + len * TOKEN_SIZE;
176189

177-
token.range = [start, end];
178-
// `TokenProto` provides getter for `loc`
179-
Object.setPrototypeOf(token, TokenProto);
190+
tokens = [];
191+
while (pos < endPos) {
192+
tokens.push(deserializeToken(pos));
193+
pos += TOKEN_SIZE;
180194
}
181195

196+
uint32 = null;
197+
182198
// Check `tokens` have valid ranges and are in ascending order
183199
debugCheckValidRanges(tokens, "token");
184200
}
185201

202+
/**
203+
* Deserialize a token from buffer at position `pos`.
204+
* @param pos - Position in buffer containing Rust `Token` type
205+
* @returns `Token` object
206+
*/
207+
function deserializeToken(pos: number): Token {
208+
const pos32 = pos >> 2;
209+
const start = uint32![pos32],
210+
end = uint32![pos32 + 1];
211+
212+
let value = sourceText!.slice(start, end);
213+
214+
const kind = buffer![pos + KIND_FIELD_OFFSET];
215+
216+
if (kind === REGEXP_KIND) {
217+
const patternEnd = value.lastIndexOf("/");
218+
return {
219+
// @ts-expect-error - TS doesn't understand `__proto__`
220+
__proto__: TokenProto,
221+
type: "RegularExpression",
222+
value,
223+
regex: {
224+
pattern: value.slice(1, patternEnd),
225+
flags: value.slice(patternEnd + 1),
226+
},
227+
start,
228+
end,
229+
range: [start, end],
230+
};
231+
}
232+
233+
// Strip leading `#` from private identifiers
234+
if (kind === PRIVATE_IDENTIFIER_KIND) value = value.slice(1);
235+
236+
// Unescape identifiers, keywords, and private identifiers
237+
if (kind <= PRIVATE_IDENTIFIER_KIND && buffer![pos + IS_ESCAPED_FIELD_OFFSET] === 1) {
238+
value = unescapeIdentifier(value);
239+
}
240+
241+
return {
242+
// @ts-expect-error - TS doesn't understand `__proto__`
243+
__proto__: TokenProto,
244+
type: TOKEN_TYPES[kind],
245+
value,
246+
start,
247+
end,
248+
range: [start, end],
249+
};
250+
}
251+
252+
/**
253+
* Unescape an identifier.
254+
*
255+
* We do this on JS side, because escaped identifiers are so extremely rare that this function
256+
* is never called in practice anyway.
257+
*
258+
* @param {string} name - Identifier name to unescape
259+
* @returns {string} - Unescaped identifier name
260+
*/
261+
function unescapeIdentifier(name: string): string {
262+
return name.replace(/\\u(?:\{([0-9a-fA-F]+)\}|([0-9a-fA-F]{4}))/g, (_, hex1, hex2) =>
263+
String.fromCodePoint(parseInt(hex1 ?? hex2, 16)),
264+
);
265+
}
266+
186267
/**
187268
* Check `tokens` have valid ranges and are in ascending order.
188269
*
@@ -332,7 +413,7 @@ function debugCheckTokensAndComments() {
332413
}
333414

334415
/**
335-
* Discard TS-ESLint tokens to free memory.
416+
* Discard tokens to free memory.
336417
*/
337418
export function resetTokens() {
338419
tokens = null;

apps/oxlint/src-js/utils/globals.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@ export const { parse: JSONParse, stringify: JSONStringify } = JSON;
3232
export const { ownKeys: ReflectOwnKeys } = Reflect;
3333

3434
export const { iterator: SymbolIterator } = Symbol;
35+
36+
export const { fromCodePoint: StringFromCodePoint } = String;

apps/oxlint/src/js_plugins/parse.rs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use napi_derive::napi;
88

99
use oxc_allocator::Allocator;
1010
use oxc_ast_visit::utf8_to_utf16::Utf8ToUtf16;
11-
use oxc_estree_tokens::{ESTreeTokenOptionsJS, to_estree_tokens_json};
11+
use oxc_estree_tokens::{ESTreeTokenOptionsJS, update_tokens};
1212
use oxc_linter::RawTransferMetadata2 as RawTransferMetadata;
1313
use oxc_napi::get_source_type;
1414
use oxc_parser::{ParseOptions, Parser, ParserReturn, config::RuntimeParserConfig};
@@ -177,7 +177,7 @@ unsafe fn parse_raw_impl(
177177
})
178178
.with_config(RuntimeParserConfig::new(true))
179179
.parse();
180-
let ParserReturn { program: parsed_program, errors, tokens, panicked, .. } = parser_ret;
180+
let ParserReturn { program: parsed_program, errors, mut tokens, panicked, .. } = parser_ret;
181181
let program = allocator.alloc(parsed_program);
182182

183183
let mut parsing_failed = panicked || (!errors.is_empty() && !ignore_non_fatal_errors);
@@ -214,21 +214,14 @@ unsafe fn parse_raw_impl(
214214
Utf8ToUtf16::new(source_text)
215215
};
216216

217-
let tokens_json = to_estree_tokens_json(
218-
&tokens,
219-
program,
220-
original_source_text,
221-
&span_converter,
222-
ESTreeTokenOptionsJS,
223-
);
217+
update_tokens(&mut tokens, program, &span_converter, ESTreeTokenOptionsJS);
224218

225219
span_converter.convert_program(program);
226220
span_converter.convert_comments(&mut program.comments);
227221

228-
let tokens_json = allocator.alloc_str(&tokens_json);
229-
let tokens_offset = tokens_json.as_ptr() as u32;
222+
let tokens_offset = tokens.as_ptr() as u32;
230223
#[expect(clippy::cast_possible_truncation)]
231-
let tokens_len = tokens_json.len() as u32;
224+
let tokens_len = tokens.len() as u32;
232225

233226
// Return offset of `Program` within buffer (bottom 32 bits of pointer)
234227
let program_offset = ptr::from_ref(program) as u32;

crates/oxc_linter/src/context/host.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,6 @@ impl<'a> ContextSubHost<'a> {
109109
pub fn framework_options(&self) -> FrameworkOptions {
110110
self.framework_options
111111
}
112-
113-
/// Parser tokens collected for this script block.
114-
pub fn parser_tokens(&self) -> Option<&[Token]> {
115-
self.parser_tokens.as_ref().map(|tokens| &tokens[..])
116-
}
117112
}
118113

119114
/// Stores shared information about a file being linted.
@@ -239,6 +234,16 @@ impl<'a> ContextHost<'a> {
239234
&self.current_sub_host().disable_directives
240235
}
241236

237+
/// Shared reference to the parser tokens collected for this script block.
238+
pub fn parser_tokens(&self) -> Option<&[Token]> {
239+
self.current_sub_host().parser_tokens.as_ref().map(|tokens| &tokens[..])
240+
}
241+
242+
/// Mutable reference to the parser tokens collected for this script block.
243+
pub fn parser_tokens_mut(&mut self) -> Option<&mut ArenaVec<'a, Token>> {
244+
self.current_sub_host_mut().parser_tokens.as_mut()
245+
}
246+
242247
/// Path to the file being linted.
243248
///
244249
/// When created from a [`LintService`](`crate::service::LintService`), this

crates/oxc_linter/src/lib.rs

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@ use std::{
1414
string::ToString,
1515
};
1616

17-
use oxc_allocator::{Allocator, AllocatorPool, CloneIn};
17+
use oxc_allocator::{Allocator, AllocatorPool, CloneIn, TakeIn};
1818
use oxc_ast::{ast::Program, ast_kind::AST_TYPE_MAX};
1919
use oxc_ast_macros::ast;
2020
use oxc_ast_visit::utf8_to_utf16::Utf8ToUtf16;
2121
use oxc_data_structures::box_macros::boxed_array;
2222
use oxc_diagnostics::OxcDiagnostic;
23-
use oxc_estree_tokens::{ESTreeTokenOptionsJS, to_estree_tokens_json};
23+
use oxc_estree_tokens::{ESTreeTokenOptionsJS, update_tokens};
24+
use oxc_parser::Token;
2425
use oxc_semantic::AstNode;
2526
use oxc_span::Span;
2627

@@ -474,7 +475,18 @@ impl Linter {
474475
}
475476

476477
// `allocator` is a fixed-size allocator, so no need to clone AST into a new one
477-
self.convert_and_call_external_linter(external_rules, path, ctx_host, program, allocator);
478+
let tokens = ctx_host
479+
.parser_tokens_mut()
480+
.map(|tokens| tokens.take_in(allocator).into_bump_slice_mut());
481+
482+
self.convert_and_call_external_linter(
483+
external_rules,
484+
path,
485+
ctx_host,
486+
program,
487+
tokens,
488+
allocator,
489+
);
478490
}
479491

480492
#[cfg(not(all(target_pointer_width = "64", target_endian = "little")))]
@@ -524,11 +536,15 @@ impl Linter {
524536
js_allocator.alloc(program)
525537
};
526538

539+
// Clone tokens into fixed-size allocator
540+
let tokens = ctx_host.parser_tokens().map(|tokens| js_allocator.alloc_slice_copy(tokens));
541+
527542
self.convert_and_call_external_linter(
528543
external_rules,
529544
path,
530545
ctx_host,
531546
program,
547+
tokens,
532548
&js_allocator,
533549
);
534550

@@ -546,6 +562,7 @@ impl Linter {
546562
path: &Path,
547563
ctx_host: &ContextHost<'_>,
548564
program: &mut Program<'_>,
565+
tokens: Option<&mut [Token]>,
549566
allocator: &Allocator,
550567
) {
551568
// If has BOM, remove it
@@ -569,23 +586,15 @@ impl Linter {
569586
Utf8ToUtf16::new(source_text)
570587
};
571588

572-
let (tokens_offset, tokens_len) =
573-
if let Some(tokens) = ctx_host.current_sub_host().parser_tokens() {
574-
let tokens_json = to_estree_tokens_json(
575-
tokens,
576-
program,
577-
original_source_text,
578-
&span_converter,
579-
ESTreeTokenOptionsJS,
580-
);
581-
let tokens_json = allocator.alloc_str(&tokens_json);
582-
let tokens_offset = tokens_json.as_ptr() as u32;
583-
#[expect(clippy::cast_possible_truncation)]
584-
let tokens_len = tokens_json.len() as u32;
585-
(tokens_offset, tokens_len)
586-
} else {
587-
(0, 0)
588-
};
589+
let (tokens_offset, tokens_len) = if let Some(tokens) = tokens {
590+
update_tokens(tokens, program, &span_converter, ESTreeTokenOptionsJS);
591+
let tokens_offset = tokens.as_ptr() as u32;
592+
#[expect(clippy::cast_possible_truncation)]
593+
let tokens_len = tokens.len() as u32;
594+
(tokens_offset, tokens_len)
595+
} else {
596+
(0, 0)
597+
};
589598

590599
span_converter.convert_program(program);
591600
span_converter.convert_comments(&mut program.comments);
@@ -752,9 +761,9 @@ pub struct RawTransferMetadata2 {
752761
pub is_jsx: bool,
753762
/// `true` if source text has a BOM.
754763
pub has_bom: bool,
755-
/// Offset of serialized ESTree tokens JSON within buffer.
764+
/// Offset of lexer `Token`s within buffer.
756765
pub tokens_offset: u32,
757-
/// UTF-8 byte length of serialized ESTree tokens JSON.
766+
/// Number of lexer `Token`s.
758767
pub tokens_len: u32,
759768
}
760769

0 commit comments

Comments (0)