|
2 | 2 | * `SourceCode` methods related to tokens. |
3 | 3 | */ |
4 | 4 |
|
5 | | -import { ast, initAst } from "./source_code.ts"; |
6 | | -import { buffer, textDecoder } from "./source_code.ts"; |
| 5 | +import { ast, buffer, initAst, initSourceText, sourceText } from "./source_code.ts"; |
7 | 6 | import { getNodeLoc } from "./location.ts"; |
8 | 7 | import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.ts"; |
9 | 8 | import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; |
@@ -108,8 +107,8 @@ export interface PunctuatorToken extends BaseToken { |
/**
 * Token representing a regular expression literal, e.g. `/ab+c/gi`.
 */
export interface RegularExpressionToken extends BaseToken {
  type: "RegularExpression";
  regex: {
    /** Pattern text between the enclosing slashes */
    pattern: string;
    /** Flags following the closing slash (may be empty) */
    flags: string;
  };
}
115 | 114 |
|
@@ -138,51 +137,133 @@ const TokenProto = Object.create(Object.prototype, { |
138 | 137 | }, |
139 | 138 | }); |
140 | 139 |
|
// Tokens for the current file.
// Created lazily only when needed.
export let tokens: Token[] | null = null;
let comments: Comment[] | null = null;
export let tokensAndComments: TokenOrComment[] | null = null;

// View over the shared buffer as u32s. Only non-null while `initTokens` runs;
// it is set from `buffer.uint32` at the start and reset to `null` at the end.
let uint32: Uint32Array | null = null;

// `ESTreeKind` discriminants (set by Rust side)
const PRIVATE_IDENTIFIER_KIND = 2;
const REGEXP_KIND = 8;

// Indexed by `ESTreeKind` discriminant (matches `ESTreeKind` enum in `estree_kind.rs`)
const TOKEN_TYPES: Token["type"][] = [
  "Identifier",
  "Keyword",
  "PrivateIdentifier",
  "Punctuator",
  "Numeric",
  "String",
  "Boolean",
  "Null",
  "RegularExpression",
  "Template",
  "JSXText",
  "JSXIdentifier",
];

// Details of Rust `Token` type.
// Each serialized token occupies `TOKEN_SIZE` bytes; `start` and `end` are the
// first two u32 fields, followed by single-byte fields at the offsets below.
const TOKEN_SIZE = 16;
const KIND_FIELD_OFFSET = 8;
const IS_ESCAPED_FIELD_OFFSET = 10;
| 172 | + |
147 | 173 | /** |
148 | | - * Initialize TS-ESLint tokens for current file. |
| 174 | + * Initialize tokens for current file. |
149 | 175 | */ |
150 | 176 | export function initTokens() { |
151 | 177 | debugAssert(tokens === null, "Tokens already initialized"); |
152 | 178 |
|
153 | | - // Get tokens JSON from buffer, and deserialize it |
154 | | - debugAssertIsNonNull(buffer); |
155 | | - |
156 | | - const { uint32 } = buffer; |
157 | | - const tokensJsonLen = uint32[TOKENS_LEN_POS_32]; |
158 | | - if (tokensJsonLen === 0) { |
159 | | - tokens = []; |
160 | | - return; |
161 | | - } |
| 179 | + // Deserialize tokens from buffer |
| 180 | + if (sourceText === null) initSourceText(); |
| 181 | + debugAssertIsNonNull(sourceText); |
162 | 182 |
|
163 | | - const tokensJsonOffset = uint32[TOKENS_OFFSET_POS_32]; |
164 | | - const tokensJson = textDecoder.decode( |
165 | | - buffer.subarray(tokensJsonOffset, tokensJsonOffset + tokensJsonLen), |
166 | | - ); |
167 | | - tokens = JSON.parse(tokensJson) as Token[]; |
| 183 | + debugAssertIsNonNull(buffer); |
| 184 | + uint32 = buffer.uint32; |
168 | 185 |
|
169 | | - // Add `range` property to each token, and set prototype of each to `TokenProto` which provides getter for `loc` |
170 | | - for (const token of tokens) { |
171 | | - const { start, end } = token; |
172 | | - debugAssert( |
173 | | - typeof start === "number" && typeof end === "number", |
174 | | - "Precomputed tokens should include `start` and `end`", |
175 | | - ); |
| 186 | + let pos = uint32[TOKENS_OFFSET_POS_32]; |
| 187 | + const len = uint32[TOKENS_LEN_POS_32]; |
| 188 | + const endPos = pos + len * TOKEN_SIZE; |
176 | 189 |
|
177 | | - token.range = [start, end]; |
178 | | - // `TokenProto` provides getter for `loc` |
179 | | - Object.setPrototypeOf(token, TokenProto); |
| 190 | + tokens = []; |
| 191 | + while (pos < endPos) { |
| 192 | + tokens.push(deserializeToken(pos)); |
| 193 | + pos += TOKEN_SIZE; |
180 | 194 | } |
181 | 195 |
|
| 196 | + uint32 = null; |
| 197 | + |
182 | 198 | // Check `tokens` have valid ranges and are in ascending order |
183 | 199 | debugCheckValidRanges(tokens, "token"); |
184 | 200 | } |
185 | 201 |
|
/**
 * Deserialize a token from buffer at position `pos`.
 *
 * Reads the `start`/`end` source offsets and the kind discriminant out of the
 * raw buffer, slices the token's text from `sourceText`, and builds a `Token`
 * whose prototype (`TokenProto`) provides the lazy `loc` getter.
 *
 * Assumes `uint32`, `sourceText`, and `buffer` were set up by `initTokens`
 * (the non-null assertions below rely on that).
 *
 * @param pos - Position in buffer containing Rust `Token` type
 * @returns `Token` object
 */
function deserializeToken(pos: number): Token {
  // `start` and `end` are the first two u32 fields of the Rust `Token` struct.
  const pos32 = pos >> 2;
  const start = uint32![pos32],
    end = uint32![pos32 + 1];

  let value = sourceText!.slice(start, end);

  // Kind discriminant is a single byte at `KIND_FIELD_OFFSET` within the struct.
  const kind = buffer![pos + KIND_FIELD_OFFSET];

  if (kind === REGEXP_KIND) {
    // Split `/pattern/flags` at the LAST `/` — the pattern itself may contain
    // slashes (e.g. inside a character class), so search from the end.
    const patternEnd = value.lastIndexOf("/");
    return {
      // @ts-expect-error - TS doesn't understand `__proto__`
      __proto__: TokenProto,
      type: "RegularExpression",
      value,
      regex: {
        pattern: value.slice(1, patternEnd),
        flags: value.slice(patternEnd + 1),
      },
      start,
      end,
      range: [start, end],
    };
  }

  // Strip leading `#` from private identifiers
  if (kind === PRIVATE_IDENTIFIER_KIND) value = value.slice(1);

  // Unescape identifiers, keywords, and private identifiers
  // (discriminants 0..=PRIVATE_IDENTIFIER_KIND — see `TOKEN_TYPES` ordering).
  // The byte at `IS_ESCAPED_FIELD_OFFSET` is 1 when the text contains escapes.
  if (kind <= PRIVATE_IDENTIFIER_KIND && buffer![pos + IS_ESCAPED_FIELD_OFFSET] === 1) {
    value = unescapeIdentifier(value);
  }

  return {
    // @ts-expect-error - TS doesn't understand `__proto__`
    __proto__: TokenProto,
    type: TOKEN_TYPES[kind],
    value,
    start,
    end,
    range: [start, end],
  };
}
| 251 | + |
| 252 | +/** |
| 253 | + * Unescape an identifier. |
| 254 | + * |
| 255 | + * We do this on JS side, because escaped identifiers are so extremely rare that this function |
| 256 | + * is never called in practice anyway. |
| 257 | + * |
| 258 | + * @param {string} name - Identifier name to unescape |
| 259 | + * @returns {string} - Unescaped identifier name |
| 260 | + */ |
| 261 | +function unescapeIdentifier(name: string): string { |
| 262 | + return name.replace(/\\u(?:\{([0-9a-fA-F]+)\}|([0-9a-fA-F]{4}))/g, (_, hex1, hex2) => |
| 263 | + String.fromCodePoint(parseInt(hex1 ?? hex2, 16)), |
| 264 | + ); |
| 265 | +} |
| 266 | + |
186 | 267 | /** |
187 | 268 | * Check `tokens` have valid ranges and are in ascending order. |
188 | 269 | * |
@@ -332,7 +413,7 @@ function debugCheckTokensAndComments() { |
332 | 413 | } |
333 | 414 |
|
334 | 415 | /** |
335 | | - * Discard TS-ESLint tokens to free memory. |
| 416 | + * Discard tokens to free memory. |
336 | 417 | */ |
337 | 418 | export function resetTokens() { |
338 | 419 | tokens = null; |
|
0 commit comments