Skip to content

Commit 8db0e78

Browse files
committed
feat(linter/plugins): handle BOMs (#18376)
Closes #12526.

Handle a BOM at the start of files in the same way that ESLint does: do not include it in the source text on the JS side, but have `context.sourceCode.hasBOM` evaluate to `true`.

Method:

* Alter `program.source_text` to trim off the BOM before passing the AST to the JS side.
* Add a `has_bom` flag to `RawTransferMetadata`.
* Add the ability to apply an offset in the conversion from UTF-8 to UTF-16 spans.

The result is that the file, as seen on the JS side, is as if the BOM didn't exist (except for the `hasBOM` flag). Spans are converted accordingly in the JS-side AST, and converted back when passing diagnostics back to Rust.
1 parent 6ac09e2 commit 8db0e78

File tree

21 files changed

+309
-173
lines changed

21 files changed

+309
-173
lines changed

apps/oxlint/conformance/snapshot.md

Lines changed: 6 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
| Status | Count | % |
88
| ----------------- | ----- | ------ |
99
| Total rules | 292 | 100.0% |
10-
| Fully passing | 289 | 99.0% |
11-
| Partially passing | 3 | 1.0% |
10+
| Fully passing | 291 | 99.7% |
11+
| Partially passing | 1 | 0.3% |
1212
| Fully failing | 0 | 0.0% |
1313
| Load errors | 0 | 0.0% |
1414
| No tests run | 0 | 0.0% |
@@ -18,8 +18,8 @@
1818
| Status | Count | % |
1919
| ----------- | ----- | ------ |
2020
| Total tests | 33090 | 100.0% |
21-
| Passing | 32803 | 99.1% |
22-
| Failing | 5 | 0.0% |
21+
| Passing | 32807 | 99.1% |
22+
| Failing | 1 | 0.0% |
2323
| Skipped | 282 | 0.9% |
2424

2525
## Fully Passing Rules
@@ -156,6 +156,7 @@
156156
- `no-inner-declarations` (68 tests)
157157
- `no-invalid-regexp` (108 tests)
158158
- `no-invalid-this` (562 tests) (4 skipped)
159+
- `no-irregular-whitespace` (280 tests)
159160
- `no-iterator` (9 tests)
160161
- `no-label-var` (5 tests)
161162
- `no-labels` (29 tests)
@@ -306,6 +307,7 @@
306307
- `symbol-description` (8 tests)
307308
- `template-curly-spacing` (57 tests)
308309
- `template-tag-spacing` (63 tests)
310+
- `unicode-bom` (7 tests)
309311
- `use-isnan` (214 tests)
310312
- `valid-typeof` (54 tests)
311313
- `vars-on-top` (61 tests)
@@ -317,8 +319,6 @@
317319
## Rules with Failures
318320

319321
- `no-eval` - 100 / 101 (99.0%)
320-
- `no-irregular-whitespace` - 279 / 280 (99.6%)
321-
- `unicode-bom` - 4 / 7 (57.1%)
322322

323323
## Rules with Failures Detail
324324

@@ -359,146 +359,3 @@ AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: []
359359
at runInvalidTestCase (apps/oxlint/dist/index.js)
360360
at apps/oxlint/dist/index.js
361361

362-
363-
### `no-irregular-whitespace`
364-
365-
Pass: 279 / 280 (99.6%)
366-
Fail: 1 / 280 (0.4%)
367-
Skip: 0 / 280 (0.0%)
368-
369-
#### no-irregular-whitespace > valid
370-
371-
```js
372-
console.log('hello BOM');
373-
```
374-
375-
```json
376-
{}
377-
```
378-
379-
AssertionError [ERR_ASSERTION]: Should have no errors but had 1: [
380-
{
381-
ruleId: 'rule-to-test/no-irregular-whitespace',
382-
message: 'Irregular whitespace not allowed.',
383-
messageId: 'noIrregularWhitespace',
384-
severity: 1,
385-
nodeType: null,
386-
line: 1,
387-
column: 0,
388-
endLine: 1,
389-
endColumn: 1,
390-
suggestions: null
391-
}
392-
]
393-
394-
1 !== 0
395-
396-
at assertErrorCountIsCorrect (apps/oxlint/dist/index.js)
397-
at assertValidTestCasePasses (apps/oxlint/dist/index.js)
398-
at runValidTestCase (apps/oxlint/dist/index.js)
399-
at apps/oxlint/dist/index.js
400-
401-
402-
### `unicode-bom`
403-
404-
Pass: 4 / 7 (57.1%)
405-
Fail: 3 / 7 (42.9%)
406-
Skip: 0 / 7 (0.0%)
407-
408-
#### unicode-bom > valid
409-
410-
```js
411-
 var a = 123;
412-
```
413-
414-
```json
415-
{
416-
"options": [
417-
"always"
418-
]
419-
}
420-
```
421-
422-
AssertionError [ERR_ASSERTION]: Should have no errors but had 1: [
423-
{
424-
ruleId: 'rule-to-test/unicode-bom',
425-
message: 'Expected Unicode BOM (Byte Order Mark).',
426-
messageId: 'expected',
427-
severity: 1,
428-
nodeType: null,
429-
line: 1,
430-
column: 0,
431-
endLine: 1,
432-
endColumn: 0,
433-
suggestions: null
434-
}
435-
]
436-
437-
1 !== 0
438-
439-
at assertErrorCountIsCorrect (apps/oxlint/dist/index.js)
440-
at assertValidTestCasePasses (apps/oxlint/dist/index.js)
441-
at runValidTestCase (apps/oxlint/dist/index.js)
442-
at apps/oxlint/dist/index.js
443-
444-
445-
#### unicode-bom > invalid
446-
447-
```js
448-
 var a = 123;
449-
```
450-
451-
```json
452-
{
453-
"output": " var a = 123;",
454-
"errors": [
455-
{
456-
"messageId": "unexpected",
457-
"line": 1,
458-
"column": 1
459-
}
460-
]
461-
}
462-
```
463-
464-
AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: []
465-
466-
0 !== 1
467-
468-
at assertErrorCountIsCorrect (apps/oxlint/dist/index.js)
469-
at assertInvalidTestCasePasses (apps/oxlint/dist/index.js)
470-
at runInvalidTestCase (apps/oxlint/dist/index.js)
471-
at apps/oxlint/dist/index.js
472-
473-
474-
#### unicode-bom > invalid
475-
476-
```js
477-
 var a = 123;
478-
```
479-
480-
```json
481-
{
482-
"output": " var a = 123;",
483-
"options": [
484-
"never"
485-
],
486-
"errors": [
487-
{
488-
"messageId": "unexpected",
489-
"line": 1,
490-
"column": 1
491-
}
492-
]
493-
}
494-
```
495-
496-
AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: []
497-
498-
0 !== 1
499-
500-
at assertErrorCountIsCorrect (apps/oxlint/dist/index.js)
501-
at assertInvalidTestCasePasses (apps/oxlint/dist/index.js)
502-
at runInvalidTestCase (apps/oxlint/dist/index.js)
503-
at apps/oxlint/dist/index.js
504-

apps/oxlint/src-js/generated/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export const BUFFER_ALIGN = 4294967296;
66
export const DATA_POINTER_POS_32 = 536870902;
77
export const IS_TS_FLAG_POS = 2147483612;
88
export const IS_JSX_FLAG_POS = 2147483613;
9+
export const HAS_BOM_FLAG_POS = 2147483614;
910
export const PROGRAM_OFFSET = 0;
1011
export const SOURCE_START_OFFSET = 8;
1112
export const SOURCE_LEN_OFFSET = 16;

apps/oxlint/src-js/plugins/lint.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { allOptions, DEFAULT_OPTIONS_ID } from "./options.ts";
55
import { diagnostics } from "./report.ts";
66
import { setSettingsForFile, resetSettings } from "./settings.ts";
77
import { ast, initAst, resetSourceAndAst, setupSourceForFile } from "./source_code.ts";
8+
import { HAS_BOM_FLAG_POS } from "../generated/constants.ts";
89
import { typeAssertIs, debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts";
910
import { getErrorMessage } from "../utils/utils.ts";
1011
import { setGlobalsForFile, resetGlobals } from "./globals.ts";
@@ -154,7 +155,7 @@ export function lintFileImpl(
154155
//
155156
// But... source text and AST can be accessed in body of `create` method, or `before` hook, via `context.sourceCode`.
156157
// So we pass the buffer to source code module here, so it can decode source text / deserialize AST on demand.
157-
const hasBOM = false; // TODO: Set this correctly
158+
const hasBOM = buffer[HAS_BOM_FLAG_POS] === 1;
158159
const parserServices = PARSER_SERVICES_DEFAULT; // TODO: Set this correctly
159160
setupSourceForFile(buffer, hasBOM, parserServices);
160161

apps/oxlint/src/js_plugins/parse.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ unsafe fn parse_raw_impl(
151151

152152
// Parse source.
153153
// Enclose parsing logic in a scope to make 100% sure no references to within `Allocator` exist after this.
154-
let program_offset = {
154+
let (program_offset, has_bom) = {
155155
// SAFETY: We checked above that `source_len` does not exceed length of buffer
156156
let source_text = unsafe { buffer.get_unchecked(..source_len) };
157157
// SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
@@ -179,22 +179,46 @@ unsafe fn parse_raw_impl(
179179

180180
if parsing_failed {
181181
// Use sentinel value for program offset to indicate that parsing failed
182-
PARSE_FAIL_SENTINEL
182+
(PARSE_FAIL_SENTINEL, false)
183183
} else {
184-
// Convert spans to UTF-16
185-
let span_converter = Utf8ToUtf16::new(source_text);
184+
// If has BOM, remove it
185+
const BOM: &str = "\u{feff}";
186+
const BOM_LEN: usize = BOM.len();
187+
188+
let mut source_text = program.source_text;
189+
let has_bom = source_text.starts_with(BOM);
190+
if has_bom {
191+
source_text = &source_text[BOM_LEN..];
192+
program.source_text = source_text;
193+
}
194+
195+
// Convert spans to UTF-16.
196+
// If source starts with BOM, create converter which ignores the BOM.
197+
let span_converter = if has_bom {
198+
#[expect(clippy::cast_possible_truncation)]
199+
Utf8ToUtf16::new_with_offset(source_text, BOM_LEN as u32)
200+
} else {
201+
Utf8ToUtf16::new(source_text)
202+
};
203+
186204
span_converter.convert_program(program);
187205
span_converter.convert_comments(&mut program.comments);
188206

189207
// Return offset of `Program` within buffer (bottom 32 bits of pointer)
190-
ptr::from_ref(program) as u32
208+
let program_offset = ptr::from_ref(program) as u32;
209+
210+
(program_offset, has_bom)
191211
}
192212
};
193213

194214
// Write metadata into end of buffer
195215
#[allow(clippy::cast_possible_truncation)]
196-
let metadata =
197-
RawTransferMetadata::new(program_offset, source_type.is_typescript(), source_type.is_jsx());
216+
let metadata = RawTransferMetadata::new(
217+
program_offset,
218+
source_type.is_typescript(),
219+
source_type.is_jsx(),
220+
has_bom,
221+
);
198222
const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
199223
const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
200224
// SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"categories": { "correctness": "off" },
3+
"jsPlugins": ["./plugin.ts"],
4+
"rules": {
5+
"bom-plugin/bom": "error"
6+
}
7+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
debugger;
2+
debugger;
3+
debugger;
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
debugger;
2+
// 😀🤪😆😎🤮
3+
debugger;
4+
debugger;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
debugger;
2+
debugger;
3+
debugger;
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
debugger;
2+
// 😀🤪😆😎🤮
3+
debugger;
4+
debugger;

0 commit comments

Comments
 (0)