Skip to content

Commit a9bd8da

Browse files
authored
Emit trivia as sibling tokens and don't generate a RuleKind for it (#885)
Closes #737. Functionally it's done, but it needs a rebase and a final polish. For now, I'm using the existing PG infra until we clean it up (#638, slated for this sprint as well).
1 parent 0125717 commit a9bd8da

483 files changed

Lines changed: 3295 additions & 3370 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.changeset/soft-ties-sort.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@nomicfoundation/slang": minor
3+
---
4+
5+
Flatten the trivia syntax nodes into sibling tokens

crates/codegen/grammar/src/constructor.rs

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,12 @@ impl GrammarConstructorDslV2 for Grammar {
4545

4646
let leading_trivia = Rc::new(NamedTriviaParser {
4747
name: "LeadingTrivia",
48-
def: resolve_trivia(lang.leading_trivia.clone(), &mut ctx),
48+
def: resolve_trivia(lang.leading_trivia.clone(), TriviaKind::Leading, &mut ctx),
4949
}) as Rc<dyn TriviaParserDefinition>;
5050

5151
let trailing_trivia = Rc::new(NamedTriviaParser {
5252
name: "TrailingTrivia",
53-
def: resolve_trivia(lang.trailing_trivia.clone(), &mut ctx),
53+
def: resolve_trivia(lang.trailing_trivia.clone(), TriviaKind::Trailing, &mut ctx),
5454
}) as Rc<dyn TriviaParserDefinition>;
5555

5656
for (_lex_ctx, item) in items.values() {
@@ -507,35 +507,43 @@ fn resolve_keyword_value(value: model::KeywordValue) -> KeywordScannerDefinition
507507
}
508508
}
509509

510-
fn resolve_trivia(parser: model::TriviaParser, ctx: &mut ResolveCtx<'_>) -> ParserDefinitionNode {
510+
fn resolve_trivia(
511+
parser: model::TriviaParser,
512+
kind: TriviaKind,
513+
ctx: &mut ResolveCtx<'_>,
514+
) -> ParserDefinitionNode {
511515
match parser {
512516
model::TriviaParser::Optional { parser } => {
513-
ParserDefinitionNode::Optional(Box::new(resolve_trivia(*parser, ctx)))
517+
ParserDefinitionNode::Optional(Box::new(resolve_trivia(*parser, kind, ctx)))
514518
}
515519
model::TriviaParser::OneOrMore { parser } => ParserDefinitionNode::OneOrMore(
516-
Labeled::anonymous(Box::new(resolve_trivia(*parser, ctx))),
520+
Labeled::anonymous(Box::new(resolve_trivia(*parser, kind, ctx))),
517521
),
518522
model::TriviaParser::ZeroOrMore { parser } => ParserDefinitionNode::ZeroOrMore(
519-
Labeled::anonymous(Box::new(resolve_trivia(*parser, ctx))),
523+
Labeled::anonymous(Box::new(resolve_trivia(*parser, kind, ctx))),
520524
),
521525
model::TriviaParser::Sequence { parsers } => ParserDefinitionNode::Sequence(
522526
parsers
523527
.into_iter()
524-
.map(|scanner| Labeled::anonymous(resolve_trivia(scanner, ctx)))
528+
.map(|scanner| Labeled::anonymous(resolve_trivia(scanner, kind, ctx)))
525529
.collect(),
526530
),
527531
model::TriviaParser::Choice { parsers } => {
528532
ParserDefinitionNode::Choice(Labeled::anonymous(
529533
parsers
530534
.into_iter()
531-
.map(|scanner| resolve_trivia(scanner, ctx))
535+
.map(|scanner| resolve_trivia(scanner, kind, ctx))
532536
.collect(),
533537
))
534538
}
535539
model::TriviaParser::Trivia { reference } => {
536540
match resolve_grammar_element(&reference, ctx) {
537541
GrammarElement::ScannerDefinition(parser) => {
538-
ParserDefinitionNode::ScannerDefinition(parser)
542+
// Hack: This is a sequence of a single scanner in order to emit the names
543+
ParserDefinitionNode::Sequence(vec![Labeled::with_builtin_label(
544+
kind.label(),
545+
ParserDefinitionNode::ScannerDefinition(parser),
546+
)])
539547
}
540548
_ => panic!("Expected {reference} to be a ScannerDefinition"),
541549
}
@@ -793,6 +801,21 @@ fn resolve_precedence(
793801
}
794802
}
795803

804+
#[derive(Clone, Copy)]
805+
enum TriviaKind {
806+
Leading,
807+
Trailing,
808+
}
809+
810+
impl TriviaKind {
811+
fn label(self) -> BuiltInLabel {
812+
match self {
813+
TriviaKind::Leading => BuiltInLabel::LeadingTrivia,
814+
TriviaKind::Trailing => BuiltInLabel::TrailingTrivia,
815+
}
816+
}
817+
}
818+
796819
trait IntoParserDefNode {
797820
fn into_parser_def_node(self) -> ParserDefinitionNode;
798821
}
@@ -861,6 +884,8 @@ enum BuiltInLabel {
861884
Operand,
862885
LeftOperand,
863886
RightOperand,
887+
LeadingTrivia,
888+
TrailingTrivia,
864889
}
865890

866891
impl<T> LabeledExt<T> for Labeled<T> {

crates/codegen/parser/generator/src/rust_generator.rs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ pub struct RustGenerator {
3232

3333
rule_kinds: BTreeSet<&'static str>,
3434
token_kinds: BTreeSet<&'static str>,
35-
trivia_kinds: BTreeSet<&'static str>,
3635
trivia_scanner_names: BTreeSet<&'static str>,
3736
labels: BTreeSet<String>,
3837

@@ -41,6 +40,7 @@ pub struct RustGenerator {
4140
keyword_compound_scanners: BTreeMap<&'static str, String>, // (name of the KW scanner, code)
4241

4342
parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)
43+
trivia_parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)
4444

4545
#[serde(skip)]
4646
top_level_scanner_names: BTreeSet<&'static str>,
@@ -280,6 +280,8 @@ impl GrammarVisitor for RustGenerator {
280280
self.labels.remove("operand");
281281
self.labels.remove("left_operand");
282282
self.labels.remove("right_operand");
283+
self.labels.remove("leading_trivia");
284+
self.labels.remove("trailing_trivia");
283285

284286
// Just being anal about tidying up :)
285287
self.all_scanners.clear();
@@ -306,12 +308,9 @@ impl GrammarVisitor for RustGenerator {
306308

307309
fn trivia_parser_definition_enter(&mut self, parser: &TriviaParserDefinitionRef) {
308310
self.set_current_context(parser.context());
309-
self.rule_kinds.insert(parser.name());
310-
self.trivia_kinds.insert(parser.name());
311311
let trivia_scanners = {
312312
use codegen_grammar::Visitable as _;
313-
// TODO(#737): This will be cleaned up once we don't emit rule kinds for trivia parsers
314-
// Visit each node and only collect the scanner definition names:
313+
315314
#[derive(Default)]
316315
struct CollectTriviaScanners {
317316
scanner_names: BTreeSet<&'static str>,
@@ -328,15 +327,8 @@ impl GrammarVisitor for RustGenerator {
328327
};
329328
self.trivia_scanner_names.extend(trivia_scanners);
330329

331-
self.parser_functions.insert(
332-
parser.name(),
333-
{
334-
let code = parser.to_parser_code();
335-
let rule_kind = format_ident!("{}", parser.name());
336-
quote! { #code.with_kind(RuleKind::#rule_kind) }
337-
}
338-
.to_string(),
339-
);
330+
self.trivia_parser_functions
331+
.insert(parser.name(), parser.to_parser_code().to_string());
340332
}
341333

342334
fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) {

crates/codegen/parser/runtime/src/cst.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl Node {
160160

161161
pub fn is_trivia(&self) -> bool {
162162
match self {
163-
Self::Rule(rule) => rule.kind.is_trivia(),
163+
Self::Rule(_) => false,
164164
Self::Token(token) => token.kind.is_trivia(),
165165
}
166166
}

crates/codegen/parser/runtime/src/kinds.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,6 @@ pub enum RuleKind {
5353
Rule3,
5454
}
5555

56-
impl RuleKind {
57-
pub fn is_trivia(&self) -> bool {
58-
unreachable!("Expanded by the template")
59-
}
60-
}
61-
6256
#[derive(
6357
Debug,
6458
Eq,
@@ -82,6 +76,8 @@ pub enum NodeLabel {
8276
Operand,
8377
LeftOperand,
8478
RightOperand,
79+
LeadingTrivia,
80+
TrailingTrivia,
8581
// Used for testing this crate, this is generated in the client code
8682
Label1,
8783
Label2,

crates/codegen/parser/runtime/src/parser_support/parser_function.rs

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,41 +13,38 @@ pub trait ParserFunction<L>
1313
where
1414
Self: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
1515
{
16-
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput;
16+
fn parse(&self, language: &L, input: &str) -> ParseOutput;
1717
}
1818

1919
impl<L, F> ParserFunction<L> for F
2020
where
2121
L: Lexer,
2222
F: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
2323
{
24-
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput {
24+
fn parse(&self, language: &L, input: &str) -> ParseOutput {
2525
let mut stream = ParserContext::new(input);
2626
let mut result = self(language, &mut stream);
2727

2828
// For a succesful/recovered parse, collect any remaining trivia as part of the parse result
29-
// TODO(#737): Remove this once we unconditionally collect trivia
30-
if collect_trivia {
31-
if let ParserResult::Match(r#match) = &mut result {
32-
let [topmost] = r#match.nodes.as_mut_slice() else {
33-
unreachable!(
34-
"Match at the top level of a parse does not have exactly one Rule node"
35-
)
36-
};
37-
38-
let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
39-
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
40-
Some(eof_trivia.nodes)
41-
}
42-
_ => None,
43-
};
29+
if let ParserResult::Match(r#match) = &mut result {
30+
let [topmost] = r#match.nodes.as_mut_slice() else {
31+
unreachable!(
32+
"Match at the top level of a parse does not have exactly one Rule node"
33+
)
34+
};
35+
36+
let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
37+
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
38+
Some(eof_trivia.nodes)
39+
}
40+
_ => None,
41+
};
4442

45-
if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
46-
let mut new_children = rule.children.clone();
47-
new_children.extend(eof_trivia);
43+
if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
44+
let mut new_children = rule.children.clone();
45+
new_children.extend(eof_trivia);
4846

49-
topmost.node = cst::Node::rule(rule.kind, new_children);
50-
}
47+
topmost.node = cst::Node::rule(rule.kind, new_children);
5148
}
5249
}
5350

crates/codegen/parser/runtime/src/parser_support/parser_result.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ impl ParserResult {
8282
{
8383
*prev_label = Some(label);
8484
}
85+
// Also allow to name a single trivia token node
86+
else if let ParserResult::Match(Match { nodes, .. }) = &mut self {
87+
if let [node] = nodes.as_mut_slice() {
88+
if node.as_token().is_some_and(|tok| tok.kind.is_trivia()) {
89+
node.label = Some(label);
90+
}
91+
}
92+
}
8593

8694
self
8795
}

crates/codegen/parser/runtime/src/parser_support/sequence_helper.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,27 @@ impl SequenceHelper {
123123
return;
124124
}
125125

126-
let tokens: Vec<_> =
127-
next.nodes.iter().filter_map(|node| node.as_token()).collect();
128-
let mut rules = next.nodes.iter().filter_map(|node| node.as_rule());
129-
130-
let is_single_token_with_trivia =
131-
tokens.len() == 1 && rules.all(|rule| rule.kind.is_trivia());
132-
let next_token = tokens.first().map(|token| token.kind);
133-
134-
// NOTE: We only support skipping to a single token (optionally with trivia)
135-
debug_assert!(is_single_token_with_trivia);
136-
debug_assert_eq!(next_token, Some(running.found));
126+
// We only support skipping to a single, significant token.
127+
// Sanity check that we are recovering to the expected one.
128+
let next_token = next.nodes.iter().try_fold(None, |acc, node| {
129+
match &**node {
130+
cst::Node::Token(token) if token.kind.is_trivia() => Ok(acc),
131+
cst::Node::Token(token) => {
132+
match acc {
133+
None => Ok(Some(token.kind)),
134+
Some(..) => {
135+
debug_assert!(false, "Recovery skipped to multiple tokens: {acc:?}, {token:?}");
136+
Err(())
137+
}
138+
}
139+
}
140+
cst::Node::Rule(rule) => {
141+
debug_assert!(false, "Recovery skipped to a rule: {rule:?}");
142+
Err(())
143+
}
144+
}
145+
});
146+
debug_assert_eq!(next_token, Ok(Some(running.found)));
137147

138148
running.nodes.push(LabeledNode::anonymous(cst::Node::token(
139149
TokenKind::SKIPPED,

crates/codegen/parser/runtime/src/templates/kinds.rs.jinja2

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,6 @@ pub enum RuleKind {
2222
{%- endfor -%}
2323
}
2424

25-
impl RuleKind {
26-
pub fn is_trivia(&self) -> bool {
27-
#[allow(clippy::match_like_matches_macro)]
28-
match self {
29-
{%- for variant in generator.trivia_kinds -%}
30-
Self::{{ variant }} => true,
31-
{%- endfor -%}
32-
_ => false,
33-
}
34-
}
35-
}
36-
3725
#[derive(
3826
Debug,
3927
Eq,
@@ -58,6 +46,8 @@ pub enum NodeLabel {
5846
Operand,
5947
LeftOperand,
6048
RightOperand,
49+
LeadingTrivia,
50+
TrailingTrivia,
6151
// Generated
6252
{% for variant in generator.labels -%}
6353
{{ variant | pascal_case }},

crates/codegen/parser/runtime/src/templates/language.rs.jinja2

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ impl Language {
8484
fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} }
8585
{% endfor %}
8686

87+
{% for parser_name, parser_code in generator.trivia_parser_functions %}
88+
#[allow(unused_assignments, unused_parens)]
89+
fn {{ parser_name | snake_case }}(&self, input: &mut ParserContext<'_>) -> ParserResult { {{ parser_code }} }
90+
{% endfor %}
91+
8792
/********************************************
8893
* Scanner Functions
8994
********************************************/
@@ -102,12 +107,7 @@ impl Language {
102107
pub fn parse(&self, kind: RuleKind, input: &str) -> ParseOutput {
103108
match kind {
104109
{%- for parser_name, _ in generator.parser_functions -%}
105-
{# TODO(#737): Remove the special case once we stop generating RuleKind for trivia #}
106-
{%- if parser_name is ending_with("Trivia") -%}
107-
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, false),
108-
{%- else -%}
109-
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, true),
110-
{%- endif -%}
110+
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input),
111111
{%- endfor -%}
112112
}
113113
}

0 commit comments

Comments
 (0)