Skip to content

Commit 827bd93

Browse files
authored
lexers: add initial version of CUE lexer (#858)
This commit adds a decent first cut of a lexer for CUE (https://cuelang.org). The main aspects of the language are implemented, although a number of edge cases are likely not covered, especially around string interpolation. Even so, it is a sufficiently decent first cut to warrant merging into the chroma project rather than first working in a fork. This was tested locally for visual correctness by running `mkdir -p _examples`, then `(command cd ./cmd/chroma; go run . --lexer=cue --html --html-inline-styles) < lexers/testdata/cue.actual > _examples/cue.html`, and then loading _examples/cue.html in a browser. Closes #857
1 parent 9087c63 commit 827bd93

File tree

3 files changed

+224
-0
lines changed

3 files changed

+224
-0
lines changed

lexers/embedded/cue.xml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
<lexer>
2+
<config>
3+
<name>CUE</name>
4+
<alias>cue</alias>
5+
<filename>*.cue</filename>
6+
<mime_type>text/x-cue</mime_type>
7+
<dot_all>true</dot_all>
8+
<ensure_nl>true</ensure_nl>
9+
</config>
10+
<rules>
11+
<state name="root">
12+
<rule pattern="[^\S\n]+">
13+
<token type="Text"/>
14+
</rule>
15+
<rule pattern="\\\n">
16+
<token type="Text"/>
17+
</rule>
18+
<!-- Line comment: "//" through the end of the line. The body is matched with "*" so that an empty "//" comment is still a CommentSingle token instead of falling through to the Operator rule as two "/" tokens. -->
<rule pattern="//[^\n\r]*">
19+
<token type="CommentSingle"/>
20+
</rule>
21+
<rule pattern="\n">
22+
<token type="Text"/>
23+
</rule>
24+
<!-- Operators. NOTE(review): the alternatives are tried left to right, so "=", "<" and ">" are listed ahead of "=~", "<=" and ">=" and presumably always win first; the cue.expected fixture shows "=~" split into Operator "=" followed by Punctuation "~". Reordering longest-first would change that fixture, so confirm before changing. -->
<rule pattern="(\+|&amp;&amp;|==|&lt;|=|-|\|\||!=|&gt;|:|\*|&amp;|=~|&lt;=|\?|\[|\]|,|/|\||!~|&gt;=|!|_\|_|\.\.\.)">
25+
<token type="Operator"/>
26+
</rule>
27+
<rule pattern="#*&#34;+">
28+
<token type="LiteralString"/>
29+
<push state="string"/>
30+
</rule>
31+
<rule pattern="'(\\\\|\\'|[^'\n])*['\n]">
32+
<token type="LiteralString"/>
33+
</rule>
34+
<rule pattern="0[boxX][0-9a-fA-F][_0-9a-fA-F]*|(\.\d+|\d[_\d]*(\.\d*)?)([eE][+-]?\d+)?[KMGTP]?i?">
35+
<token type="LiteralNumber"/>
36+
</rule>
37+
<rule pattern="[~!%^&amp;*()+=|\[\]:;,.&lt;&gt;/?-]">
38+
<token type="Punctuation"/>
39+
</rule>
40+
<rule pattern="[{}]">
41+
<token type="Punctuation"/>
42+
</rule>
43+
<rule pattern="(import|for|if|in|let|package)\b">
44+
<token type="Keyword"/>
45+
</rule>
46+
<rule pattern="(bool|float|int|string|uint|ulong|ushort)\b\??">
47+
<token type="KeywordType"/>
48+
</rule>
49+
<rule pattern="(true|false|null|_)\b">
50+
<token type="KeywordConstant"/>
51+
</rule>
52+
<rule pattern="[_a-zA-Z]\w*">
53+
<token type="Name"/>
54+
</rule>
55+
</state>
56+
<!-- State for the inside of a double-quoted string; the root state pushes it after matching the opening "#" characters and quotes. -->
<state name="string">
57+
<!-- Interpolation start: a backslash, any number of "#", then "("; hands over to the string-intp state. -->
<rule pattern="\\#*\(">
58+
<token type="LiteralStringInterpol"/>
59+
<push state="string-intp"/>
60+
</rule>
61+
<!-- Closing delimiter: one or more quotes followed by any number of "#". NOTE(review): the pattern is not compared against the opening delimiter, so a lone quote inside a triple-quoted or "#"-delimited string also matches here and pops the state. -->
<rule pattern="&#34;+#*">
62+
<token type="LiteralString"/>
63+
<pop depth="1"/>
64+
</rule>
65+
<!-- Backslash escapes: a single escaped character, \x plus two hex digits, one to three octal digits, \u plus four hex digits, or \U plus eight hex digits. -->
<rule pattern="\\[&#39;&#34;\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}">
66+
<token type="LiteralStringEscape"/>
67+
</rule>
68+
<!-- Plain string content: a run of characters that are neither a backslash nor a quote. -->
<rule pattern="[^\\&#34;]+">
69+
<token type="LiteralString"/>
70+
</rule>
71+
<!-- Fallback for a backslash that the interpolation and escape rules above did not consume; it is kept as string text. -->
<rule pattern="\\">
72+
<token type="LiteralString"/>
73+
</rule>
74+
</state>
75+
<!-- State for the expression inside a string interpolation; pushed by the interpolation-start rule of the string state. -->
<state name="string-intp">
75+
<!-- ")" ends the interpolation and pops back to the enclosing state. NOTE(review): there is no tracking of nested parentheses here, so presumably the ")" of an inner call also ends the interpolation; confirm against real CUE input. -->
<rule pattern="\)">
76+
<token type="LiteralStringInterpol"/>
77+
<pop depth="1"/>
78+
</rule>
79+
<!-- Anything else is lexed with the rules of the root state. -->
<rule>
80+
<include state="root"/>
81+
</rule>
82+
</state>
84+
</rules>
85+
</lexer>

lexers/testdata/cue.actual

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"this is a test"
2+
5
3+
false
4+
5+
// This is a comment
6+
a: {
7+
foo: string // foo is a string
8+
[=~"^i"]: int // all other fields starting with i are integers
9+
[=~"^b"]: bool // all other fields starting with b are booleans
10+
[>"c"]: string // all other fields lexically after c are strings
11+
12+
...string // all other fields must be a string. Note: default constraints are not yet implemented.
13+
}
14+
15+
x: #"""
16+
17+
\#(test)
18+
19+
"""
20+
21+
b: a & {
22+
i3: 3
23+
bar: true
24+
other: "a string"
25+
}
26+
27+
A: close({
28+
field1: string
29+
field2: string
30+
})
31+

lexers/testdata/cue.expected

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
[
2+
{"type":"LiteralString","value":"\"this is a test\""},
3+
{"type":"Text","value":"\n"},
4+
{"type":"LiteralNumber","value":"5"},
5+
{"type":"Text","value":"\n"},
6+
{"type":"KeywordConstant","value":"false"},
7+
{"type":"Text","value":"\n\n"},
8+
{"type":"CommentSingle","value":"// This is a comment"},
9+
{"type":"Text","value":"\n"},
10+
{"type":"Name","value":"a"},
11+
{"type":"Operator","value":":"},
12+
{"type":"Text","value":" "},
13+
{"type":"Punctuation","value":"{"},
14+
{"type":"Text","value":"\n "},
15+
{"type":"Name","value":"foo"},
16+
{"type":"Operator","value":":"},
17+
{"type":"Text","value":" "},
18+
{"type":"KeywordType","value":"string"},
19+
{"type":"Text","value":" "},
20+
{"type":"CommentSingle","value":"// foo is a string"},
21+
{"type":"Text","value":"\n "},
22+
{"type":"Operator","value":"[="},
23+
{"type":"Punctuation","value":"~"},
24+
{"type":"LiteralString","value":"\"^i\""},
25+
{"type":"Operator","value":"]:"},
26+
{"type":"Text","value":" "},
27+
{"type":"KeywordType","value":"int"},
28+
{"type":"Text","value":" "},
29+
{"type":"CommentSingle","value":"// all other fields starting with i are integers"},
30+
{"type":"Text","value":"\n "},
31+
{"type":"Operator","value":"[="},
32+
{"type":"Punctuation","value":"~"},
33+
{"type":"LiteralString","value":"\"^b\""},
34+
{"type":"Operator","value":"]:"},
35+
{"type":"Text","value":" "},
36+
{"type":"KeywordType","value":"bool"},
37+
{"type":"Text","value":" "},
38+
{"type":"CommentSingle","value":"// all other fields starting with b are booleans"},
39+
{"type":"Text","value":"\n "},
40+
{"type":"Operator","value":"[\u003e"},
41+
{"type":"LiteralString","value":"\"c\""},
42+
{"type":"Operator","value":"]:"},
43+
{"type":"Text","value":" "},
44+
{"type":"KeywordType","value":"string"},
45+
{"type":"Text","value":" "},
46+
{"type":"CommentSingle","value":"// all other fields lexically after c are strings"},
47+
{"type":"Text","value":"\n\n "},
48+
{"type":"Operator","value":"..."},
49+
{"type":"KeywordType","value":"string"},
50+
{"type":"Text","value":" "},
51+
{"type":"CommentSingle","value":"// all other fields must be a string. Note: default constraints are not yet implemented."},
52+
{"type":"Text","value":"\n"},
53+
{"type":"Punctuation","value":"}"},
54+
{"type":"Text","value":"\n\n"},
55+
{"type":"Name","value":"x"},
56+
{"type":"Operator","value":":"},
57+
{"type":"Text","value":" "},
58+
{"type":"LiteralString","value":"#\"\"\"\n\n "},
59+
{"type":"LiteralStringInterpol","value":"\\#("},
60+
{"type":"Name","value":"test"},
61+
{"type":"LiteralStringInterpol","value":")"},
62+
{"type":"LiteralString","value":"\n\n\"\"\""},
63+
{"type":"Text","value":"\n\n"},
64+
{"type":"Name","value":"b"},
65+
{"type":"Operator","value":":"},
66+
{"type":"Text","value":" "},
67+
{"type":"Name","value":"a"},
68+
{"type":"Text","value":" "},
69+
{"type":"Operator","value":"\u0026"},
70+
{"type":"Text","value":" "},
71+
{"type":"Punctuation","value":"{"},
72+
{"type":"Text","value":"\n "},
73+
{"type":"Name","value":"i3"},
74+
{"type":"Operator","value":":"},
75+
{"type":"Text","value":" "},
76+
{"type":"LiteralNumber","value":"3"},
77+
{"type":"Text","value":"\n "},
78+
{"type":"Name","value":"bar"},
79+
{"type":"Operator","value":":"},
80+
{"type":"Text","value":" "},
81+
{"type":"KeywordConstant","value":"true"},
82+
{"type":"Text","value":"\n "},
83+
{"type":"Name","value":"other"},
84+
{"type":"Operator","value":":"},
85+
{"type":"Text","value":" "},
86+
{"type":"LiteralString","value":"\"a string\""},
87+
{"type":"Text","value":"\n"},
88+
{"type":"Punctuation","value":"}"},
89+
{"type":"Text","value":"\n\n"},
90+
{"type":"Name","value":"A"},
91+
{"type":"Operator","value":":"},
92+
{"type":"Text","value":" "},
93+
{"type":"Name","value":"close"},
94+
{"type":"Punctuation","value":"({"},
95+
{"type":"Text","value":"\n "},
96+
{"type":"Name","value":"field1"},
97+
{"type":"Operator","value":":"},
98+
{"type":"Text","value":" "},
99+
{"type":"KeywordType","value":"string"},
100+
{"type":"Text","value":"\n "},
101+
{"type":"Name","value":"field2"},
102+
{"type":"Operator","value":":"},
103+
{"type":"Text","value":" "},
104+
{"type":"KeywordType","value":"string"},
105+
{"type":"Text","value":"\n"},
106+
{"type":"Punctuation","value":"})"},
107+
{"type":"Text","value":"\n\n"}
108+
]

0 commit comments

Comments
 (0)