Skip to content

Commit 66a939a

Browse files
authored
Add lexer for Game Boy Development System ASM. (#1117)
This adds a new lexer for [RGBDS](https://rgbds.gbdev.io/) assembly syntax, including:

* All [opcodes](https://rgbds.gbdev.io/docs/v0.9.3/gbz80.7) implementing the Game Boy CPU instruction set.
* The syntax supported by the [rgbasm](https://rgbds.gbdev.io/docs/v0.9.3/rgbasm.5) assembler itself.

The new lexer was loosely based on the existing `z80_assembly.xml` file. Tests in `lexers/testdata/rgbasm/` are based on the current RGBDS documentation. An example of the test output, as seen in the playground, can be found [here](https://kefen.tigris.fr/rgbasm_syntax.html).

Thank you for your time.
1 parent 2984b60 commit 66a939a

File tree

7 files changed

+3822
-0
lines changed

7 files changed

+3822
-0
lines changed

lexers/embedded/rgbasm.xml

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
<lexer>
2+
<config>
3+
<name>RGBDS Assembly</name>
4+
<alias>rgbasm</alias>
5+
<filename>*.asm</filename>
6+
<priority>0.5</priority>
7+
<case_insensitive>true</case_insensitive>
8+
</config>
9+
<rules>
10+
<!-- Symbol state for parsing anything between curly brackets: non-brace text is emitted as NameVariable, a nested "{" pushes this state again, and "}" pops one level -->
11+
<state name="symbol">
12+
<rule pattern="[^{}]+">
13+
<token type="NameVariable"/>
14+
</rule>
15+
<rule pattern="{">
16+
<token type="Punctuation"/>
17+
<push state="symbol"/>
18+
</rule>
19+
<rule pattern="}">
20+
<token type="Punctuation"/>
21+
<pop depth="1"/>
22+
</rule>
23+
</state>
24+
<!-- String states for parsing quote-delimited text that may contain symbols. The "string" state handles single-line strings: it ends at the closing quote or at a newline, emits backslash escapes as LiteralStringEscape, and enters the "symbol" state on "{" -->
25+
<state name="string">
26+
<rule pattern="[^{&#34;\n\\]+">
27+
<token type="LiteralString"/>
28+
</rule>
29+
<rule pattern="\\.">
30+
<token type="LiteralStringEscape"/>
31+
</rule>
32+
<rule pattern="{">
33+
<token type="Punctuation"/>
34+
<push state="symbol"/>
35+
</rule>
36+
<rule pattern="(&#34;|\n)">
37+
<token type="LiteralString"/>
38+
<pop depth="1"/>
39+
</rule>
40+
</state>
41+
<state name="stringmultiline">
42+
<!-- The closing triple quote ends the multi-line string; this rule is listed first because the catch-all rule below also accepts quote characters -->
<rule pattern="&#34;{3}">
43+
<token type="LiteralString"/>
44+
<pop depth="1"/>
45+
</rule>
46+
<!-- The lazy quantifier makes this rule consume a single character per match, so the closing delimiter rule above is retried at every position -->
<rule pattern="[^{\\]+?">
47+
<token type="LiteralString"/>
48+
</rule>
49+
<rule pattern="\\.">
50+
<token type="LiteralStringEscape"/>
51+
</rule>
52+
<!-- An opening curly bracket starts a symbol interpolation inside the string -->
<rule pattern="{">
53+
<token type="Punctuation"/>
54+
<push state="symbol"/>
55+
</rule>
56+
</state>
57+
<!-- Root state -->
58+
<state name="root">
59+
<!-- Comments -->
60+
<rule pattern=";.*?$">
61+
<token type="CommentSingle"/>
62+
</rule>
63+
<rule pattern="/[*](.|\n)*?[*]/">
64+
<token type="CommentMultiline"/>
65+
</rule>
66+
<!-- Local label -->
67+
<rule pattern="^(\.)(\w+)(:?)">
68+
<bygroups>
69+
<token type="Punctuation"/>
70+
<token type="NameLabel"/>
71+
<token type="Punctuation"/>
72+
</bygroups>
73+
</rule>
74+
<!-- Global label (with optional local) -->
75+
<rule pattern="(^\w+)(?:(\.)(\w+))?(::?)">
76+
<bygroups>
77+
<token type="NameLabel"/>
78+
<token type="Punctuation"/>
79+
<token type="NameLabel"/>
80+
<token type="Punctuation"/>
81+
</bygroups>
82+
</rule>
83+
<!-- Symbols delimited by curly brackets -->
84+
<rule pattern="{">
85+
<token type="Punctuation"/>
86+
<push state="symbol"/>
87+
</rule>
88+
<!-- Numeric types (can contain '_' except at the beginning) -->
89+
<rule pattern="(0x|\$)[0-9a-fA-F][0-9a-fA-F_]*">
90+
<token type="LiteralNumberHex"/>
91+
</rule>
92+
<rule pattern="[0-9a-fA-F][0-9a-fA-F_]*h\b">
93+
<token type="LiteralNumberHex"/>
94+
</rule>
95+
<rule pattern="(0o|&amp;)[0-7][0-7_]*">
96+
<token type="LiteralNumberOct"/>
97+
</rule>
98+
<rule pattern="(0b|%)[01][01_]*">
99+
<token type="LiteralNumberBin"/>
100+
</rule>
101+
<rule pattern="-?[0-9][0-9_]*\.[0-9_]+(q[0-9]+)?">
102+
<token type="LiteralNumberFloat"/>
103+
</rule>
104+
<rule pattern="-?[0-9][0-9_]*">
105+
<token type="LiteralNumberInteger"/>
106+
</rule>
107+
<!-- "Game Boy graphics" format, which can be made of user-defined symbols -->
108+
<rule pattern="`[^\s]+">
109+
<token type="LiteralNumberInteger"/>
110+
</rule>
111+
<!-- #-prefixed raw string (multiline) -->
112+
<rule pattern="(#)(&#34;{3}(?:.|\n)*?&#34;{3})">
113+
<bygroups>
114+
<token type="LiteralStringAffix"/>
115+
<token type="LiteralString"/>
116+
</bygroups>
117+
</rule>
118+
<!-- #-prefixed raw string -->
119+
<rule pattern="(#)(&#34;.*?&#34;)">
120+
<bygroups>
121+
<token type="LiteralStringAffix"/>
122+
<token type="LiteralString"/>
123+
</bygroups>
124+
</rule>
125+
<!-- Start of quote-delimited (non-raw) strings -->
126+
<rule pattern="&#34;{3}">
127+
<token type="LiteralString"/>
128+
<push state="stringmultiline"/>
129+
</rule>
130+
<rule pattern="&#34;">
131+
<token type="LiteralString"/>
132+
<push state="string"/>
133+
</rule>
134+
<!-- Macro arguments (single character) -->
135+
<rule pattern="\\[1-9@#]">
136+
<token type="NameVariableMagic"/>
137+
</rule>
138+
<!-- Macro arguments (bracketed) -->
139+
<rule pattern="(\\&lt;)([^&gt;]+)(&gt;)">
140+
<bygroups>
141+
<token type="NameVariableMagic"/>
142+
<usingself state="root"/>
143+
<token type="NameVariableMagic"/>
144+
</bygroups>
145+
</rule>
146+
<!-- LDI/LDD alternate formats -->
147+
<rule pattern="(\[)(hl\+|hl-|hli|hld)(\])">
148+
<bygroups>
149+
<token type="Punctuation"/>
150+
<token type="Keyword"/>
151+
<token type="Punctuation"/>
152+
</bygroups>
153+
</rule>
154+
<!-- Punctuation (excluding period which is used in predeclared symbols) -->
155+
<rule pattern="[\[\],()\\:]">
156+
<token type="Punctuation"/>
157+
</rule>
158+
<!-- Variable definitions that can contain RL, which is also an opcode -->
159+
<rule pattern="((?:re)?def)([\t ]+)([\w{}:]+)([\t ]+)(rb|rw|rl|equs|equ)?">
160+
<bygroups>
161+
<token type="NameBuiltin"/>
162+
<token type="TextWhitespace"/>
163+
<usingself state="root"/>
164+
<token type="TextWhitespace"/>
165+
<token type="NameBuiltin"/>
166+
</bygroups>
167+
</rule>
168+
<!-- Specific rule for some names that are both built-ins and functions: BANK/SECTION directly followed by "(" are functions, whatever the argument starts with (an identifier, "@", or a quoted string), so no word boundary is required after the parenthesis -->
169+
<rule pattern="\b(bank|section)(\()">
170+
<bygroups>
171+
<token type="NameFunction"/>
172+
<token type="Punctuation"/>
173+
</bygroups>
174+
</rule>
175+
<!-- Specific rule for options redefinitions -->
176+
<rule pattern="\b(opt|pusho)([\t ]+)([^,\s]+)(?:(,)([\t ]*)([^,\s]+))*">
177+
<bygroups>
178+
<token type="NameBuiltin"/>
179+
<token type="TextWhitespace"/>
180+
<token type="Literal"/>
181+
<token type="Punctuation"/>
182+
<token type="TextWhitespace"/>
183+
<token type="Literal"/>
184+
</bygroups>
185+
</rule>
186+
<!-- Predeclared symbols -->
187+
<rule pattern="\b(_rs|_narg|__date__|__time__|__iso_8601_local__|__iso_8601_utc__|__utc_year__|__utc_month__|__utc_day__|__utc_hour__|__utc_minute__|__utc_second__|__rgbds_major__|__rgbds_minor__|__rgbds_patch__|__rgbds_rc__|__rgbds_version__)\b">
188+
<token type="NameVariableMagic"/>
189+
</rule>
190+
<!-- Built-in statements (whole words only: the closing word boundary keeps identifiers such as "loadTiles" or "format" from being split into a built-in plus a name) -->
191+
<rule pattern="\b(align|assert|bank|break|charmap|db|dl|ds|dw|elif|else|endc|endl|endm|endr|endsection|endu|export|fail|fatal|for|fragment|hram|if|incbin|include|load|macro|newcharmap|nextu|oam|popc|popo|pops|println|print|purge|pushc|pusho|pushs|redef|rept|rom0|romx|rsreset|rsset|section|setcharmap|shift|sram|static_assert|union|vram|warn|wram0|wramx)\b">
192+
<token type="NameBuiltin"/>
193+
</rule>
194+
<!-- Integer functions -->
195+
<rule pattern="\b(high|low|bitwidth|tzcount)\b">
196+
<token type="NameFunction"/>
197+
</rule>
198+
<!-- Fixed-point functions -->
199+
<rule pattern="\b(div|mul|fmod|pow|log|round|ceil|floor|sin|cos|tan|asin|acos|atan|atan2)\b">
200+
<token type="NameFunction"/>
201+
</rule>
202+
<!-- String functions -->
203+
<rule pattern="\b(strcat|strupr|strlwr|strslice|strrpl|strfmt|strchar|revchar|strlen|strcmp|strfind|strrfind|incharmap|charlen|charcmp|charsize|strsub|strin|strrin|charsub)\b">
204+
<token type="NameFunction"/>
205+
</rule>
206+
<!-- Other functions, excluding some that are the same as built-ins (BANK and SECTION); matched as whole words only so that identifiers such as "default" are not split -->
207+
<rule pattern="\b(def|isconst|sizeof|startof)\b">
208+
<token type="NameFunction"/>
209+
</rule>
210+
<!-- Opcodes -->
211+
<rule pattern="\b(adc|add|and|bit|call|ccf|cp|cpl|daa|dec|di|ei|halt|inc|jp|jr|ld|ldd|ldh|ldi|nop|or|pop|push|res|ret|reti|rlca|rla|rlc|rl|rr|rra|rrc|rrca|rst|sbc|scf|set|sla|sra|srl|stop|sub|swap|xor)\b">
212+
<token type="Keyword"/>
213+
</rule>
214+
<!-- Registers and flags -->
215+
<rule pattern="\b(a|f|b|c|d|e|h|l|af|bc|de|hl|sp|pc|z|nz|nc)\b">
216+
<token type="Keyword"/>
217+
</rule>
218+
<!-- Operators -->
219+
<rule pattern="[-%!+/*~\^&amp;|=&lt;&gt;]">
220+
<token type="Operator"/>
221+
</rule>
222+
<!-- Reference to label -->
223+
<rule pattern="(\.)?(\w+)">
224+
<bygroups>
225+
<token type="Punctuation"/>
226+
<token type="Name"/>
227+
</bygroups>
228+
</rule>
229+
<!-- Predeclared symbols (non-words); ".." is listed before "." because alternation takes the first branch that matches, otherwise ".." would always be lexed as two separate "." tokens -->
230+
<rule pattern="(@|\.\.|\.)">
231+
<token type="NameVariableMagic"/>
232+
</rule>
233+
<!-- Default text -->
234+
<rule pattern="\s+">
235+
<token type="TextWhitespace"/>
236+
</rule>
237+
</state>
238+
</rules>
239+
</lexer>
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
;
2+
; Integer functions.
3+
;
4+
HIGH(n)
5+
LOW(n)
6+
BITWIDTH(n)
7+
TZCOUNT(n)
8+
9+
;
10+
; Fixed-point functions.
11+
;
12+
DIV(x, y)
13+
MUL(x, y)
14+
FMOD(x, y)
15+
POW(x, y)
16+
LOG(x, y)
17+
ROUND(x)
18+
CEIL(x)
19+
FLOOR(x)
20+
SIN(x)
21+
COS(x)
22+
TAN(x)
23+
ASIN(x)
24+
ACOS(x)
25+
ATAN(x)
26+
ATAN2(y, x)
27+
28+
;
29+
; String functions.
30+
;
31+
STRCAT(str1, str2, str3)
32+
STRUPR(str)
33+
STRLWR(str)
34+
STRSLICE(str, start, end)
35+
STRRPL(str, old, new)
36+
STRFMT(fmt, arg1, arg2, arg3)
37+
STRCHAR(str, idx)
38+
REVCHAR(val1, val2, val3)
39+
STRLEN(str)
40+
STRCMP(str1, str2)
41+
STRFIND(str, substr)
42+
STRRFIND(str, substr)
43+
INCHARMAP(str)
44+
CHARLEN(str)
45+
CHARCMP(str1, str2)
46+
CHARSIZE(char)
47+
STRSUB(str, pos, len)
48+
STRIN(str, substr)
49+
STRRIN(str, substr)
50+
CHARSUB(str, pos)
51+
52+
;
53+
; Other functions.
54+
;
55+
DEF(symbol)
56+
ISCONST(arg)
57+
BANK(arg)
58+
SECTION(symbol)
59+
SIZEOF(arg)
60+
STARTOF(arg)

0 commit comments

Comments
 (0)