Skip to content

Commit 3b5a164

Browse files
authored
Add lexer for WebAssembly Text Format (#1161)
Adds a lexer for WebAssembly's text format based on [Pygments' WatLexer](https://pygments.org/docs/lexers/#pygments.lexers.webassembly.WatLexer).

Test data is the output of wasm2wat from a binary compiled from the following code using clang 21.1.2 and the wasm32-unknown-wasi target:

```c
#include <stdio.h>

int main() {
    puts("Hello World!");
    return 0;
}
```

Closes #754
1 parent 84d187e commit 3b5a164

File tree

3 files changed

+6077
-0
lines changed

3 files changed

+6077
-0
lines changed

lexers/embedded/wat.xml

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
<lexer>
2+
<!-- Lexer metadata: display name, the aliases it can be selected by, and the filename globs it claims. -->
<config>
  <name>WebAssembly Text Format</name>
  <alias>wast</alias>
  <alias>wat</alias>
  <filename>*.wat</filename>
  <filename>*.wast</filename>
</config>
9+
<rules>
10+
<!-- Entry state: keywords, instructions, value types, identifiers, comments, numbers, parentheses and strings. -->
<state name="root">
  <!-- Structural keywords. The lookahead rejects a following letter, underscore or dot,
       so dotted instructions such as local.get fall through to the instruction rule. -->
  <rule pattern="(module|import|func|funcref|start|param|local|type|result|export|memory|global|mut|data|table|elem|if|then|else|end|block|loop)(?=[^a-z_\.])">
    <token type="Keyword"/>
  </rule>
  <!-- Instruction mnemonics. Alternatives are tried in order, so a mnemonic that is a prefix
       of another must not come first: "ne", "neg" and "nearest" are folded into one
       alternative for that reason. After a match the "arguments" state lexes offset= and align=. -->
  <rule pattern="(unreachable|nop|block|loop|if|else|end|br(?:_if|_table)?|return|call(?:_indirect)?|drop|select|local\.get|local\.set|local\.tee|global\.get|global\.set|i32\.load(?:(?:8|16)_(?:u|s))?|i64\.load(?:(?:8|16|32)_(?:u|s))?|f32\.load|f64\.load|i32\.store(?:8|16)?|i64\.store(?:8|16|32)?|f32\.store|f64\.store|memory\.size|memory\.grow|memory\.fill|memory\.copy|memory\.init|i32\.const|i64\.const|f32\.const|f64\.const|i32\.eqz|i32\.eq|i32\.ne|i32\.lt_s|i32\.lt_u|i32\.gt_s|i32\.gt_u|i32\.le_s|i32\.le_u|i32\.ge_s|i32\.ge_u|i64\.eqz|i64\.eq|i64\.ne|i64\.lt_s|i64\.lt_u|i64\.gt_s|i64\.gt_u|i64\.le_s|i64\.le_u|i64\.ge_s|i64\.ge_u|f32\.eq|f32\.ne(?:arest|g)?|f32\.lt|f32\.gt|f32\.le|f32\.ge|f64\.eq|f64\.ne(?:arest|g)?|f64\.lt|f64\.gt|f64\.le|f64\.ge|i32\.clz|i32\.ctz|i32\.popcnt|i32\.add|i32\.sub|i32\.mul|i32\.div_s|i32\.div_u|i32\.rem_s|i32\.rem_u|i32\.and|i32\.or|i32\.xor|i32\.shl|i32\.shr_s|i32\.shr_u|i32\.rotl|i32\.rotr|i64\.clz|i64\.ctz|i64\.popcnt|i64\.add|i64\.sub|i64\.mul|i64\.div_s|i64\.div_u|i64\.rem_s|i64\.rem_u|i64\.and|i64\.or|i64\.xor|i64\.shl|i64\.shr_s|i64\.shr_u|i64\.rotl|i64\.rotr|f32\.abs|f32\.ceil|f32\.floor|f32\.trunc|f32\.sqrt|f32\.add|f32\.sub|f32\.mul|f32\.div|f32\.min|f32\.max|f32\.copysign|f64\.abs|f64\.ceil|f64\.floor|f64\.trunc|f64\.sqrt|f64\.add|f64\.sub|f64\.mul|f64\.div|f64\.min|f64\.max|f64\.copysign|i32\.wrap_i64|i32\.trunc_f32_s|i32\.trunc_f32_u|i32\.trunc_f64_s|i32\.trunc_f64_u|i64\.extend(?:(?:8|16|32)_s|_i(?:32|64)_(?:u|s))|i32\.extend(?:8|16)_s|(?:i32|i64)\.trunc(?:_sat)?_f(?:32|64)_(?:s|u)|f32\.convert_i32_s|f32\.convert_i32_u|f32\.convert_i64_s|f32\.convert_i64_u|f32\.demote_f64|f64\.convert_i32_s|f64\.convert_i32_u|f64\.convert_i64_s|f64\.convert_i64_u|f64\.promote_f32|i32\.reinterpret_f32|i64\.reinterpret_f64|f32\.reinterpret_i32|f64\.reinterpret_i64)">
    <token type="NameBuiltin"/>
    <push state="arguments"/>
  </rule>
  <!-- Numeric value types. -->
  <rule pattern="(i32|i64|f32|f64)">
    <token type="KeywordType"/>
  </rule>
  <!-- Identifiers: a dollar sign followed by one or more identifier characters. -->
  <rule pattern="\$[A-Za-z0-9!#$%&amp;\&#x27;*+./:&lt;=&gt;?@\\^_`|~-]+">
    <token type="NameVariable"/>
  </rule>
  <!-- Line comment: two semicolons up to the end of the line. -->
  <rule pattern=";;.*?$">
    <token type="CommentSingle"/>
  </rule>
  <!-- Opening delimiter of a block comment; nesting is tracked in "nesting_comment". -->
  <rule pattern="\(;">
    <token type="CommentMultiline"/>
    <push state="nesting_comment"/>
  </rule>
  <!-- Hex float: requires a literal point or a p-exponent, so plain hex integers fall through
       to the hex integer rule below. The exponent is a decimal number. -->
  <rule pattern="[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*(\.([\dA-Fa-f](_?[\dA-Fa-f])*)?([pP][+-]?\d(_?\d)*)?|[pP][+-]?\d(_?\d)*)">
    <token type="LiteralNumberFloat"/>
  </rule>
  <!-- Decimal float with a literal point, optional fraction and optional exponent (1., 12.5, 1.5e10). -->
  <rule pattern="[+-]?\d(_?\d)*\.(\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?">
    <token type="LiteralNumberFloat"/>
  </rule>
  <!-- Decimal float with an exponent but no point (1e10). -->
  <rule pattern="[+-]?\d(_?\d)*[eE][+-]?\d(_?\d)*">
    <token type="LiteralNumberFloat"/>
  </rule>
  <!-- Special float values: inf, nan and nan with a hex payload. -->
  <rule pattern="[+-]?(inf|nan:0x[\dA-Fa-f](_?[\dA-Fa-f])*|nan)">
    <token type="LiteralNumberFloat"/>
  </rule>
  <!-- Hex integer. Must precede the decimal integer rule, which would otherwise take the leading 0. -->
  <rule pattern="[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*">
    <token type="LiteralNumberHex"/>
  </rule>
  <!-- Decimal integer. -->
  <rule pattern="[+-]?\d(_?\d)*">
    <token type="LiteralNumberInteger"/>
  </rule>
  <rule pattern="[\(\)]">
    <token type="Punctuation"/>
  </rule>
  <!-- Opening quote; the string body and escapes are lexed in the "string" state. -->
  <rule pattern="&quot;">
    <token type="LiteralStringDouble"/>
    <push state="string"/>
  </rule>
  <rule pattern="\s+">
    <token type="Text"/>
  </rule>
</state>
63+
<!-- Body of a block comment. Each nested opener pushes this state again and each
     closer pops one level, so nested comments must be balanced to return to the caller. -->
<state name="nesting_comment">
  <!-- Nested opener: go one level deeper. -->
  <rule pattern="\(;">
    <token type="CommentMultiline"/>
    <push/>
  </rule>
  <!-- Closer: leave the current level. -->
  <rule pattern=";\)">
    <token type="CommentMultiline"/>
    <pop depth="1"/>
  </rule>
  <!-- Run of characters that cannot begin a delimiter. -->
  <rule pattern="[^;(]+">
    <token type="CommentMultiline"/>
  </rule>
  <!-- A lone semicolon or parenthesis that was not part of a delimiter. -->
  <rule pattern="[;(]">
    <token type="CommentMultiline"/>
  </rule>
</state>
79+
<!-- Inside a double-quoted string: escape sequences, the closing quote, and plain text. -->
<state name="string">
  <!-- Two hex digits after a backslash: a raw byte escape. -->
  <rule pattern="\\[\dA-Fa-f][\dA-Fa-f]">
    <token type="LiteralStringEscape"/>
  </rule>
  <!-- Single-character escapes: t, n, r, double quote, single quote, backslash. -->
  <rule pattern="\\[tnr&quot;&#x27;\\]">
    <token type="LiteralStringEscape"/>
  </rule>
  <!-- Unicode escape: u followed by hex digits in braces. -->
  <rule pattern="\\u\{[\dA-Fa-f](_?[\dA-Fa-f])*\}">
    <token type="LiteralStringEscape"/>
  </rule>
  <!-- Closing quote: return to the enclosing state. -->
  <rule pattern="&quot;">
    <token type="LiteralStringDouble"/>
    <pop depth="1"/>
  </rule>
  <!-- Any run of characters other than a quote or a backslash. -->
  <rule pattern="[^&quot;\\]+">
    <token type="LiteralStringDouble"/>
  </rule>
</state>
112+
<!-- Entered after an instruction mnemonic: lexes optional offset= and align= arguments
     and pops back as soon as anything else is seen. -->
<state name="arguments">
  <rule pattern="\s+">
    <token type="Text"/>
  </rule>
  <!-- offset= or align= with a hex value. Tried before the decimal form so that a
       0x prefix is not split after its leading zero. -->
  <rule pattern="(offset|align)(=)(0x[\dA-Fa-f](?:_?[\dA-Fa-f])*)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Operator"/>
      <token type="LiteralNumberHex"/>
    </bygroups>
  </rule>
  <!-- offset= or align= with a decimal value. -->
  <rule pattern="(offset|align)(=)(\d(?:_?\d)*)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Operator"/>
      <token type="LiteralNumberInteger"/>
    </bygroups>
  </rule>
  <!-- Pattern-less fallback: nothing above matched, so return to the previous state. -->
  <rule>
    <pop depth="1"/>
  </rule>
</state>
148+
</rules>
149+
</lexer>

0 commit comments

Comments
 (0)