Skip to content

Commit 8b8574c

Browse files
authored
Add Protocol Buffer Text Format Language (txtpb) (#1165)
1 parent c07ef4b commit 8b8574c

File tree

4 files changed

+839
-1
lines changed

4 files changed

+839
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ translators for Pygments lexers and styles.
5151
| M | Makefile, Mako, markdown, Mason, Materialize SQL dialect, Mathematica, Matlab, MCFunction, Meson, Metal, MiniZinc, MLIR, Modula-2, Mojo, MonkeyC, MoonScript, MorrowindScript, Myghty, MySQL
5252
| N | NASM, Natural, NDISASM, Newspeak, Nginx configuration file, Nim, Nix, NSIS, Nu
5353
| O | Objective-C, ObjectPascal, OCaml, Octave, Odin, OnesEnterprise, OpenEdge ABL, OpenSCAD, Org Mode
54-
| P | PacmanConf, Perl, PHP, PHTML, Pig, PkgConfig, PL/pgSQL, plaintext, Plutus Core, Pony, PostgreSQL SQL dialect, PostScript, POVRay, PowerQuery, PowerShell, Prolog, Promela, PromQL, properties, Protocol Buffer, PRQL, PSL, Puppet, Python, Python 2
54+
| P | PacmanConf, Perl, PHP, PHTML, Pig, PkgConfig, PL/pgSQL, plaintext, Plutus Core, Pony, PostgreSQL SQL dialect, PostScript, POVRay, PowerQuery, PowerShell, Prolog, Promela, PromQL, properties, Protocol Buffer, Protocol Buffer Text Format, PRQL, PSL, Puppet, Python, Python 2
5555
| Q | QBasic, QML
5656
| R | R, Racket, Ragel, Raku, react, ReasonML, reg, Rego, reStructuredText, Rexx, RPGLE, RPMSpec, Ruby, Rust
5757
| S | SAS, Sass, Scala, Scheme, Scilab, SCSS, Sed, Sieve, Smali, Smalltalk, Smarty, SNBT, Snobol, Solidity, SourcePawn, SPARQL, SQL, SquidConf, Standard ML, stas, Stylus, Svelte, Swift, SYSTEMD, systemverilog

lexers/embedded/txtpb.xml

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
<lexer>
  <!--
    Lexer definition for the Protocol Buffer Text Format.

    Rules inside a state are tried from top to bottom and the first pattern
    that matches at the current position wins, so the order of the rules
    below is significant.
  -->
  <config>
    <name>Protocol Buffer Text Format</name>
    <alias>txtpb</alias>
    <filename>*.txtpb</filename>
    <filename>*.textproto</filename>
    <filename>*.textpb</filename>
    <filename>*.pbtxt</filename>
    <mime_type>application/x-protobuf-text</mime_type>
    <case_insensitive>false</case_insensitive>
    <dot_all>false</dot_all>
    <!-- The comment rule in "root" needs a trailing newline; this guarantees one on the last line. -->
    <ensure_nl>true</ensure_nl>
  </config>
  <rules>
    <!-- Body of a double-quoted string; pushed by "root" after the opening quote. -->
    <state name="double-quote">
      <!-- Single-character escapes: \a \b \f \n \r \t \v \\ \? \' \" -->
      <rule pattern="\\[abfnrtv\\\?&#39;&#34;]">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Octal escape: one to three octal digits -->
      <rule pattern="\\[0-7]{1,3}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Hex escape: one or two hex digits -->
      <rule pattern="\\x[0-9a-fA-F]{1,2}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Short Unicode escape: exactly four hex digits -->
      <rule pattern="\\u[0-9a-fA-F]{4}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Long Unicode escapes: \U000xxxxx and \U0010xxxx -->
      <rule pattern="\\U000[0-9a-fA-F]{5}">
        <token type="LiteralStringEscape"/>
      </rule>
      <rule pattern="\\U0010[0-9a-fA-F]{4}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Literal text: stops at the closing quote, a backslash, or the end of the line -->
      <rule pattern="[^&#34;\\\n]+">
        <token type="LiteralStringDouble"/>
      </rule>
      <!-- Closing quote -->
      <rule pattern="&#34;">
        <token type="LiteralStringDouble"/>
        <pop depth="1"/>
      </rule>
      <!--
        A raw newline is not allowed inside a string literal. Treat the string
        as unterminated and return to the previous state, so that one missing
        quote does not turn the rest of the input into string tokens.
      -->
      <rule pattern="\n">
        <token type="Text"/>
        <pop depth="1"/>
      </rule>
    </state>

    <!-- Body of a single-quoted string; pushed by "root" after the opening quote. -->
    <state name="single-quote">
      <!-- Single-character escapes: \a \b \f \n \r \t \v \\ \? \' \" -->
      <rule pattern="\\[abfnrtv\\\?&#39;&#34;]">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Octal escape: one to three octal digits -->
      <rule pattern="\\[0-7]{1,3}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Hex escape: one or two hex digits -->
      <rule pattern="\\x[0-9a-fA-F]{1,2}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Short Unicode escape: exactly four hex digits -->
      <rule pattern="\\u[0-9a-fA-F]{4}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Long Unicode escapes: \U000xxxxx and \U0010xxxx -->
      <rule pattern="\\U000[0-9a-fA-F]{5}">
        <token type="LiteralStringEscape"/>
      </rule>
      <rule pattern="\\U0010[0-9a-fA-F]{4}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Literal text: stops at the closing quote, a backslash, or the end of the line -->
      <rule pattern="[^&#39;\\\n]+">
        <token type="LiteralStringSingle"/>
      </rule>
      <!-- Closing quote -->
      <rule pattern="&#39;">
        <token type="LiteralStringSingle"/>
        <pop depth="1"/>
      </rule>
      <!--
        A raw newline is not allowed inside a string literal. Treat the string
        as unterminated and return to the previous state, so that one missing
        quote does not turn the rest of the input into string tokens.
      -->
      <rule pattern="\n">
        <token type="Text"/>
        <pop depth="1"/>
      </rule>
    </state>

    <state name="root">
      <!-- Comments: from "#" up to and including the end of the line -->
      <rule pattern="#.*\n">
        <token type="CommentSingle"/>
      </rule>

      <!-- Whitespace -->
      <rule pattern="[ \n\t\v\f\r]+">
        <token type="Text"/>
      </rule>

      <!-- Operators: the sign is its own token, so "- 100" and "-100" lex alike -->
      <rule pattern="-">
        <token type="Operator"/>
      </rule>

      <!--
        Qualified names (with dots) for field paths and type URLs.
        Must come before the special float and boolean rules: "." is not a
        word character, so "\b" would otherwise let those rules claim the
        first segment of names such as "t.field" or "nan.ext" and leave a
        stray "." that no rule matches.
      -->
      <rule pattern="[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)+">
        <token type="NameNamespace"/>
      </rule>

      <!-- Special float literals (matched case-insensitively) -->
      <rule pattern="(?i)(?:inf|infinity)\b">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="(?i)nan\b">
        <token type="LiteralNumberFloat"/>
      </rule>

      <!-- Float literals with suffix (must come before decimal integers) -->
      <rule pattern="(?:0|[1-9][0-9]*)[fF]">
        <token type="LiteralNumberFloat"/>
      </rule>

      <!-- Float literals: leading dot, trailing dot or fraction, exponent only -->
      <rule pattern="\.[0-9]+(?:[eE][+-]?[0-9]+)?[fF]?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="(?:0|[1-9][0-9]*)\.[0-9]*(?:[eE][+-]?[0-9]+)?[fF]?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="(?:0|[1-9][0-9]*)[eE][+-]?[0-9]+[fF]?">
        <token type="LiteralNumberFloat"/>
      </rule>

      <!-- Hexadecimal integers (before decimal, which would match the leading "0") -->
      <rule pattern="0[xX][0-9a-fA-F]+">
        <token type="LiteralNumberHex"/>
      </rule>

      <!-- Octal integers (before decimal, which would match the leading "0") -->
      <rule pattern="0[0-7]+">
        <token type="LiteralNumberOct"/>
      </rule>

      <!-- Decimal integers -->
      <rule pattern="(?:0|[1-9][0-9]*)">
        <token type="LiteralNumberInteger"/>
      </rule>

      <!-- Boolean keywords (before plain identifiers, which would also match them) -->
      <rule pattern="\b(?:[Tt]rue|[Ff]alse|t|f)\b">
        <token type="KeywordConstant"/>
      </rule>

      <!-- Strings: emit the opening quote and switch to the matching string state -->
      <rule pattern="&#34;">
        <token type="LiteralStringDouble"/>
        <push state="double-quote"/>
      </rule>
      <rule pattern="&#39;">
        <token type="LiteralStringSingle"/>
        <push state="single-quote"/>
      </rule>

      <!-- Field names and identifiers (including enum values) -->
      <rule pattern="[a-zA-Z_][a-zA-Z0-9_]*">
        <token type="Name"/>
      </rule>

      <!-- URL paths in type URLs for Any expansion -->
      <rule pattern="/[a-zA-Z_][a-zA-Z0-9_/.]*">
        <token type="NameNamespace"/>
      </rule>

      <!-- Punctuation -->
      <rule pattern="[:;,&lt;&gt;\[\]{}]">
        <token type="Punctuation"/>
      </rule>
    </state>
  </rules>
</lexer>

lexers/testdata/txtpb.actual

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# proto-file: example/test.proto
2+
# proto-message: TestMessage
3+
4+
# Basic field assignments
5+
name: "John Smith"
6+
age: 42
7+
email: 'user@example.com'
8+
9+
# Numeric literals
10+
decimal_value: 12345
11+
octal_value: 0755
12+
hex_value: 0xDEADBEEF
13+
zero: 0
14+
15+
# Negative numbers (- is a separate token)
16+
negative_int: -42
17+
negative_float: -3.14159
18+
negative_with_space: - 100
19+
negative_scientific: -54.32E+1f
20+
21+
# Float literals
22+
pi: 3.14159
23+
scientific: 1.23e-10
24+
with_suffix: 42f
25+
decimal_suffix: 100F
26+
exp_notation: 2e5
27+
leading_dot: .5
28+
trailing_dot: 10.
29+
optional_suffix: 1.0f
30+
31+
# Special float values
32+
positive_inf: inf
33+
negative_inf: -infinity
34+
not_a_number: NaN
35+
case_insensitive: INFINITY
36+
37+
# Boolean values
38+
is_active: true
39+
is_deleted: False
40+
short_true: t
41+
short_false: f
42+
# 0, 1, 0x0, 0x1, etc. are also valid but not bools lexically
43+
44+
# String escapes
45+
escaped_string: "line1\nline2\ttab\r\nwindows\\backslash"
46+
double_quote_escape: "He said \"hello\", 'single' quote"
47+
single_quote_escape: 'She said \'good bye\', "double" quote'
48+
octal_escape: "\0\18\22\3334"
49+
hex_escape: "\x0\x1Hello\x42"
50+
unicode_escape: "\u0041\u0042"
51+
unicode_long: "\U0001f951\U0010FFFD"
52+
53+
# Message values with colon delimiter
54+
nested_message: {
55+
field1: "value1"
56+
field2: 123
57+
}
58+
59+
# Message values with angle brackets
60+
another_message <
61+
inner_field: true
62+
>
63+
64+
# Repeated fields
65+
tags: "tag1"
66+
tags: "tag2"
67+
tags: "tag3"
68+
69+
# List syntax for repeated fields
70+
numbers: [1, 2, 3, 4, 5]
71+
strings: ["first", "second", "third"]
72+
73+
# Map entries
74+
metadata { key: "version" value: "1.0" }
75+
metadata { key: "author" value: "test" }
76+
metadata: [
77+
{ key: "language" value: "proto" },
78+
{ key: "year" value: "2024" }
79+
]
80+
81+
# Extension fields
82+
[com.example.custom_field]: "extension_value"
83+
[org.example.number_ext]: 999
84+
85+
# Any expansion
86+
details {
87+
[type.googleapis.com/example.PersonDetails] {
88+
phone: "555-1234"
89+
address: "123 Main St"
90+
}
91+
}
92+
93+
# Qualified field names
94+
config.server.host: "localhost"
95+
config.server.port: 8080
96+
97+
# Group field (capitalized name)
98+
MyGroup {
99+
my_value: 42
100+
}
101+
102+
# Enum values by name
103+
status: ACTIVE
104+
priority: HIGH
105+
color: RED
106+
107+
# Enum values by number
108+
fallback_status: 1
109+
110+
# Edge cases
111+
empty_string: ""
112+
empty_message: {}
113+
empty_list: []
114+
115+
# Multiple nested messages
116+
outer {
117+
middle {
118+
inner {
119+
deep_field: "nested"
120+
}
121+
}
122+
}
123+
124+
# Mixed delimiters
125+
mixed: { field1: "a" } next: < field2: "b" >
126+
127+
# Semicolon separators (optional)
128+
with_semi: 1; another: 2;
129+
130+
# Concatenated strings
131+
long_string: "This is a very long string that "
132+
"can be split across multiple lines "
133+
"for better readability"
134+
135+
field_with_comment: 42 # Inline comment (and no new line, should be handled by ensure_nl=true)

0 commit comments

Comments
 (0)