Skip to content

Commit c19fc11

Browse files
committed
Drop some compatibility and tighten up lexing
Old versions of the C tools used to ignore unhandled characters in some contexts due to sloppy lexing, which Kconfiglib emulated for compatibility (things like "---help---" used to depend on it). This was improved in the C tools by commit c2264564 ("kconfig: warn of unhandled characters in Kconfig commands"), committed in July 2015. Remove the compatibility hack and tighten up the lexing in Kconfiglib as well. It will make implementing the new preprocessor stuff smoother. The major version will be bumped.
1 parent fccfbae commit c19fc11

2 files changed

Lines changed: 15 additions & 37 deletions

File tree

kconfiglib.py

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,28 +1584,21 @@ def _tokenize(self):
15841584
# Token index (minus one). Set for later -- not further updated here.
15851585
self._tokens_i = -1
15861586

1587-
# See comment at _initial_token_re_match definition
1588-
initial_token_match = _initial_token_re_match(s)
1589-
if not initial_token_match:
1587+
# Initial token on the line
1588+
command_match = _command_re_match(s)
1589+
if not command_match:
15901590
self._tokens = (None,)
15911591
return
15921592

15931593
# Tricky implementation detail: While parsing a token, 'token' refers
15941594
# to the previous token. See _STRING_LEX for why this is needed.
1595-
token = _get_keyword(initial_token_match.group(1))
1596-
1597-
if token == _T_HELP:
1598-
# Avoid junk after "help", e.g. "---", being registered as a
1599-
# symbol
1600-
self._tokens = (token, None)
1601-
return
1602-
1595+
token = _get_keyword(command_match.group(1))
16031596
if token is None:
16041597
self._parse_error("expected keyword as first token")
16051598

16061599
self._tokens = [token]
16071600
# The current index in the string being tokenized
1608-
i = initial_token_match.end()
1601+
i = command_match.end()
16091602

16101603
# Main tokenization loop (for tokens past the first one)
16111604
while i < len(s):
@@ -1712,17 +1705,15 @@ def _tokenize(self):
17121705
self._lookup_const_sym(val)
17131706

17141707
elif c == "&":
1715-
# Invalid characters are ignored (backwards-compatible)
17161708
if i >= len(s) or s[i] != "&":
1717-
continue
1709+
self._parse_error("malformed operator")
17181710

17191711
token = _T_AND
17201712
i += 1
17211713

17221714
elif c == "|":
1723-
# Invalid characters are ignored (backwards-compatible)
17241715
if i >= len(s) or s[i] != "|":
1725-
continue
1716+
self._parse_error("malformed operator")
17261717

17271718
token = _T_OR
17281719
i += 1
@@ -1763,8 +1754,7 @@ def _tokenize(self):
17631754
token = _T_GREATER
17641755

17651756
else:
1766-
# Invalid characters are ignored (backwards-compatible)
1767-
continue
1757+
self._parse_error("invalid character in line")
17681758

17691759
# Skip trailing whitespace
17701760
while i < len(s) and s[i].isspace():
@@ -5317,6 +5307,7 @@ def _warn_choice_select_imply(sym, expr, expr_type):
53175307
# Keyword to token map, with the get() method assigned directly as a small
53185308
# optimization
53195309
_get_keyword = {
5310+
"---help---": _T_HELP,
53205311
"allnoconfig_y": _T_ALLNOCONFIG_Y,
53215312
"bool": _T_BOOL,
53225313
"boolean": _T_BOOL,
@@ -5393,25 +5384,12 @@ def _warn_choice_select_imply(sym, expr, expr_type):
53935384
# Use ASCII regex matching on Python 3. It's already the default on Python 2.
53945385
_RE_ASCII = 0 if _IS_PY2 else re.ASCII
53955386

5396-
# Note: This hack is no longer needed as of upstream commit c2264564
5397-
# (kconfig: warn of unhandled characters in Kconfig commands). It
5398-
# is kept around for backwards compatibility.
5399-
#
5400-
# The initial word on a line is parsed specially. Let
5401-
# command_chars = [A-Za-z0-9_]. Then
5402-
# - leading non-command_chars characters are ignored, and
5403-
# - the first token consists of the following one or more
5404-
# command_chars characters.
5405-
# This is why things like "----help--" are accepted.
5406-
#
5407-
# In addition to the initial token, the regex also matches trailing whitespace
5408-
# so that we can jump straight to the next token (or to the end of the line if
5409-
# there's just a single token).
5387+
# The initial token on a line. Also eats leading and trailing whitespace, so
5388+
# that we can jump straight to the next token (or to the end of the line if
5389+
# there is only one token).
54105390
#
5411-
# As an optimization, this regex fails to match for lines containing just a
5412-
# comment.
5413-
_initial_token_re_match = \
5414-
re.compile(r"[^A-Za-z0-9_#]*([A-Za-z0-9_]+)\s*", _RE_ASCII).match
5391+
# This regex will also fail to match for empty lines and comment lines.
5392+
_command_re_match = re.compile(r"\s*([A-Za-z0-9_-]+)\s*", _RE_ASCII).match
54155393

54165394
# Matches an identifier/keyword, also eating trailing whitespace
54175395
_id_keyword_re_match = re.compile(r"([A-Za-z0-9_/.-]+)\s*", _RE_ASCII).match

tests/Krepr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ endif
99
config BASIC
1010
bool
1111
default y
12-
help
12+
---help---
1313

1414
config VISIBLE
1515
bool "visible"

0 commit comments

Comments
 (0)