@@ -1584,28 +1584,21 @@ def _tokenize(self):
15841584 # Token index (minus one). Set for later -- not further updated here.
15851585 self ._tokens_i = - 1
15861586
1587- # See comment at _initial_token_re_match definition
1588- initial_token_match = _initial_token_re_match (s )
1589- if not initial_token_match :
1587+ # Initial token on the line
1588+ command_match = _command_re_match (s )
1589+ if not command_match :
15901590 self ._tokens = (None ,)
15911591 return
15921592
15931593 # Tricky implementation detail: While parsing a token, 'token' refers
15941594 # to the previous token. See _STRING_LEX for why this is needed.
1595- token = _get_keyword (initial_token_match .group (1 ))
1596-
1597- if token == _T_HELP :
1598- # Avoid junk after "help", e.g. "---", being registered as a
1599- # symbol
1600- self ._tokens = (token , None )
1601- return
1602-
1595+ token = _get_keyword (command_match .group (1 ))
16031596 if token is None :
16041597 self ._parse_error ("expected keyword as first token" )
16051598
16061599 self ._tokens = [token ]
16071600 # The current index in the string being tokenized
1608- i = initial_token_match .end ()
1601+ i = command_match .end ()
16091602
16101603 # Main tokenization loop (for tokens past the first one)
16111604 while i < len (s ):
@@ -1712,17 +1705,15 @@ def _tokenize(self):
17121705 self ._lookup_const_sym (val )
17131706
17141707 elif c == "&" :
1715- # Invalid characters are ignored (backwards-compatible)
17161708 if i >= len (s ) or s [i ] != "&" :
1717- continue
1709+ self . _parse_error ( "malformed operator" )
17181710
17191711 token = _T_AND
17201712 i += 1
17211713
17221714 elif c == "|" :
1723- # Invalid characters are ignored (backwards-compatible)
17241715 if i >= len (s ) or s [i ] != "|" :
1725- continue
1716+ self . _parse_error ( "malformed operator" )
17261717
17271718 token = _T_OR
17281719 i += 1
@@ -1763,8 +1754,7 @@ def _tokenize(self):
17631754 token = _T_GREATER
17641755
17651756 else :
1766- # Invalid characters are ignored (backwards-compatible)
1767- continue
1757+ self ._parse_error ("invalid character in line" )
17681758
17691759 # Skip trailing whitespace
17701760 while i < len (s ) and s [i ].isspace ():
@@ -5317,6 +5307,7 @@ def _warn_choice_select_imply(sym, expr, expr_type):
53175307# Keyword to token map, with the get() method assigned directly as a small
53185308# optimization
53195309_get_keyword = {
5310+ "---help---" : _T_HELP ,
53205311 "allnoconfig_y" : _T_ALLNOCONFIG_Y ,
53215312 "bool" : _T_BOOL ,
53225313 "boolean" : _T_BOOL ,
@@ -5393,25 +5384,12 @@ def _warn_choice_select_imply(sym, expr, expr_type):
53935384# Use ASCII regex matching on Python 3. It's already the default on Python 2.
53945385_RE_ASCII = 0 if _IS_PY2 else re .ASCII
53955386
5396- # Note: This hack is no longer needed as of upstream commit c226456
5397- # (kconfig: warn of unhandled characters in Kconfig commands). It
5398- # is kept around for backwards compatibility.
5399- #
5400- # The initial word on a line is parsed specially. Let
5401- # command_chars = [A-Za-z0-9_]. Then
5402- # - leading non-command_chars characters are ignored, and
5403- # - the first token consists the following one or more
5404- # command_chars characters.
5405- # This is why things like "----help--" are accepted.
5406- #
5407- # In addition to the initial token, the regex also matches trailing whitespace
5408- # so that we can jump straight to the next token (or to the end of the line if
5409- # there's just a single token).
5387+ # The initial token on a line. Also eats leading and trailing whitespace, so
5388+ # that we can jump straight to the next token (or to the end of the line if
5389+ # there is only one token).
54105390#
5411- # As an optimization, this regex fails to match for lines containing just a
5412- # comment.
5413- _initial_token_re_match = \
5414- re .compile (r"[^A-Za-z0-9_#]*([A-Za-z0-9_]+)\s*" , _RE_ASCII ).match
5391+ # Fails to match (returns None) for blank lines and comment-only lines.
5392+ _command_re_match = re .compile (r"\s*([A-Za-z0-9_-]+)\s*" , _RE_ASCII ).match
54155393
54165394# Matches an identifier/keyword, also eating trailing whitespace
54175395_id_keyword_re_match = re .compile (r"([A-Za-z0-9_/.-]+)\s*" , _RE_ASCII ).match
0 commit comments