Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions Cython/Compiler/Lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,18 @@ def make_lexicon():
hexdigit = Any("0123456789ABCDEFabcdef")
indentation = Bol + Rep(Any(" \t"))

decimal = Rep1(digit)
def underscore_digits(d):
    """Build the pattern for a run of ``d`` with optional '_' separators.

    The result is ``Rep1(d)`` followed by any number of groups made of a
    single ``"_"`` and another ``Rep1(d)``, so an underscore is only ever
    matched between two characters of ``d``.
    """
    separated_group = Str("_") + Rep1(d)
    return Rep1(d) + Rep(separated_group)

decimal = underscore_digits(digit)
dot = Str(".")
exponent = Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)

name = letter + Rep(letter | digit)
intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) |
(Any("Oo") + Rep1(octdigit)) |
(Any("Bb") + Rep1(bindigit)) ))
intconst = decimal | (Str("0") + ((Any("Xx") + underscore_digits(hexdigit)) |
(Any("Oo") + underscore_digits(octdigit)) |
(Any("Bb") + underscore_digits(bindigit)) ))
intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
intliteral = intconst + intsuffix
fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
Expand Down Expand Up @@ -67,9 +70,9 @@ def make_lexicon():

return Lexicon([
(name, IDENT),
(intliteral, 'INT'),
(fltconst, 'FLOAT'),
(imagconst, 'IMAG'),
(intliteral, Method('strip_underscores', symbol='INT')),
(fltconst, Method('strip_underscores', symbol='FLOAT')),
(imagconst, Method('strip_underscores', symbol='IMAG')),
(punct | diphthong, TEXT),

(bra, Method('open_bracket_action')),
Expand Down
1 change: 1 addition & 0 deletions Cython/Compiler/Scanning.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ cdef initial_compile_time_env()

cdef class Method:
cdef object name
cdef dict kwargs
cdef readonly object __name__ # for tracing the scanner

@cython.final
Expand Down
10 changes: 8 additions & 2 deletions Cython/Compiler/Scanning.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,15 @@ def get_lexicon():

class Method(object):
    """Scanner action that forwards matched text to a named stream method.

    ``name`` is the attribute looked up on the stream each time the action
    is called.  Keyword arguments given at construction time are passed on
    unchanged with every call, after the matched text.
    """

    def __init__(self, name, **kwargs):
        self.name = name
        # Keep None instead of an empty dict so that __call__ can pick the
        # plain call with a cheap identity test.
        self.kwargs = kwargs or None
        self.__name__ = name   # for Plex tracing

    def __call__(self, stream, text):
        """Call ``stream.<name>(text, **kwargs)`` and return its result."""
        method = getattr(stream, self.name)
        # self.kwargs is almost always unused => avoid call overhead
        if self.kwargs is None:
            return method(text)
        return method(text, **self.kwargs)


#------------------------------------------------------------------
Expand Down Expand Up @@ -340,6 +343,9 @@ def commentline(self, text):
if self.parse_comments:
self.produce('commentline', text)

def strip_underscores(self, text, symbol):
    """Produce a ``symbol`` token whose value is ``text`` without any '_'."""
    without_separators = ''.join(text.split('_'))
    self.produce(symbol, without_separators)

def current_level(self):
    """Return the last entry of ``self.indentation_stack``."""
    stack = self.indentation_stack
    return stack[-1]

Expand Down
129 changes: 129 additions & 0 deletions Cython/Compiler/Tests/TestGrammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# mode: run
# tag: syntax

"""
Uses TreeFragment to test the parsing of valid and invalid number literals.
"""

from __future__ import absolute_import

from ...TestUtils import CythonTest
from ..Errors import CompileError
from .. import ExprNodes

# Copied from CPython's test_grammar.py
# Number literals that use '_' as a digit separator and must be accepted.
# test_valid_number_literals() splices each one into several expressions and
# checks the type of the resulting literal node.
VALID_UNDERSCORE_LITERALS = [
'0_0_0',
'4_2',
'1_0000_0000',
'0b1001_0100',
'0xffff_ffff',
'0o5_7_7',
'1_00_00.5',
'1_00_00.5j',
'1_00_00.5e5',
'1_00_00j',
'1_00_00e5_1',
'1e1_0',
'.1_4',
'.1_4e1',
'.1_4j',
]

# Copied from CPython's test_grammar.py
# Misplaced-underscore literals that must be rejected.
# test_invalid_number_literals() expects a CompileError for each one.
INVALID_UNDERSCORE_LITERALS = [
# Trailing underscores:
'0_',
'42_',
'1.4j_',
'0b1_',
'0xf_',
'0o5_',
# Underscores in the base selector:
'0_b0',
'0_xf',
'0_o5',
# Underscore right after the base selector:
'0b_0',
'0x_f',
'0o_5',
# Old-style octal, still disallowed:
# NOTE(review): these two entries are disabled here; presumably the lexer
# accepts decimal literals with leading zeros -- confirm before re-enabling.
#'0_7',
#'09_99',
# Special case with exponent:
'0 if 1_Else 1',
# Underscore right before a dot:
'1_.4',
'1_.4j',
# Underscore right after a dot:
'1._4',
'1._4j',
'._5',
# Underscore right after a sign:
'1.0e+_1',
# Multiple consecutive underscores:
'4_______2',
'0.1__4',
'0b1001__0100',
'0xffff__ffff',
'0o5__77',
'1e1__0',
# Underscore right before j:
'1.4_j',
'1.4e5_j',
# Underscore right before e:
'1_e1',
'1.4_e1',
# Underscore right after e:
'1e_1',
'1.4e_1',
# Whitespace in literals
'1_ 2',
'1 _2',
'1_2.2_ 1',
'1_2.2 _1',
'1_2e _1',
'1_2e2 _1',
'1_2e 2_1',
]


class TestGrammar(CythonTest):
    """Check how number literals with '_' digit separators are parsed.

    Every literal is tried on its own and as the left or right operand of
    '+' and '*', so that it is seen next to other tokens as well.
    """

    def test_invalid_number_literals(self):
        """Each invalid literal must raise a CompileError quoting the code."""
        header = u'# cython: language_level=3\n'
        for literal in INVALID_UNDERSCORE_LITERALS:
            for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']:
                code = 'x = ' + expression % literal
                try:
                    self.fragment(header + code)
                except CompileError as exc:
                    # The error report is expected to contain the offending source line.
                    assert code in [s.strip() for s in str(exc).splitlines()], str(exc)
                else:
                    # Raise explicitly: a bare "assert False" is removed under -O.
                    raise AssertionError(
                        "Invalid Cython code '%s' failed to raise an exception" % code)

    def test_valid_number_literals(self):
        """Each valid literal must parse into the matching literal node type."""
        header = u'# cython: language_level=3\n'
        for literal in VALID_UNDERSCORE_LITERALS:
            # Hex digits include 'e'/'E', which must not be read as an exponent.
            is_hex = '0x' in literal or '0X' in literal
            for i, expression in enumerate(['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']):
                code = 'x = ' + expression % literal
                node = self.fragment(header + code).root
                assert node is not None

                literal_node = node.stats[0].rhs  # StatListNode([SingleAssignmentNode('x', expr)])
                if i > 0:
                    # Add/MulNode() -> literal is first or second operand
                    literal_node = literal_node.operand2 if i % 2 else literal_node.operand1

                if 'j' in literal or 'J' in literal:
                    expected_type = ExprNodes.ImagNode
                elif not is_hex and ('.' in literal or 'e' in literal or 'E' in literal):
                    expected_type = ExprNodes.FloatNode
                else:
                    expected_type = ExprNodes.IntNode
                assert isinstance(literal_node, expected_type), code


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    import unittest

    unittest.main()
1 change: 1 addition & 0 deletions runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2100,3 +2100,4 @@ def runtests(options, cmd_args, coverage=None):
except PendingThreadsError:
# normal program exit won't kill the threads, do it the hard way here
flush_and_terminate(1)
sys.exit(1)
21 changes: 21 additions & 0 deletions tests/run/int_literals.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
# mode: run
# tag: syntax

from __future__ import absolute_import

cimport cython
from cython cimport typeof

import sys


def valid_underscore_literals():
    """
    Check that literals written with '_' digit separators compare equal to
    their plain spellings.

    >>> valid_underscore_literals()
    """
    # Copied from CPython's test_grammar.py
    assert 0_0_0 == 0
    assert 4_2 == 42
    assert 1_0000_0000 == 100000000
    assert 0b1001_0100 == 0b10010100
    assert 0xffff_ffff == 0xffffffff
    assert 0o5_7_7 == 0o577
    assert 1_00_00.5 == 10000.5
    assert 1e1_0 == 1e10
    assert .1_4 == .14
    # Imaginary literals and combined fraction/exponent forms, so that every
    # kind of number token is exercised with separators.
    assert 1_00_00.5j == 10000.5j
    assert 1_00_00.5e5 == 10000.5e5
    assert 1_00_00j == 10000j
    assert 1_00_00e5_1 == 10000e51
    assert .1_4e1 == .14e1
    assert .1_4j == .14j


@cython.test_assert_path_exists(
'//IntNode[@longness = "LL"]',
'//IntNode[@longness = "L"]',
Expand Down