Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions Lib/lib2to3/pgen2/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def _combinations(*l):
# Horizontal whitespace only: spaces, form feeds and tabs (newlines excluded).
Whitespace = r'[ \f\t]*'
# A comment starts at '#' and extends up to, but not including, the line end.
Comment = r'#[^\r\n]*'
# Ignorable text: whitespace, backslash-newline continuations (each followed
# by more whitespace), then a comment.  any() and maybe() are helpers defined
# outside this hunk -- presumably "zero or more" and "optional"; confirm there.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
# NOTE(review): this hunk appears to show both sides of the change: the
# ASCII-only pattern is presumably the removed line and r'\w+' the added one.
# Read as plain code, the second assignment is the one that takes effect.
Name = r'[a-zA-Z_]\w*'
Name = r'\w+'

# Binary literal: 0b/0B prefix, an optional underscore, then groups of binary
# digits separated by single underscores.
Binnumber = r'0[bB]_?[01]+(?:_[01]+)*'
# Hex literal: 0x/0X prefix, an optional underscore, underscore-separated
# groups of hex digits, and an optional trailing l/L suffix.
Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?'
Expand Down Expand Up @@ -107,8 +107,8 @@ def _combinations(*l):
# Extra pseudo-token alternatives: a backslash-newline continuation, a
# comment, or Triple (defined outside this hunk).  group() is also defined
# elsewhere -- presumably it builds a regex alternation; confirm there.
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
# A pseudo-token is leading Whitespace followed by one of the grouped patterns.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

# Compile the four patterns and unpack them into module-level names.
# NOTE(review): both sides of the change appear here -- the list(map(...))
# form is presumably the removed line and the bare map(...) the added one.
# Tuple unpacking consumes the map iterator either way.
tokenprog, pseudoprog, single3prog, double3prog = list(map(
re.compile, (Token, PseudoToken, Single3, Double3)))
tokenprog, pseudoprog, single3prog, double3prog = map(
re.compile, (Token, PseudoToken, Single3, Double3))

_strprefixes = (
_combinations('r', 'R', 'f', 'F') |
Expand Down Expand Up @@ -349,7 +349,6 @@ def generate_tokens(readline):
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
namechars, numchars = string.ascii_letters + '_', '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]
Expand Down Expand Up @@ -451,7 +450,7 @@ def generate_tokens(readline):
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]

if initial in numchars or \
if initial in string.digits or \
(initial == '.' and token != '.'): # ordinary number
yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
Expand Down Expand Up @@ -501,7 +500,7 @@ def generate_tokens(readline):
yield stashed
stashed = None
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
if async_def:
yield (ASYNC if token == 'async' else AWAIT,
Expand Down
10 changes: 10 additions & 0 deletions Lib/lib2to3/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,16 @@ def test_4(self):
self.validate("""x = {2, 3, 4,}""")


# Adapted from Python 3's Lib/test/test_unicode_identifiers.py and
# Lib/test/test_tokenize.py:TokenizeTest.test_non_ascii_identifiers
class TestIdentfier(GrammarTest):
    """Grammar checks for source text whose identifiers are not pure ASCII."""

    def test_non_ascii_identifiers(self):
        # Each snippet assigns to names containing non-ASCII characters; they
        # are validated in order, so the first failure aborts the test.
        snippets = (
            "Örter = 'places'\ngrün = 'green'",
            "蟒 = a蟒 = 锦蛇 = 1",
            "µ = aµ = µµ = 1",
            "𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = a_𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1",
        )
        for source in snippets:
            self.validate(source)


class TestNumericLiterals(GrammarTest):
def test_new_octal_notation(self):
self.validate("""0o7777777777777""")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix parsing of non-ASCII identifiers in :mod:`lib2to3.pgen2.tokenize` (PEP 3131).