diff --git a/Lib/idlelib/HyperParser.py b/Lib/idlelib/HyperParser.py --- a/Lib/idlelib/HyperParser.py +++ b/Lib/idlelib/HyperParser.py @@ -6,11 +6,23 @@ """ import string -import keyword +from keyword import iskeyword from idlelib import PyParse + +# all ASCII chars that may be in an identifier +_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_") +_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)] +del _ASCII_ID_CHARS + +# all ASCII chars that may be the first char of an identifier +_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_") +_IS_ASCII_FIRST_ID_CHAR = \ + [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)] +del _ASCII_ID_FIRST_CHARS + + class HyperParser: - def __init__(self, editwin, index): "To initialize, analyze the surroundings of the given index." @@ -143,25 +155,72 @@ return beforeindex, afterindex - # Ascii chars that may be in a white space + _IS_ASCII_ID_CHAR = _IS_ASCII_ID_CHAR + _IS_ASCII_FIRST_ID_CHAR = _IS_ASCII_FIRST_ID_CHAR + + # the set of built-in identifiers which are also keywords, + # i.e. keyword.iskeyword() returns True for them + _ID_KEYWORDS = frozenset({"True", "False", "None"}) + + @classmethod + def _eat_identifier(cls, str, limit, pos): + """Given a string and pos, return the number of chars in the + identifier which ends at pos, or 0 if there is no such one. + + This ignores non-identifier keywords, i.e. keywords that are not identifiers. + """ + is_ascii_id_char = cls._IS_ASCII_ID_CHAR + + # Start at the end (pos) and work backwards. + i = pos + + # Go backwards as long as the characters are valid ASCII + # identifier characters. This is an optimization, since it + # is faster in the common case where most of the characters + # are ASCII. 
+ while i > limit and ( + ord(str[i - 1]) limit and ord(str[i - 1]) >= 128: + while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier(): + i -= 4 + if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier(): + i -= 2 + if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier(): + i -= 1 + + # The identifier candidate starts here. If it isn't a valid + # identifier, don't eat anything. At this point that is only + # possible if the first character isn't a valid first + # character for an identifier. + if not str[i:pos].isidentifier(): + return 0 + elif i limit and str[i-1] in self._id_chars: - i -= 1 - if (i