"""Utilities for working with tokens."""fromcollections.abcimportIterable,IteratorfromdataclassesimportdataclassfromfunctoolsimportcacheimportiofromitertoolsimportpairwiseimporttokenizefromtypingimportNamedTuplefrom._utilsimportTupleNewType
class Token(NamedTuple):
    """A stripped-down token: just a type code and its text."""

    type: int
    """Numeric :mod:`type code <token>` identifying the kind of token."""

    string: str
    """Text of the token as a string."""
class TokenTree(TupleNewType[Token]):
    """A delimited run of tokens.

    :type args: Token
    """
@dataclass(frozen=True, slots=True)
class Delimiter:
    """An open/close token pairing that has to stay balanced."""

    open_type: int
    """Type code the opening token must have."""

    open_string: str | None
    """Text the opening token must have.

    When ``None``, only :attr:`open_type` is compared.
    """

    close_type: int
    """Type code the closing token must have."""

    close_string: str | None
    """Text the closing token must have.

    When ``None``, only :attr:`close_type` is compared.
    """

    @staticmethod
    @cache
    def from_token(token: Token) -> 'Delimiter | None':
        """Build the :class:`Delimiter` opened by ``token``, if it opens one.

        Returns ``None`` when the token is not a recognised opener.
        """
        # Anything that is not a Token can never be an opener.
        if not isinstance(token, Token):
            return None

        # Bracket openers: the opening half is taken from the token itself.
        if token.type == tokenize.OP:
            for opener, closer in (('(', ')'), ('[', ']'), ('{', '}')):
                if token.string == opener:
                    return Delimiter(
                        token.type, token.string, tokenize.OP, closer
                    )

        # Indentation blocks are identified by type alone.
        if token.type == tokenize.INDENT:
            return Delimiter(tokenize.INDENT, None, tokenize.DEDENT, None)

        # So are f-string bodies.
        if token.type == tokenize.FSTRING_START:
            return Delimiter(
                tokenize.FSTRING_START, None, tokenize.FSTRING_END, None
            )

        return None

    def matches_open(self, token: Token) -> bool:
        """Tell whether ``token`` is this delimiter's opening token."""
        if token.type != self.open_type:
            return False
        # A missing open_string means the type check alone decides.
        return self.open_string is None or token.string == self.open_string

    def matches_close(self, token: Token) -> bool:
        """Tell whether ``token`` is this delimiter's closing token."""
        if token.type != self.close_type:
            return False
        # A missing close_string means the type check alone decides.
        return self.close_string is None or token.string == self.close_string
def lex(source: str) -> Iterator[Token]:
    r"""Turn source code into a simplified stream of tokens.

    To make matching easier, the raw tokenizer output is adjusted:

    * Semantically inert tokens (:data:`~token.NL` and
      :data:`~token.COMMENT`) are dropped.
    * A :data:`~token.NEWLINE`\ -:data:`~token.INDENT` or
      :data:`~token.NEWLINE`\ -:data:`~token.DEDENT` pair is collapsed to
      just the :data:`~token.INDENT` or :data:`~token.DEDENT`.
      (:func:`desimplify` undoes this.)
    * The strings of :data:`~token.INDENT` and :data:`~token.NEWLINE`
      tokens are normalized (to ``''`` and ``'\n'`` respectively).
    * The implicit trailing :data:`~token.NEWLINE` (the one whose string is
      empty) and the final :data:`~token.ENDMARKER` are dropped.
    """
    ignored_types = (tokenize.NL, tokenize.COMMENT)
    block_edge_types = (tokenize.INDENT, tokenize.DEDENT)

    next_line = io.StringIO(source).readline
    # NL tokens and comments may sit between a NEWLINE and the INDENT/DEDENT
    # that follows it, so they are filtered out before pairing.
    significant = (
        Token(raw.type, raw.string)
        for raw in tokenize.generate_tokens(next_line)
        if raw.type not in ignored_types
    )

    # Pairing never puts the last token in first position. That token is
    # ENDMARKER, which is meant to be dropped anyway.
    for current, following in pairwise(significant):
        if current.type == tokenize.NEWLINE:
            if current.string == '':
                # Implicit trailing NEWLINE: leave it out.
                continue
            if following.type in block_edge_types:
                # The INDENT/DEDENT that follows stands in for this NEWLINE.
                continue
            yield Token(tokenize.NEWLINE, '\n')
        elif current.type == tokenize.INDENT:
            yield Token(tokenize.INDENT, '')
        else:
            yield current
def desimplify(tokens: Iterable[Token], *, indent: str = ' ') -> Iterator[Token]:
    """Undo the semantics-changing simplifications of :func:`lex`.

    A :data:`~token.NEWLINE` is put back in front of every
    :data:`~token.INDENT` and :data:`~token.DEDENT`, and each
    :data:`~token.INDENT` gets its string rebuilt as ``indent`` repeated once
    per nesting level. Every other token passes through unchanged.
    """
    depth = 0
    for token in tokens:
        # Only genuine Token instances are inspected; anything else is
        # forwarded untouched.
        kind = token.type if isinstance(token, Token) else None

        if kind == tokenize.INDENT:
            depth += 1
            # Restore the NEWLINE that lex() folded into this INDENT.
            yield Token(tokenize.NEWLINE, '\n')
            # Emit the indentation for the new nesting level.
            yield Token(tokenize.INDENT, indent * depth)
        elif kind == tokenize.DEDENT:
            depth -= 1
            # Restore the NEWLINE that lex() folded into this DEDENT.
            yield Token(tokenize.NEWLINE, '\n')
            yield token
        else:
            yield token
def stringify(tokens: Iterable[Token]) -> str:
    """Render a token stream back into source code text."""
    # untokenize() needs the NEWLINEs and indentation that lex() removed.
    restored = desimplify(tokens)
    return tokenize.untokenize(restored)