# Source code for macro_polo.parse._transcribers

"""Utilities for parsing macro transcribers from source code."""

from collections.abc import Iterable, Sequence
import tokenize
from typing import cast

from .._utils import SliceView
from ..match import (
    DelimitedMacroMatcher,
    MacroMatch,
    MacroMatcher,
    MacroMatcherNegativeLookahead,
    MacroMatcherRepeater,
    MacroMatcherRepeaterMode,
    MacroMatcherVar,
    MacroMatcherVarType,
)
from ..tokens import Delimiter, Token, TokenTree, lex
from ..transcribe import (
    MacroTransciberSubstitution,
    MacroTranscriber,
    MacroTranscriberItem,
    MacroTranscriberRepeater,
)
from ._utils import DOLLAR_TOKEN, _parse_dollar_escape, _ParseResult, _replace_digraphs


# Matcher describing the token shape of a substitution: the dollar token
# followed by a single capture called 'name' of var type NAME.
# Used by `_parse_macro_transcriber_substitution`.
_MACRO_TRANSCRIBER_SUBSTITUTION_PARSER = MacroMatcher(
    DOLLAR_TOKEN,
    MacroMatcherVar('name', MacroMatcherVarType.NAME),
)


def _parse_macro_transcriber_substitution(
    tokens: Sequence[Token],
) -> _ParseResult[MacroTransciberSubstitution] | None:
    """Try to parse a macro transcriber substitution.

    Syntax:
        `$$ $name:name`
    """
    match _MACRO_TRANSCRIBER_SUBSTITUTION_PARSER.match(tokens):
        case MacroMatch(
            size=match_size,
            captures={'name': Token(string=name)},
        ):
            return _ParseResult(
                match_size=match_size,
                value=MacroTransciberSubstitution(name),
            )

    return None


# Matcher describing the token shape of a repeater: the dollar token, a
# parenthesized group of token trees, an optional separator token, and a
# trailing token captured as 'mode'.
# Used by `_parse_macro_transcriber_repeater`, which additionally checks the
# captured mode against `MacroMatcherRepeaterMode`.
_MACRO_TRANSCRIBER_REPEATER_PARSER = MacroMatcher(
    DOLLAR_TOKEN,
    # The repeated body: `(` ... `)` containing zero or more token trees, each
    # captured under 'sub_transcriber'.
    DelimitedMacroMatcher(
        delimiter=Delimiter(
            open_type=tokenize.OP,
            open_string='(',
            close_type=tokenize.OP,
            close_string=')',
        ),
        matcher=MacroMatcher(
            MacroMatcherRepeater(
                matcher=MacroMatcher(
                    MacroMatcherVar('sub_transcriber', MacroMatcherVarType.TOKEN_TREE),
                ),
                mode=MacroMatcherRepeaterMode.ZERO_OR_MORE,
            ),
        ),
    ),
    # The optional separator (zero or one occurrence), captured under 'sep'.
    MacroMatcherRepeater(
        matcher=MacroMatcher(
            # Match any token as separator except valid repetition modes
            *(
                MacroMatcherNegativeLookahead(Token(tokenize.OP, mode.value))
                for mode in MacroMatcherRepeaterMode
            ),
            MacroMatcherVar('sep', MacroMatcherVarType.TOKEN),
        ),
        mode=MacroMatcherRepeaterMode.ZERO_OR_ONE,
    ),
    # The repetition mode, captured with var type OP.
    MacroMatcherVar('mode', MacroMatcherVarType.OP),
)


def _parse_macro_transcriber_repeater(
    tokens: Sequence[Token],
) -> _ParseResult[MacroTranscriberRepeater] | None:
    """Try to parse a macro matcher repeater.

    Syntax:
        `$$ ($($sub_matcher:tt)*) $($sep:token)? $mode:op` where (
            $sep not in (*,+,?)
            $mode in (*,+,?)
        )
    """
    match _MACRO_TRANSCRIBER_REPEATER_PARSER.match(tokens):
        case MacroMatch(
            size=match_size,
            captures={
                'sub_transcriber': sub_transcriber_capture,
                'sep': sep_capture,
                'mode': Token(string=mode_string),
            },
        ) if mode_string in MacroMatcherRepeaterMode:
            sub_transcriber = sum(cast(list[TokenTree], sub_transcriber_capture), ())
            sep = cast(Token, sep_capture[0]) if sep_capture else None

            return _ParseResult(
                match_size=match_size,
                value=MacroTranscriberRepeater(
                    transcriber=parse_macro_transcriber(sub_transcriber),
                    sep=sep,
                ),
            )

    return None


# Sub-parsers tried, in this order, against the remaining tokens by
# `_parse_macro_transcriber_internal`; the first one that returns a truthy
# result is used.
_PARSER_FUNCS = [
    _parse_macro_transcriber_substitution,
    _parse_macro_transcriber_repeater,
    _parse_dollar_escape,
]


def _parse_macro_transcriber_internal(tokens: Sequence[Token]) -> MacroTranscriber:
    """Build a `MacroTranscriber` from a token sequence.

    Each sub-parser in `_PARSER_FUNCS` is tried in turn against the remaining
    tokens. The first truthy result contributes its value and consumes
    `match_size` tokens; if none succeeds, the next token is taken as-is.

    Unlike `parse_macro_transcriber`, this function does not perform digraph substitution.
    """
    remaining = SliceView(tokens)
    items: list[MacroTranscriberItem] = []

    while len(remaining) > 0:
        parsed = None
        for try_parse in _PARSER_FUNCS:
            parsed = try_parse(remaining)
            if parsed:
                break

        if parsed:
            # Drop the tokens the sub-parser consumed, then record its value.
            remaining = remaining[parsed.match_size :]
            items.append(parsed.value)
        else:
            # No sub-parser recognised this position: keep the token itself.
            items.append(remaining.popleft())

    return MacroTranscriber(*items)


def parse_macro_transcriber(source: str | Iterable[Token]) -> MacroTranscriber:
    """Parse a macro transcriber from source code or a token stream.

    Args:
        source: Either source code as a string, which is first tokenized with
            `lex`, or an iterable of tokens.

    Returns:
        The `MacroTranscriber` produced by `_parse_macro_transcriber_internal`
        from the tokens after digraph replacement.
    """
    if isinstance(source, str):
        source = tuple(lex(source))

    # Digraph replacement happens here; the internal parser does not do it.
    tokens = tuple(_replace_digraphs(source))
    return _parse_macro_transcriber_internal(tokens)