diff --git a/muppet/parser_combinator.py b/muppet/parser_combinator.py index 06ff8da389c2a2276a04c36eb7daa6e591c2f15a..cc549f51984a897dbf2fd36e239597a738ea07ad 100644 --- a/muppet/parser_combinator.py +++ b/muppet/parser_combinator.py @@ -188,6 +188,7 @@ from typing import ( TypeVar, ) import logging +from muppet.util import string_around, len_before logger = logging.getLogger(__name__) @@ -218,14 +219,48 @@ class ParseError(Exception): This should only appear with optional fields, since we don't know if the next token is the "expected" one. It should be captured internally by all exported procedures and methods. + + :param msg: + Free-text message describing the error. + :param pos: + The position, counted in characters, from the beginning of the + string. Zero indexed. + :param src: + The string which contains the position of the error. + :param stk: + Stack trace of previous parsers. Recommended usage is + + .. code-block:: python + try: + ... + except ParseError as e: + e.stk.append(item_used_in_block) + raise e """ msg: Optional[str] = None + pos: Optional[int] = None + src: Optional[str] = None stk: list['Items'] = field(default_factory=list) def __str__(self) -> str: - s = f"{self.msg}\nTraceback of called parsers:\n" + s = '' + if self.msg: + s += self.msg + if self.pos: + s += f' (pos {self.pos})\n' + preview_back = 10 + preview_front = 10 + if self.src: + s += "|" + string_around(self.src, self.pos, preview_back, preview_front) + s += '\n' + s += "|" + ' ' * len_before(self.src, self.pos, preview_back) + s += '^' + s += '\n' + if self.msg or self.pos: + s += '\n' + s += "Traceback of called parsers:\n" for item in self.stk: s += '• ' item = str(item) @@ -313,7 +348,7 @@ class not_(ParseDirective): parser.restore(snapshot) # The parser failed, meaning that we "suceeded" return [] - raise ParseError() + raise parser.error() def __repr__(self) -> str: return f'~ {self.form}' @@ -321,7 +356,25 @@ class not_(ParseDirective): @dataclass class 
name(ParseDirective): - """Attach a name to a parser, purely for debugging.""" + r""" + Wrap a parser into a new "primitive" item. + + The resulting parser works exactly as the given parser, but + when it is printed, only the name is shown instead of the + parser's components. + + :param name: + New name to show + :param form: + Actual parser. + + .. code-block:: python + + >>> space = s(' ') | '\t' | '\n' | '\r' + >>> ws = name('ws', many(space)) + >>> print(ws) + [ws] + """ name: str form: 'Items' @@ -397,7 +450,7 @@ class or_(ParseDirective): parser.restore(save) else: msg = f"No alternative suceeded, cases={self.alternatives}, seek={parser.seek}" - raise ParseError(msg) + raise parser.error(msg) def __or__(self, other: Any) -> ParseDirective: if isinstance(other, or_): @@ -598,9 +651,9 @@ class complement(ParseDirective): if c not in self.chars: return parser.get(char) else: - raise ParseError(msg=f"{c} in {self.chars}") + raise parser.error(f"{c} in {self.chars}") case it: - raise ParseError(msg=f"Parsed item wasn't a char: {it!r}") + raise parser.error(f"Parsed item wasn't a char: {it!r}") def __repr__(self) -> str: return f'complement({repr(self.chars)})' @@ -702,7 +755,7 @@ class ParserCombinator: try: out = self.__source[self.seek] except IndexError: - raise ParseError("End of string") + raise self.error("End of string") self.seek += 1 return out @@ -742,7 +795,7 @@ class ParserCombinator: self.seek += len(s) out += [s] else: - raise ParseError(f'Expected {item!r}, got {substr!r} (char {self.seek})') + raise self.error(f'Expected {item!r}, got {substr!r}') case None: pass @@ -835,6 +888,21 @@ class ParserCombinator: """ return self.__source[self.seek:][:max_len] + def error(self, msg: Optional[str] = None) -> ParseError: + """ + Return a fresh ParseError. + + This factory method exists since most ParseErrors should have + their ``pos`` and ``src`` parameters set, but setting them + manually is cumbersome. 
+ + :param msg: + Message passed to the ParseError. + :return: + A new ParseError, suitable to be directly raised. + """ + return ParseError(msg=msg, pos=self.seek, src=self.__source) + T = TypeVar('T')