Skip to content
Snippets Groups Projects
Commit 71962647 authored by Hugo Hörnquist's avatar Hugo Hörnquist
Browse files

Improve error reporting in parser combinator.

Errors are now created through a factory, embedding information about
WHERE the error comes from in the source.

Also add a clearer indicator to the error output.
parent da65b68f
No related branches found
No related tags found
No related merge requests found
...@@ -188,6 +188,7 @@ from typing import ( ...@@ -188,6 +188,7 @@ from typing import (
TypeVar, TypeVar,
) )
import logging import logging
from muppet.util import string_around, len_before
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -218,14 +219,48 @@ class ParseError(Exception): ...@@ -218,14 +219,48 @@ class ParseError(Exception):
This should only appear with optional fields, since we don't know This should only appear with optional fields, since we don't know
if the next token is the "expected" one. It should be captured if the next token is the "expected" one. It should be captured
internally by all exported procedures and methods. internally by all exported procedures and methods.
:param msg:
Free-text message describing the error.
:param pos:
The position, counted in characters, from the beginning of the
string. Zero indexed.
:param src:
The string which contains the position of the error.
:param stk:
Stack trace of previous parsers. Recommended usage is
.. code-block:: python
try:
...
except ParseError as e:
e.stk.append(item_used_in_block)
raise e
""" """
msg: Optional[str] = None msg: Optional[str] = None
pos: Optional[int] = None
src: Optional[str] = None
stk: list['Items'] = field(default_factory=list) stk: list['Items'] = field(default_factory=list)
def __str__(self) -> str: def __str__(self) -> str:
s = f"{self.msg}\nTraceback of called parsers:\n" s = ''
if self.msg:
s += self.msg
if self.pos:
s += f' (pos {self.pos})\n'
preview_back = 10
preview_front = 10
if self.src:
s += "|" + string_around(self.src, self.pos, preview_back, preview_front)
s += '\n'
s += "|" + ' ' * len_before(self.src, self.pos, preview_back)
s += '^'
s += '\n'
if self.msg or self.pos:
s += '\n'
s += "Traceback of called parsers:\n"
for item in self.stk: for item in self.stk:
s += '' s += ''
item = str(item) item = str(item)
...@@ -313,7 +348,7 @@ class not_(ParseDirective): ...@@ -313,7 +348,7 @@ class not_(ParseDirective):
parser.restore(snapshot) parser.restore(snapshot)
# The parser failed, meaning that we "succeeded" # The parser failed, meaning that we "succeeded"
return [] return []
raise ParseError() raise parser.error()
def __repr__(self) -> str: def __repr__(self) -> str:
return f'~ {self.form}' return f'~ {self.form}'
...@@ -321,7 +356,25 @@ class not_(ParseDirective): ...@@ -321,7 +356,25 @@ class not_(ParseDirective):
@dataclass @dataclass
class name(ParseDirective): class name(ParseDirective):
"""Attach a name to a parser, purely for debugging.""" r"""
Wrap a parser into a new "primitive" item.
The resulting parser works exactly like the given parser, but
when it is printed, only the name is shown instead of the
parser's components.
:param name:
New name to show
:param form:
Actual parser.
.. code-block:: python
>>> space = s(' ') | '\t' | '\n' | '\r'
>>> ws = name('ws', many(space))
>>> print(ws)
[ws]
"""
name: str name: str
form: 'Items' form: 'Items'
...@@ -397,7 +450,7 @@ class or_(ParseDirective): ...@@ -397,7 +450,7 @@ class or_(ParseDirective):
parser.restore(save) parser.restore(save)
else: else:
msg = f"No alternative suceeded, cases={self.alternatives}, seek={parser.seek}" msg = f"No alternative suceeded, cases={self.alternatives}, seek={parser.seek}"
raise ParseError(msg) raise parser.error(msg)
def __or__(self, other: Any) -> ParseDirective: def __or__(self, other: Any) -> ParseDirective:
if isinstance(other, or_): if isinstance(other, or_):
...@@ -598,9 +651,9 @@ class complement(ParseDirective): ...@@ -598,9 +651,9 @@ class complement(ParseDirective):
if c not in self.chars: if c not in self.chars:
return parser.get(char) return parser.get(char)
else: else:
raise ParseError(msg=f"{c} in {self.chars}") raise parser.error(f"{c} in {self.chars}")
case it: case it:
raise ParseError(msg=f"Parsed item wasn't a char: {it!r}") raise parser.error(f"Parsed item wasn't a char: {it!r}")
def __repr__(self) -> str: def __repr__(self) -> str:
return f'complement({repr(self.chars)})' return f'complement({repr(self.chars)})'
...@@ -702,7 +755,7 @@ class ParserCombinator: ...@@ -702,7 +755,7 @@ class ParserCombinator:
try: try:
out = self.__source[self.seek] out = self.__source[self.seek]
except IndexError: except IndexError:
raise ParseError("End of string") raise self.error("End of string")
self.seek += 1 self.seek += 1
return out return out
...@@ -742,7 +795,7 @@ class ParserCombinator: ...@@ -742,7 +795,7 @@ class ParserCombinator:
self.seek += len(s) self.seek += len(s)
out += [s] out += [s]
else: else:
raise ParseError(f'Expected {item!r}, got {substr!r} (char {self.seek})') raise self.error(f'Expected {item!r}, got {substr!r}')
case None: case None:
pass pass
...@@ -835,6 +888,21 @@ class ParserCombinator: ...@@ -835,6 +888,21 @@ class ParserCombinator:
""" """
return self.__source[self.seek:][:max_len] return self.__source[self.seek:][:max_len]
def error(self, msg: Optional[str] = None) -> ParseError:
    """
    Build a new ParseError describing the parser's current state.

    The error's ``pos`` is taken from the parser's current seek
    position and its ``src`` from the parser's source string, so
    callers don't need to fill those in by hand.

    :param msg:
        Optional free-text message forwarded to the ParseError.
    :return:
        A freshly constructed ParseError, ready to be raised.
    """
    failure = ParseError(msg=msg, pos=self.seek, src=self.__source)
    return failure
T = TypeVar('T') T = TypeVar('T')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment