diff --git a/muppet/__main__.py b/muppet/__main__.py index cd2cfefe5284b0467bf9582cfb95b67d74b97dbc..2c0e4f2872bcf9d8994f9356ef421abc64f722b2 100644 --- a/muppet/__main__.py +++ b/muppet/__main__.py @@ -10,6 +10,11 @@ from .cache import Cache from .gather import get_module, get_modules, ModuleEntry from .output import setup_index, setup_module +# Our parser combinator system builds heavily on recursing, +# overflowing Pythons' stack. Simply cranking up the stack size solves +# this problem. +import sys +sys.setrecursionlimit(100_000) logger = logging.getLogger('muppet') logger.setLevel(logging.DEBUG) diff --git a/muppet/format.py b/muppet/format.py index 4726258cbced4df7d96df1314b6b1a88da2d632a..3cc09968314fa907eff821d3a9075cd9f0b70813 100644 --- a/muppet/format.py +++ b/muppet/format.py @@ -7,9 +7,7 @@ definition in it, and outputs a complete index.html. """ from .markdown import markdown -from subprocess import CalledProcessError import html -import sys import re from typing import ( Tuple, @@ -29,8 +27,9 @@ from .puppet.strings import ( DocStringExampleTag, ) from muppet.puppet.ast import build_ast -from muppet.puppet.format import to_string -from muppet.puppet.format.parser import ParserFormatter, ParseError +# from muppet.puppet.format import to_string +from muppet.parser_combinator import ParserCombinator, ParseError +from muppet.puppet.format.parser import ParserFormatter logger = logging.getLogger(__name__) @@ -39,6 +38,26 @@ logger = logging.getLogger(__name__) param_doc: dict[str, str] = {} + +def parse_puppet(source: str, file: str) -> str: + """ + Parse and syntax highlight the given puppet source. + + :returns: An HTML string + """ + # logger.debug("source: %a", source) + # Run the upstream puppet parser, + # then masage the tree into a usable form. + ast = build_ast(puppet_parser(source)) + # logger.info("ast: %a", ast) + # From the ast, build a parser combinator parser. 
+ parser = ParserFormatter(source, file=file).serialize(ast) + # logger.warning("parser: %a", parser) + # Run the generatefd parser, giving us a list of match objects + match_objects = ParserCombinator(source, file).get(parser) + # logger.error("match_objects: %a", match_objects) + return '\n'.join(repr(m) for m in match_objects) + # -------------------------------------------------- @@ -126,10 +145,9 @@ def build_param_dict(docstring: DocString) -> dict[str, str]: def format_class(d_type: DefinedType | PuppetClass) -> Tuple[str, str]: """Format Puppet class.""" out = '' - name = d_type.name - logger.info("Formatting class %s", name) + logger.info("Formatting class %s", d_type.name) # print(name, file=sys.stderr) - name, body = format_docstring(name, d_type.docstring) + name, body = format_docstring(d_type.name, d_type.docstring) out += body out += '<pre class="highlight-muppet"><code class="puppet">' @@ -139,9 +157,8 @@ def format_class(d_type: DefinedType | PuppetClass) -> Tuple[str, str]: # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) # out += render(renderer, data) # ------ New --------------------------------------- - ast = build_ast(puppet_parser(d_type.source)) try: - out += to_string(ParserFormatter(d_type.source).serialize(ast)) + out += parse_puppet(d_type.source, d_type.name) except ParseError as e: logger.error("Parsing %(name)s failed: %(err)s", {'name': d_type.name, 'err': e}) @@ -154,7 +171,7 @@ def format_type() -> str: return 'TODO format_type not implemented' -def format_type_alias(d_type: DataTypeAlias) -> Tuple[str, str]: +def format_type_alias(d_type: DataTypeAlias, file: str) -> Tuple[str, str]: """Format Puppet type alias.""" out = '' name = d_type.name @@ -164,14 +181,16 @@ def format_type_alias(d_type: DataTypeAlias) -> Tuple[str, str]: out += body out += '\n' out += '<pre class="highlight-muppet"><code class="puppet">' - t = puppet_parser(d_type.alias_of) - data = build_ast(t) - out += 
to_string(ParserFormatter(d_type.alias_of).serialize(data)) + try: + out += parse_puppet(d_type.alias_of, file) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': d_type.alias_of, 'err': e}) out += '</code></pre>\n' return title, out -def format_defined_type(d_type: DefinedType) -> Tuple[str, str]: +def format_defined_type(d_type: DefinedType, file: str) -> Tuple[str, str]: """Format Puppet defined type.""" # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) out = '' @@ -182,8 +201,11 @@ def format_defined_type(d_type: DefinedType) -> Tuple[str, str]: out += body out += '<pre class="highlight-muppet"><code class="puppet">' - ast = build_ast(puppet_parser(d_type.source)) - out += to_string(ParserFormatter(d_type.source).serialize(ast)) + try: + out += parse_puppet(d_type.source, file) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': d_type.source, 'err': e}) out += '</code></pre>\n' return title, out @@ -225,7 +247,7 @@ def format_resource_type(r_type: ResourceType) -> str: return out -def format_puppet_function(function: Function) -> str: +def format_puppet_function(function: Function, file: str) -> str: """Format Puppet function.""" out = '' name = function.name @@ -245,13 +267,10 @@ def format_puppet_function(function: Function) -> str: elif t == 'puppet': out += '<pre class="highlight-muppet"><code class="puppet">' try: - # source = parse_puppet(function.source) - # out += str(build_ast(source)) - ast = build_ast(puppet_parser(function.source)) - out += to_string(ParserFormatter(function.source).serialize(ast)) - except CalledProcessError as e: - print(e, file=sys.stderr) - print(f"Failed on function: {name}", file=sys.stderr) + out += parse_puppet(function.source, file) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': function.source, 'err': e}) out += '</code></pre>\n' else: diff --git a/muppet/gather.py b/muppet/gather.py index 
eb9c6f2e44e9376fba47af122f0256ca9ab0ea53..3152562f2e748b47ee575e7f0c12ba6a565524ae 100644 --- a/muppet/gather.py +++ b/muppet/gather.py @@ -17,6 +17,10 @@ import hashlib from glob import glob from .puppet.strings import puppet_strings, PuppetStrings from .cache import Cache +import logging + + +logger = logging.getLogger(__name__) @dataclass @@ -73,7 +77,10 @@ def get_puppet_strings(cache: Cache, path: str) -> Optional[PuppetStrings]: # TODO actually run puppet strings again. # This is just since without a metadata.json we always get a # cache miss, which is slow. - # return puppet_strings(path) + logger.info("Running 'puppet-strings %s'", + os.path.basename(path)) + result = puppet_strings(path) + return PuppetStrings.from_json(json.loads(result)) return None # try: diff --git a/muppet/output.py b/muppet/output.py index fe1be7d8ea3f68b1c697309109410e383e926673..08239c91471434a1917fdeadefcdfed81609eb35 100644 --- a/muppet/output.py +++ b/muppet/output.py @@ -464,7 +464,7 @@ def setup_module_index(*, with open(os.path.join(base, 'index.html'), 'w') as f: f.write(templates.module_index( module_name=module.name, - module_author=module.metadata['author'], + module_author='TODO', # module.metadata['author'], breadcrumbs=crumbs, content=content, path_base=path_base, @@ -495,6 +495,7 @@ def setup_module(base: str, module: ModuleEntry, *, path_base: str) -> None: path = os.path.join(base, module.name) pathlib.Path(path).mkdir(exist_ok=True) if not module.strings_output: + logger.warning("No strings output for %s", module.name) return data = module.strings_output @@ -570,7 +571,7 @@ def setup_module(base: str, module: ModuleEntry, *, path_base: str) -> None: # with open(os.path.join(dir, 'source.json'), 'w') as f: # json.dump(type_alias, f, indent=2) - title, body = format_type_alias(type_alias) + title, body = format_type_alias(type_alias, type_alias.name) with open(os.path.join(dir, 'index.html'), 'w') as f: f.write(templates.code_page( title=title, diff --git 
a/muppet/parser_combinator.py b/muppet/parser_combinator.py index 439b3ef7c783a359be0aa400893826d86f6d52c5..e0d1a3b873bf0c90c21f5d314bda462111e8a8a5 100644 --- a/muppet/parser_combinator.py +++ b/muppet/parser_combinator.py @@ -1,4 +1,180 @@ -"""A basic parser combinator for Python.""" +""" +YAPC - Yet Another Parser Combinator. + +This module implements a simple, and probably rather bad, parser +combinator. + +At its core is the ``ParserCombinator`` class, which is the driver for +the whole process. A sample usage is as follows: + +.. code-block:: python + + filename = 'example.ext' + with open(filename, 'r') as f: + data = f.read() + + parser = ParserCombinator(data, file=filename) + + parser.get(grammar) + +Where ``grammar`` is a parser combinator rule. + +All rules stem from the compound type ``Items`` (TODO which should be +renamed). The types of valid rules are: + +- string literals, which match themselves +- ``None``, which matches nothing, and returns nothing +- ParseDirective instances, see below +- Nullary functions, returning parser instances. This is purely for + lazy evaluation. + + .. _sequence: + +- Sequences of the above, interpreted as all the items after each + other. Note that if earlier elements succeed, but a later one fails, then + an exception will still be thrown, and the earlier parts discarded. + See also: :ref:`and <and>` + +``ParseDirective`` objects are the extension mechanism for this parser. + +Rules +===== + +Each parser MUST implement a ``run`` method, which should take an +instance of the parser object, and attempt to read characters from it. +Characters can be read from the parser instance through the ``read`` +method. A couple of other useful methods are available, see +:py:class:`ParserCombinator`. + +A parser should either return a list of ``MatchObject``:s of what it +parsed, or throw a ``ParseError`` exception if the current parser +isn't applicable. These errors SHOULD be caught, since they should be +treated as non-fatal.
For example + +.. code-block:: python + + literal = integer | string + +will be parsed by first trying the integer parser, which SHOULD fail +if the input isn't a valid integer, and then trying a string literal. If +neither matches, then the whole (``literal``) expression SHOULD throw. + +Built-in parsers +================ + +However, when constructing parsers you usually want to build +from the "primitives" provided by this module. These include: + +``optional(P)`` + Either matches the given parser, or matches nothing. + +``count(P, n)`` + Matches exactly *n* instances of the parser *P*. + +``discard(P)`` + Matches the parser *P*, but returns no matches. + +``char`` + Matches any single character. This is the most basic parser possible. + +``nop`` + Matches nothing. This is only useful as part of more complex parsers. + +``many(P)`` + Consume tokens until the given parser doesn't match any more; this + means 0 or more. + +``many1`` + Like ``many``, but at least one. Equivalent to ``P & many(P)`` + +``complement(P)`` + Parses anything except the given parser. Mainly useful together + with ``all_``. + +``delimited(P1, P2)`` + Parses a list of *P2*:s, delimited by the parser *P1*. A good + usage could be: + + .. code-block:: python + + delimited(ws & "," & ws, integer) + + which would match a list of integers, separated by commas (and + surrounding space). + +``digit`` + Matches any one digit. + +``hexdig`` + Matches any one hexadecimal digit. + +``space`` + Matches a single ASCII whitespace character. + +``ws`` + Matches any sequence of ``space``. + +``all_(P ...)`` + Starting from the same point, run each parser in order. If all of + them succeed, return the result of the last parser run. + +Meta commands +------------- + +Besides these, a few "meta" commands exist; these include: + +``s(any)`` + Which wraps its argument in a parser combinator object. Primarily + useful for the operators (see below). + +``name(name, P)`` + Attaches a name to the given parser. This causes the ``repr`` of
the given object to be the given name. + +``tag(tag, P)`` + Attaches the given tag to the parsed object. This is useful to + quickly figure out what kind of object was parsed. The following + example gives a good idea of how to implement an integer parser. + + .. code-block:: python + + integer = tag("integer", many1(digit)) + + When combining ``tag`` and ``name``, it's recommended to have + ``name`` as the outermost, since otherwise the tag's ``repr`` + would still be visible. + + +Operators +--------- + +Finally, the base parser combinator class defines a few operators +(most of which are also available as methods). These are: + +.. _and: +``P1 & P2`` or ``and_(P1, P2)`` + Creates a new combinator, which matches if *P1* first matches, + and *P2* matches wherever *P1* ends. + + Note that this is an all or nothing deal, in contrast to sequences + from lists, which may fail partway through. + + See also: :ref:`sequence <sequence>` + +``P1 | P2`` or ``or_(P1, P2)`` + Creates a new combinator, which matches either *P1* or *P2*, + returning the result from the one matching. + +``P @ f`` + Matches (or fails) *P* as usual. However, if it matches, then the + result will be passed through the procedure *f*, and its result + will become part of the parsed tree instead of the initial result + of *P*. + +``~ P`` or ``not_(P)`` + Creates a combinator which matches the complement of *P*, meaning + that this parser will be successful if *P* fails.
+""" # import html from dataclasses import dataclass, field @@ -9,66 +185,52 @@ from typing import ( Sequence, TypeAlias, Union, + TypeVar, ) +import logging -@dataclass +logger = logging.getLogger(__name__) + + +@dataclass(kw_only=True) class MatchObject: """A matched item, similar to a regex match.""" # start: int # end: int + + matched: Any + + def __init__(self) -> None: + raise TypeError("Can't instanciate MatchObject directly") + + +@dataclass(kw_only=True) +class MatchCompound(MatchObject): + """An advanced matched object.""" + type: str - matched: str | list['MatchObject'] - - def __init__(self, type: str, matched: str | list['MatchObject']): - self.type = type - self.matched = matched - # logger.debug(repr(self)) - - # def __str__(self) -> str: - # match self.matched: - # case str(s): - # return s - # case xs: - # return ''.join(str(s) for s in xs) - - # def __repr__(self) -> str: - # match self: - # case MatchObject('', str(s)): - # return repr(s) - # case _: - # return f'MatchedObject({repr(self.type)}, {repr(self.matched)})' - - # def serialize(self) -> str: - # """Seralize into HTML.""" - # match self.matched: - # case str(s): - # if self.type: - # return f'<span class="{self.type}">{html.escape(s)}</span>' - # else: - # return html.escape(s) - # case _: - # out = '' - # if self.type: - # out += f'<span class="{self.type}">' - # for item in self.matched: - # out += item.serialize() - # if self.type: - # out += '</span>' - # return out - - -def stringify_match(items: list[MatchObject]) -> str: - """Serialize a list of match objects back into their string form.""" - out: str = '' - for item in items: - match item.matched: - case str(s): - out += s - case other: - out += stringify_match(other) - return out + # matched: list['MatchObject'] + + # def __init__(self, type: str, matched: str | list['MatchObject']): + # self.type = type + # self.matched = matched + # # logger.debug(repr(self)) + + def __repr__(self) -> str: + if self.type: + return 
f'`{self.type}({self.matched!r})`' + else: + return f'MatchObject({self.matched!r}))' + + +@dataclass(kw_only=True) +class MatchLiteral(MatchObject): + """A matched string.""" + + def __repr__(self) -> str: + return f's({repr(self.matched)})' @dataclass @@ -88,7 +250,13 @@ class ParseError(Exception): def __str__(self) -> str: s = f"{self.msg}\nTraceback of called parsers:\n" for item in self.stk: - s += f'• {item}\n' + s += '• ' + item = str(item) + if len(item) > 60: + s += item[:60] + '…' + else: + s += item + s += '\n' return s @@ -99,6 +267,8 @@ class ParseDirective: This is used for optional parsers, alternative parsers, and the like. """ + handler: Optional[Callable[[list[Any]], list[Any]]] = None + def run(self, parser: 'ParserCombinator') -> list[MatchObject]: """ Execute this directive. @@ -114,12 +284,20 @@ class ParseDirective: def __or__(self, other: Any) -> 'ParseDirective': return or_(self, other) + def __matmul__(self, proc: Any) -> 'ParseDirective': + self.handler = proc + return self + def __invert__(self) -> 'ParseDirective': + return not_(self) + + +# TODO rename this Items: TypeAlias = Union[ str, None, ParseDirective, - Callable[[], list[MatchObject]], + Callable[[], 'Items'], Sequence['Items'] ] @@ -135,9 +313,34 @@ class s(ParseDirective): s: Items - def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102): + def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 return parser.get(self.s) + def __repr__(self) -> str: + return repr(self.s) + + +@dataclass +class not_(ParseDirective): + """Succeeds if the given directive fails.""" + + form: Items + + def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 + snapshot = parser.snapshot() + try: + # if this succeeds, then we want to fail + # but if it fails then we want to suceed + parser.get(self.form) + except ParseError: + parser.restore(snapshot) + # The parser failed, meaning that we "suceeded" + return [] + raise ParseError() + 
+ def __repr__(self) -> str: + return f'~ {self.form}' + @dataclass class name(ParseDirective): @@ -152,24 +355,26 @@ class name(ParseDirective): def __repr__(self) -> str: return f'{self.name}' - def __str__(self) -> str: - return repr(self) - - -@dataclass -class optional(ParseDirective): - """An optional parameter.""" - form: 'Items' +# @dataclass +# class optional(ParseDirective): +# """An optional parameter.""" +# +# form: 'Items' +# +# def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 +# try: +# return parser.get(self.form) +# except ParseError: +# return [] +# +# def __repr__(self) -> str: +# return f'optional({repr(self.form)})' - def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 - try: - return parser.get(self.form) - except ParseError: - return [] - def __repr__(self) -> str: - return f'optional({repr(self.form)})' +def optional(parser: ParseDirective) -> ParseDirective: + """Optionally parse parameter.""" + return parser | nop @dataclass @@ -185,15 +390,25 @@ class and_(ParseDirective): snapshot = parser.snapshot() out = [] try: + # print("self.items", self.items) for item in self.items: - out += parser.get(item) + # print("item=", item) + result = parser.get(item) + # print("result=", result) + out += result except ParseError as e: parser.restore(snapshot) raise e return out + def __and__(self, other: Any) -> ParseDirective: + if isinstance(other, and_): + return and_(*self.items, *other.items) + else: + return and_(*self.items, other) + def __repr__(self) -> str: - return f'and_({", ".join(repr(x) for x in self.items)})' + return f'({" & ".join(repr(x) for x in self.items)})' @dataclass @@ -223,8 +438,15 @@ class or_(ParseDirective): msg = f"No alternative suceeded, cases={self.alternatives}, seek={parser.seek}" raise ParseError(msg) + def __or__(self, other: Any) -> ParseDirective: + if isinstance(other, or_): + return or_(*self.alternatives, *other.alternatives) + else: + return 
or_(*self.alternatives, other) + def __repr__(self) -> str: - return f'or_({", ".join(repr(x) for x in self.alternatives)})' + # return f'or_({", ".join(repr(x) for x in self.alternatives)})' + return f'({" | ".join(repr(x) for x in self.alternatives)})' @dataclass @@ -304,12 +526,32 @@ class count(ParseDirective): return f'count({repr(self.parser)}, {self.min}, {self.max})' +# @dataclass +# class discard(ParseDirective): +# """Run parser, but discard the result.""" +# +# parser: Items +# +# def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 +# parser.get(self.parser) +# return [] + + +def discard(parser: Items) -> ParseDirective: + """Run parser, but discard the result.""" + return s(parser) @ (lambda _: []) + + @dataclass class CharParser(ParseDirective): """Parse a single character.""" def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 - out = [MatchObject('', parser.__source[parser.seek])] + out: list[MatchObject] + try: + out = [MatchLiteral(matched=parser._ParserCombinator__source[parser.seek])] + except IndexError: + raise ParseError("End of string") parser.seek += 1 return out @@ -321,26 +563,83 @@ char = CharParser() @dataclass -class many(ParseDirective): - """Many a parser as many times as possible.""" +class all_(ParseDirective): + """ + Run each parser in succession from the same point, returning the final result. - parser: 'Items' + .. 
code-block:: python + + all_(~space, char) + """ + + parsers: list[Items] + + def __init__(self, *parsers: Items) -> None: + self.parsers = list(parsers) def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 - out = [] - try: - while True: - entry = parser.get(self.parser) - if not entry: - raise ParseError() - # logger.info("seek = %s, parser = %s", - # parser.seek, self.parser) - out += entry - except ParseError: - return out + match self.parsers: + case [*ps, last]: + for p in ps: + snapshot = parser.snapshot() + parser.get(p) + parser.restore(snapshot) + return parser.get(last) + case []: + return [] + raise ValueError(f"This shoudn't be reachable: {self!r}") + + +@dataclass +class NopParser(ParseDirective): + """Parses nothing.""" + + def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 + return [] def __repr__(self) -> str: - return f'many({repr(self.parser)})' + return 'nop' + + +nop = NopParser() + + +# @dataclass +# class many(ParseDirective): +# """Many a parser as many times as possible.""" +# +# parser: 'Items' +# +# def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 +# out = [] +# try: +# while True: +# entry = parser.get(self.parser) +# if not entry: +# raise ParseError() +# # logger.info("seek = %s, parser = %s", +# # parser.seek, self.parser) +# out += entry +# except ParseError: +# return out +# +# def __repr__(self) -> str: +# return f'many({repr(self.parser)})' + + +# This is a "nicer" looking implementation of ``many``, but it will +# probably blow up the call stack. 
+ + +def many(parser: Items) -> ParseDirective: + """Match a parser as many times as possible.""" + return (s(parser) & name('<rest>', lambda: many(parser))) @ (lambda xs: [xs[0], *xs[1:]]) \ + | nop + + +def many1(parser: ParseDirective) -> ParseDirective: + """Parse between 1 any many.""" + return parser & many(parser) @dataclass @@ -351,7 +650,7 @@ class complement(ParseDirective): def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 match parser.peek(char): - case [MatchObject(matched=str(c))]: + case [MatchLiteral(matched=c)]: if c not in self.chars: return parser.get(char) else: @@ -372,11 +671,31 @@ class tag(ParseDirective): def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 result = parser.get(self.parser) - return [MatchObject(self.tag, result)] - - -@dataclass -class delimited(ParseDirective): + return [MatchCompound(type=self.tag, matched=result)] + + +# @dataclass +# class delimited(ParseDirective): +# """ +# Read an infix delimited list of items. +# +# If a optional trailing "comma" is wanted: +# +# .. code-block:: python +# +# and_(delimited(and_(ws, ',', ws), +# ITEM), +# optional(ws, ',')) +# """ +# +# delim: 'Items' +# parser: 'Items' +# +# def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 +# return parser.get(and_(self.parser, many(and_(self.delim, self.parser)))) + + +def delimited(delim: Items, parser: Items) -> ParseDirective: """ Read an infix delimited list of items. 
@@ -388,26 +707,24 @@ class delimited(ParseDirective): ITEM), optional(ws, ',')) """ + return s(parser) & many(s(delim) & s(parser)) - delim: 'Items' - parser: 'Items' - - def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 - return parser.get(and_(self.parser, many(and_(self.delim, self.parser)))) +digit = name('digit', or_(*(chr(x + ord('0')) for x in range(0, 10)))) hexdig = name('hexdig', - or_(*([chr(x + ord('0')) for x in range(0, 10)] - + [chr(x + ord('A')) for x in range(0, 6)]))) + digit + | or_(*(chr(x + ord('A')) for x in range(0, 6))) + | or_(*(chr(x + ord('a')) for x in range(0, 6)))) + +space = s(' ') | '\t' | '\n' | '\r' + +ws = name('ws', many(space)) -space = or_(' ', '\t', '\n', '\r') -ws = tag('ws', - name('ws', - # TODO tagging space with 'ws' locks us into an infinite loop - many(or_(many(tag('space', space)), - and_(tag('comment', and_('#', many(complement('\n')))), - '\n'))))) +def line_comment(start: str) -> ParseDirective: + """Match a line comment, from start delim to end of line.""" + return tag('line-comment', and_(start, many(complement('\n')), '\n')) class ParserCombinatorSnapshot: @@ -432,59 +749,133 @@ class ParserCombinator: :param source: The string which should be parsed. :param file: - Optional name of the file being parsed. + Optional name of the file being parsed. Only used for debug purposes. """ - # Required fields - # __source: str - # __seek: int = 0 - - # # Fields useful for better debugging info - # file: Optional[str] = None - - def __init__(self, source: str, file: Optional[str] = None): + # def __init__(self, source: str, file: Optional[str] = None): + def __init__(self, source: str, file: str): self.__source = source self.seek = 0 self.file = file def peek(self, item: Items) -> list[MatchObject]: """Run the parser without updating the state.""" + # TODO on error? 
snapshot = self.snapshot() result = self.get(item) self.restore(snapshot) return result - def get(self, item: Items) -> list[MatchObject]: - """Like get, but dosn't seek through whitespace or comments.""" - out: list[MatchObject] = [] + def remaining(self) -> str: + """Return remaining, unparsed, string.""" + return self.__source[self.seek:] + + def get(self, item: Items) -> list[Any]: + """ + Try parsing the next item in stream as the passed on parser. + + :param item: + Parser to run next. + :returns: + If the parsing suceeded, the list of matched objects are + returned, and the interal stream is updated. + + :throws ParseError: + If parsing failed, then a ParseError is thrown. However, + if any tokens had been parsed that far then the internal + stream is still upadted, and the matched objects are lost. + """ + # logger.debug("item: %a", item) + out: list[Any] = [] + # Step 1: decode our parser, and run it + logger.debug("Running parser %a (%a:%a)", item, self.file, self.seek) try: match item: case [*entries]: for entry in entries: out += self.get(entry) + case str(s): substr = self.__source[self.seek:][:len(s)] # Always case fold when matching if substr.lower() == s.lower(): self.seek += len(s) - out += [MatchObject('', s)] + out += [MatchLiteral(matched=s)] else: raise ParseError(f'Expected {item!r}, got {substr!r} (char {self.seek})') + case None: pass + case ParseDirective(): out += item.run(self) - # TODO case Puppet(): + case other: if callable(item): - out += item() + out += self.get(item()) else: raise ValueError(f"Unexpected item: {other}") + except ParseError as e: e.stk.append(item) raise e + + out = self.__merge_matches(out) + + # Parse directives can have a handler attached to them. + # This handler is run on the matched result, and should return + # a new result. 
+ if isinstance(item, ParseDirective) and (f := item.handler): + # print('Running handler:', f, merged) + result = f(out) + # print("Result:", repr(result)) + out = result + + # if isinstance(merged, MatchLiteral): + # merged = merged.matched + + # if not isinstance(merged, list): + # return [merged] + # else: return out + def __merge_matches(self, entries: list[Any]) -> list[Any]: + """ + Attempt to merge similar match nodes into larger nodes. + + For example, a list of matched strings could look like + + .. code-block:: python + + ['H', 'e', 'll', 'o'] + + This would merge it into + + .. code-block:: python + + ['Hello'] + + If any token which can't be merged appears in the list, simply + return the original list. + """ + fixed_entries = [] + s = '' + for entry in entries: + match entry: + case MatchLiteral(matched=c): + s += c + case str(c): + s += c + case _: + if s != '': + fixed_entries.append(MatchLiteral(matched=s)) + fixed_entries.append(entry) + s = '' + if s != '': + fixed_entries.append(MatchLiteral(matched=s)) + + return fixed_entries + def snapshot(self) -> ParserCombinatorSnapshot: """Create a snapshot of the parsers current state.""" return ParserCombinatorSnapshot( @@ -505,3 +896,15 @@ class ParserCombinator: enough characters left """ return self.__source[self.seek:][:max_len] + + +T = TypeVar('T') + + +def const(x: T) -> T: + """ + Return the value directly. + + Useful for handlers. 
+ """ + return x diff --git a/muppet/puppet/format/__init__.py b/muppet/puppet/format/__init__.py index d30eb7d3977dde8d3e852a9e412753bfe661c5ef..963bceb45ec750c64b7f02f24ce34d0695632645 100644 --- a/muppet/puppet/format/__init__.py +++ b/muppet/puppet/format/__init__.py @@ -1,7 +1,6 @@ """Fromat Puppet AST's into something useful.""" from .base import Serializer -from muppet.parser_combinator import MatchObject from muppet.puppet.ast import Puppet from typing import Any, TypeVar @@ -14,13 +13,12 @@ def serialize(ast: Puppet, serializer: type[Serializer[T]]) -> T: return serializer().serialize(ast) +# TODO is this even used? Remove it? def to_string(t: Any) -> str: """Turn a serialized structure into a string.""" match t: case str(s): return s - case MatchObject(): - return t.serialize() case [*xs]: return ''.join(to_string(x) for x in xs) case _: diff --git a/muppet/puppet/format/base.py b/muppet/puppet/format/base.py index b24fd3346f3f2729f3d3e3604be44306466294d6..df3fd1ab19c32a3b27adbcdc5a50bef747820665 100644 --- a/muppet/puppet/format/base.py +++ b/muppet/puppet/format/base.py @@ -177,6 +177,7 @@ class Serializer(Generic[T]): @final def serialize(self, form: Puppet) -> T: """Dispatch depending on type.""" + # logger.debug("Serializing %s", form) match form: case PuppetLiteral(): return self._puppet_literal(form) diff --git a/muppet/puppet/format/parser.py b/muppet/puppet/format/parser.py index ad4819137dfd05b9583b50776824d5f46e49ebb1..6c4a3e76487de54a718b50d4b88fdbc25bb2db1b 100644 --- a/muppet/puppet/format/parser.py +++ b/muppet/puppet/format/parser.py @@ -56,33 +56,40 @@ from muppet.puppet.ast import ( from muppet.parser_combinator import ( MatchObject, - ParseError, ParseDirective, - # Items, - # name, - optional, - count, - # char, - many, + ParseError, + ParserCombinator, + char, complement, - tag, + count, hexdig, - # space, - ws, - ParserCombinator, + line_comment, + many, + name, + nop, + not_, + optional, s, - stringify_match, + tag, + all_, + space, 
) +# from muppet.parser_combinator import ws as primitive_ws from typing import ( Callable, + Literal, TypeVar, - Optional, + # Optional, + Sequence, ) from dataclasses import dataclass +ws = name('ws', tag('ws', many(line_comment('#') | space))) + + F = TypeVar('F', bound=Callable[..., object]) # TODO replace this decorator with @@ -137,39 +144,7 @@ class rich_char(ParseDirective): return parser.get(self.c) -@dataclass -class known_array(ParseDirective): - """Parse an array where all the values are known beforehand.""" - - delim: str - in_items: list[Puppet] - - def run(self, parser: 'ParserCombinator') -> list[MatchObject]: # noqa: D102 - """ - Read a delimted, comma separated, array. - - Reads the starting delimiter, a comma separated list of Puppet - items, an optional ending comma, and the ending delimiter. - - :param delim: - A string of length two, containing the starting and ending delimiter. - :param in_items: - """ - assert len(self.delim) == 2, "Delimiter should be the start and end character used." - items: list[MatchObject] = [] - items += parser.get(ws & self.delim[0]) - match self.in_items: - case []: - items += parser.get(ws & self.delim[1]) - case [x, *xs]: - items += parser.get(ws & x) - for item in xs: - parser.get(ws & ',' & ws & item) - items += parser.get(ws & optional(',') & ws & self.delim[1]) - return items - - -class ParserFormatter(Serializer[MatchObject]): +class ParserFormatter(Serializer[ParseDirective]): """ Reserialize AST by highlighting the original source code. @@ -182,385 +157,445 @@ class ParserFormatter(Serializer[MatchObject]): TODO make this private. 
""" - parser: ParserCombinator + # parser: ParserCombinator + + # def __init__(self, source: str, file: Optional[str] = None): + def __init__(self, source: str, file: str): + # self.parser = ParserCombinator(source=source, file=file) + self.file = file + pass + + def declaration_parameter(self, item: PuppetDeclarationParameter) -> ParseDirective: + """Build parser for the given declaration parameter.""" + type: ParseDirective + value: ParseDirective + + type = self.s(item.type) + value = optional(ws & '=' & ws & self.s(item.v)) + return name(f'decl-${item.k}', ws & type & ws & '$' & item.k & value) + + def instanciation_parameter(self, param: PuppetInstanciationParameter) -> ParseDirective: + """ + Parse a declaration parameter. + + In the example below, `ensure => present,` is the part parsed. - def __init__(self, source: str, file: Optional[str] = None): - self.parser = ParserCombinator(source=source, file=file) + .. code-block:: puppet - def get_declaration_parameter(self, item: PuppetDeclarationParameter) -> list[MatchObject]: - """Parse a single declaration parameter.""" - return self.parser.get(ws & optional(item.type) & - ws & '$' & item.k & - optional(ws & '=' & ws & item.v)) + file { '/': + ensure => present, + } + """ + return (ws & param.k & + ws & param.arrow & + ws & self.s(param.v) & + # Technically only optional for final entry + ws & optional(s(','))) - def get_declaration_parameters( + def declaration_parameters( self, delim: str, - in_items: list[PuppetDeclarationParameter] | None) -> list[MatchObject]: - """See get_arr.""" - items: list[MatchObject] = [] - items += self.parser.get(ws & delim[0]) + in_items: list[PuppetDeclarationParameter] | None) -> ParseDirective: + """ + Build parser for the given list of declaration parameters. + + :param delim: + A string of length two, containing a start and end delimiter. + :param in_items: + None and empty lists are both treated as empty lists. It's + laid out like thit due to how the puppet parser works. 
+ """ + parser = ws & delim[0] match in_items: case [] | None: - items += self.parser.get(ws & delim[1]) + parser &= ws & delim[1] + case [x, *xs]: + parser &= self.declaration_parameter(x) + for item in xs: + parser &= ws & ',' & self.declaration_parameter(item) + parser &= ws & optional(s(',')) & ws & delim[1] + return name('declaration-parameters', parser) + + def known_array(self, delim: str, in_items: list[Puppet]) -> ParseDirective: + """ + Read a delimted, comma separated, array. + + Reads the starting delimiter, a comma separated list of Puppet + items, an optional ending comma, and the ending delimiter. + + :param delim: + A string of length two, containing the starting and ending delimiter. + :param in_items: + """ + assert len(delim) == 2, "Delimiter should be the start and end character used." + parser = ws & delim[0] + match in_items: + case []: + parser &= ws & delim[1] case [x, *xs]: - items += self.parser.get_declaration_parameter(x) + parser &= ws & self.s(x) for item in xs: - items += self.parser.get(ws & ',') - items += self.parser.get_declaration_parameter(item) - items += self.parser.get(ws & optional(',') & ws & delim[1]) - return items + parser &= ws & ',' & ws & self.s(item) + parser &= ws & optional(s(',')) & ws & delim[1] + return parser + + def if_chain(self, + chain: list[tuple[Puppet | Literal['else'], list[Puppet]]] + ) -> ParseDirective: + """Handle all trailing clauses in an if chain.""" + # logger.warning("chain: %a", chain) + match chain: + case []: + return nop + + case [('else', body)]: + # logger.warning("else clause, body: %s", body) + return (ws & tag('keyword', 'else') + & ws & '{' & ws & self.s(body) & ws & '}') + + case [('else', body), *rest]: + raise ValueError(f'Unexpected extra forms after else: {rest!r}') + + case [(test, body), *rest]: + # logger.warning("elsif clause, test: %s, body: %s", test, body) + + # Recursive calls wrapped in lambdas, since they NEED + # to be lazily evaluated, since they are only valid in + # 
their branch ('else'/'elsif') + elsif_parser = (ws & tag('keyword', 'elsif') & + ws & self.s(test) & # type: ignore + ws & '{' & + ws & self.s(body) & + ws & '}') & (lambda: self.if_chain(rest)) + + else_parser = (ws & tag('keyword', 'else') & + ws & '{' & + ws & (lambda: self.s(PuppetIfChain(rest))) & + ws & '}') + + # return elsif_parser | else_parser + return else_parser | elsif_parser + + raise ValueError(f"Bad if-chain: {chain!r}") + + def s(self, it: Puppet | Sequence[Puppet] | None) -> ParseDirective: + """Shorthand for self.serialize, but also handles None and lists.""" + match it: + case Puppet(): + return self.serialize(it) + case [x, *xs]: + parser = ws & self.s(x) + for x in xs: + parser &= ws & self.s(x) + return parser + case _: + return nop # -------------------------------------------------- @override - def _puppet_access(self, it: PuppetAccess) -> MatchObject: - return MatchObject('access', self.parser.get( - ws & it.how & ws & known_array('[]', it.args))) + def _puppet_access(self, it: PuppetAccess) -> ParseDirective: + return tag('access', ws & self.s(it.how) & ws & self.known_array('[]', it.args)) @override - def _puppet_array(self, it: PuppetArray) -> MatchObject: - return MatchObject('array', self.parser.get( - ws & known_array('[]', it.items))) + def _puppet_array(self, it: PuppetArray) -> ParseDirective: + return tag('array', ws & self.known_array('[]', it.items)) @override - def _puppet_binary_operator(self, it: PuppetBinaryOperator) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.lhs & ws & it.op & ws & it.rhs)) + def _puppet_binary_operator(self, it: PuppetBinaryOperator) -> ParseDirective: + return ws & self.s(it.lhs) & ws & it.op & ws & self.s(it.rhs) @override - def _puppet_block(self, it: PuppetBlock) -> MatchObject: - return MatchObject('', self.parser.get(ws & it.entries)) + def _puppet_block(self, it: PuppetBlock) -> ParseDirective: + return ws & self.s(it.entries) @override - def _puppet_call(self, it: 
PuppetCall) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.func & - ws & known_array('()', it.args))) + def _puppet_call(self, it: PuppetCall) -> ParseDirective: + return ws & self.s(it.func) & ws & self.known_array('()', it.args) @override - def _puppet_call_method(self, it: PuppetCallMethod) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.func & - optional(ws & known_array('()', it.args)) & - optional(ws & it.block))) + def _puppet_call_method(self, it: PuppetCallMethod) -> ParseDirective: + return ws & self.s(it.func) & \ + optional(ws & self.known_array('()', it.args)) & \ + optional(ws & self.s(it.block)) @override - def _puppet_case(self, it: PuppetCase) -> MatchObject: - items: list[MatchObject] = [] - items += self.parser.get( - ws & tag('keyword', 'case') & ws & it.test & ws & '{') + def _puppet_case(self, it: PuppetCase) -> ParseDirective: + parser = ws & tag('keyword', 'case') & ws & self.s(it.test) & ws & '{' for ((x, *xs), body) in it.cases: - items += self.parser.get(ws & x) + parser &= ws & self.s(x) for x in xs: - items += self.parser.get(ws & ',' & ws & x) - items += self.parser.get(ws & ':' & ws & '{' & ws & body & ws & '}') - items += self.parser.get(ws & '}') - return MatchObject('', items) + parser &= ws & ',' & ws & self.s(x) + parser &= ws & ':' & ws & '{' & ws & self.s(body) & ws & '}' + parser &= ws & '}' + return parser @override - def _puppet_class(self, it: PuppetClass) -> MatchObject: - return MatchObject('', self.parser.get( - ws & tag('keyword', 'class') & ws & it.name & - optional(ws & (lambda: self.get_declaration_parameters('()', it.params))) & - optional(ws & tag('inherits', it.parent)) & - ws & '{' & ws & it.body & ws & '}')) + def _puppet_class(self, it: PuppetClass) -> ParseDirective: + parser = (ws & tag('keyword', 'class') & ws & it.name & + optional(ws & self.declaration_parameters('()', it.params))) + parser &= optional(ws & 'inherits' & ws & tag('inherits', it.parent)) + parser &= 
ws & '{' & ws & self.s(it.body) & ws & '}' + # logger.warning(parser) + return parser @override - def _puppet_collect(self, it: PuppetCollect) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.type & ws & it.query)) + def _puppet_collect(self, it: PuppetCollect) -> ParseDirective: + return ws & self.s(it.type) & ws & self.s(it.query) @override - def _puppet_concat(self, it: PuppetConcat) -> MatchObject: - out = [] - out += self.parser.get(ws & '"') + def _puppet_concat(self, it: PuppetConcat) -> ParseDirective: + parser = ws & '"' for fragment in it.fragments: match fragment: case PuppetVar(x): - f = ws & '$' & optional('{') & ws & optional('$') & x & ws & optional('}') - out += self.parser.get(f) + f = (ws + & '$' + & optional(s('{')) + & ws + & optional(s('$')) + & x + & ws + & optional(s('}'))) + parser &= f case PuppetString(st): try: - out += self.parser.get(st) + parser &= st except ParseError: for c in st: - out += self.parser.get(rich_char(c)) + parser &= rich_char(c) case _: # TODO "${x[10][20]}" - out += self.parser.get(ws & "${" & ws & fragment & ws & "}") - out += self.parser.get(s('"') & ws) - return MatchObject('string', out) + parser &= ws & "${" & ws & self.s(fragment) & ws & "}" + parser &= s('"') & ws + return parser @override - def _puppet_declaration(self, it: PuppetDeclaration) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.k & ws & '=' & ws & it.v)) + def _puppet_declaration(self, it: PuppetDeclaration) -> ParseDirective: + return ws & self.s(it.k) & ws & '=' & ws & self.s(it.v) @override - def _puppet_define(self, it: PuppetDefine) -> MatchObject: - return MatchObject('', self.parser.get( - ws & tag('keyword', 'define') & ws & it.name & - optional(ws & (lambda: self.get_declaration_parameters('()', it.params))) & - ws & '{' & ws & it.body & ws & '}')) + def _puppet_define(self, it: PuppetDefine) -> ParseDirective: + return (ws & tag('keyword', 'define') & ws & it.name & + optional(ws & 
self.declaration_parameters('()', it.params)) & + ws & '{' & ws & self.s(it.body) & ws & '}') @override - def _puppet_exported_query(self, it: PuppetExportedQuery) -> MatchObject: - return MatchObject('', self.parser.get( - ws & '<<|' & ws & it.filter & ws & '|>>')) + def _puppet_exported_query(self, it: PuppetExportedQuery) -> ParseDirective: + return ws & '<<|' & ws & self.s(it.filter) & ws & '|>>' @override - def _puppet_function(self, it: PuppetFunction) -> MatchObject: - return MatchObject('', self.parser.get( - ws & tag('keyword', 'function') & ws & it.name & - optional(ws & (lambda: self.get_declaration_parameters('()', it.params))) & - optional(ws & '>>' & it.returns) & - ws & '{' & ws & it.body & ws & '}')) + def _puppet_function(self, it: PuppetFunction) -> ParseDirective: + return (ws & tag('keyword', 'function') & ws & it.name & + optional(ws & self.declaration_parameters('()', it.params)) & + optional(ws & '>>' & self.s(it.returns)) & + ws & '{' & ws & self.s(it.body) & ws & '}') @override - def _puppet_hash(self, it: PuppetHash) -> MatchObject: - out = [] - out += self.parser.get(ws & '{') + def _puppet_hash(self, it: PuppetHash) -> ParseDirective: + parser = ws & '{' for entry in it.entries: - out += self.parser.get( - ws & entry.k & - ws & '=>' & - ws & entry.v & - optional(ws & ',')) - out += self.parser.get(ws & '}') - return MatchObject('', out) - - @override - def _puppet_if_chain(self, it: PuppetIfChain) -> MatchObject: - logger.debug(it) - logger.debug("remaining = %a…", self.parser.peek_string(100)) + parser &= (ws & self.s(entry.k) & + ws & '=>' & + ws & self.s(entry.v) & + optional(ws & ',')) + parser &= ws & '}' + return parser + + @override + def _puppet_if_chain(self, it: PuppetIfChain) -> ParseDirective: + if not it.clauses: + return nop + # logger.warning("clauses: %s", it.clauses) (test1, body1), *rest = it.clauses - out = [] - out += self.parser.get(ws & 'if' & ws & test1 & ws & '{' & ws & body1 & ws & '}') - while True: - 
logger.debug("seek = %s, rem = %a…, len = %s", - self.parser.seek, self.parser.peek_string(100), len(rest)) - match rest: - case []: - break - case [('else', body), *xs]: - out += self.parser.get( - ws & tag('keyword', 'else') & ws & '{' & ws & body & ws & '}') - rest = xs - case [(test, body), *xs]: - out += self.parser.get(ws) - try: - out += self.parser.get( - ws & tag('keyword', 'elsif') & - ws & test & ws & '{' & ws & body & '}') - rest = xs - except ParseError: - out += self.parser.get( - ws & tag('keyword', 'else') & - ws & '{' & ws & PuppetIfChain(rest) & ws & '}') - rest = [] - return MatchObject('', out) + assert test1 != 'else', f"Unexpected else clause: {it.clauses}" + parser = (ws & 'if' + & ws & self.s(test1) + & ws & '{' + & ws & self.s(body1) + & ws & '}') + + # logger.warning("rest: %s", it.clauses) + if rest: + parser &= self.if_chain(rest) + return parser @override - def _puppet_instanciation_parameter(self, it: PuppetInstanciationParameter) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.k & ws & it.arrow & ws & it.v & optional(ws & ' &'))) + def _puppet_instanciation_parameter(self, it: PuppetInstanciationParameter) -> ParseDirective: + return ws & it.k & ws & it.arrow & ws & self.s(it.v) & optional(ws & ' &') @override - def _puppet_invoke(self, it: PuppetInvoke) -> MatchObject: - out = self.parser.get(ws & it.func & optional(ws & '(')) + def _puppet_invoke(self, it: PuppetInvoke) -> ParseDirective: + parser = ws & self.s(it.func) & optional(ws & '(') match it.args: case [x, *xs]: - out += self.parser.get(ws & x) + parser &= ws & self.s(x) for x in xs: - out += self.parser.get(ws & ',' & ws & x) - out += self.parser.get(optional(ws & ')')) - return MatchObject('', out) + parser &= ws & ',' & ws & self.s(x) + parser &= optional(ws & ')') + return parser @override - def _puppet_keyword(self, it: PuppetKeyword) -> MatchObject: - return MatchObject('keyword', self.parser.get(ws & it.name)) + def _puppet_keyword(self, it: 
PuppetKeyword) -> ParseDirective: + return tag('keyword', ws & it.name) @override - def _puppet_lambda(self, it: PuppetLambda) -> MatchObject: - return MatchObject('lambda', self.parser.get( - s(lambda: self.get_declaration_parameters('||', it.params)) & - '{' & it.body & '}')) + def _puppet_lambda(self, it: PuppetLambda) -> ParseDirective: + return tag('lambda', + self.declaration_parameters('||', it.params) & + '{' & self.s(it.body) & '}') @override - def _puppet_literal(self, it: PuppetLiteral) -> MatchObject: - return MatchObject('literal', self.parser.get(ws & it.literal)) + def _puppet_literal(self, it: PuppetLiteral) -> ParseDirective: + return tag('literal', ws & it.literal) @override - def _puppet_heredoc(self, it: PuppetHeredoc) -> MatchObject: - # TODO - return MatchObject('', []) + def _puppet_heredoc(self, it: PuppetHeredoc) -> ParseDirective: + return nop @override - def _puppet_literal_heredoc(self, it: PuppetLiteralHeredoc) -> MatchObject: - out: list[MatchObject] = [] - out += self.parser.get(ws & '@(' & ws) + def _puppet_literal_heredoc(self, it: PuppetLiteralHeredoc) -> ParseDirective: escape_switches = s('/') & many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L') - try: - # Delimiter - out += self.parser.get('"') - delim_parts = self.parser.get(many(complement('"'))) - out += delim_parts - out += self.parser.get('"') - # Syntax note - if it.syntax: - out += self.parser.get(ws & ':' & ws & it.syntax) - # escape switches - out += self.parser.get(optional(ws & escape_switches)) - # end delimiter - out += self.parser.get(ws & ')') + switches = optional(ws & escape_switches) - except ParseError: - # Delimiter - delim_parts = self.parser.get(many(complement("):/"))) - out += delim_parts - if it.syntax: - out += self.parser.get(ws & ':' & ws & it.syntax) - # escape switches - out += self.parser.get(optional(ws & escape_switches)) - # end delimiter - out += self.parser.get(ws & ')') - - delim = stringify_match(delim_parts) + if it.syntax: + syntax = ws & 
':' & ws & it.syntax + else: + syntax = nop + + unquoted_heredoc = many(complement('):/')) & syntax & switches + quoted_heredoc = s('"') & many(complement('"')) & '"' & syntax & switches & ws & ')' + + heredoc_declarator = ws & '@(' & ws & (quoted_heredoc | unquoted_heredoc) + + # delim = stringify_match(delim_parts) + + parser = heredoc_declarator for line in it.content.split('\n'): - out += self.parser.get(ws & line.lstrip() & '\n') - out += self.parser.get(ws & '|' & optional('-') & ws & delim) + parser &= ws & line.lstrip() & '\n' + parser &= ws & '|' & optional(s('-')) & ws & many(all_(not_(ws), char)) - # get_until("|-? *{delim}") - return MatchObject('', out) + return parser @override - def _puppet_node(self, it: PuppetNode) -> MatchObject: - return MatchObject('', self.parser.get( - ws & 'node' & - # TODO non-wrapped list with optional trailing comma - ws & "{" & ws & it.body & "}")) + def _puppet_node(self, it: PuppetNode) -> ParseDirective: + parser = ws & 'node' & ws + for match in it.matches: + parser &= ws & match & ws & "," + parser &= ws & "{" & ws & self.s(it.body) & "}" + return parser @override - def _puppet_nop(self, it: PuppetNop) -> MatchObject: - # Should match nothing - return MatchObject('', []) + def _puppet_nop(self, it: PuppetNop) -> ParseDirective: + return nop @override - def _puppet_number(self, it: PuppetNumber) -> MatchObject: - out: list[MatchObject] = self.parser.get(ws) + def _puppet_number(self, it: PuppetNumber) -> ParseDirective: + parser: ParseDirective = ws match (it.x, it.radix): case int(x), 8: - out += self.parser.get(s('0') & oct(x)[2:]) + parser &= s('0') & oct(x)[2:] case int(x), 16: - out += self.parser.get(s('0') & 'x' & hex(x)[2:]) + parser &= s('0') & 'x' & hex(x)[2:] case x, None: - out += self.parser.get(str(it.x)) + parser &= str(x) case _: raise ValueError(f"Unexpected radix: {it.radix}") - return MatchObject('', out) + return parser @override - def _puppet_parenthesis(self, it: PuppetParenthesis) -> 
MatchObject: - return MatchObject('', self.parser.get(ws & '(' & ws & it.form & ws & ')')) + def _puppet_parenthesis(self, it: PuppetParenthesis) -> ParseDirective: + return ws & '(' & ws & self.s(it.form) & ws & ')' @override - def _puppet_qn(self, it: PuppetQn) -> MatchObject: - return MatchObject('qn', self.parser.get(ws & it.name)) + def _puppet_qn(self, it: PuppetQn) -> ParseDirective: + return tag('qn', ws & it.name) @override - def _puppet_qr(self, it: PuppetQr) -> MatchObject: - return MatchObject('qr', self.parser.get(ws & it.name)) + def _puppet_qr(self, it: PuppetQr) -> ParseDirective: + return tag('qr', ws & it.name) @override - def _puppet_regex(self, it: PuppetRegex) -> MatchObject: - return MatchObject('rx', self.parser.get(ws & '/' & it.s.replace('/', r'\/') & '/')) + def _puppet_regex(self, it: PuppetRegex) -> ParseDirective: + return tag('rx', ws & '/' & it.s.replace('/', r'\/') & '/') @override - def _puppet_resource(self, it: PuppetResource) -> MatchObject: - out = self.parser.get(ws & it.type & ws & '{') - for key, body in it.bodies: - out += self.parser.get(ws & key & ws & ':' & ws & body & ws & optional(';')) - out = self.parser.get(ws & '}') - return MatchObject('', out) + def _puppet_resource(self, it: PuppetResource) -> ParseDirective: + parser = ws & self.s(it.type) & ws & '{' + for key, params in it.bodies: + parser &= ws & self.s(key) & ws & ':' + for param in params: + parser &= self.instanciation_parameter(param) + parser &= ws & optional(s(';')) + parser &= ws & '}' + return parser @override - def _puppet_resource_defaults(self, it: PuppetResourceDefaults) -> MatchObject: - return MatchObject('', self.parser.get(ws & it.type & ws & '{' & ws & it.ops & ws & '}')) + def _puppet_resource_defaults(self, it: PuppetResourceDefaults) -> ParseDirective: + parser = ws & self.s(it.type) & ws & '{' & ws + for param in it.ops: + parser &= self.instanciation_parameter(param) + parser &= ws & '}' + return parser @override - def 
_puppet_resource_override(self, it: PuppetResourceOverride) -> MatchObject: - return MatchObject('', self.parser.get( - ws & it.resource & ws & '{' & ws & it.ops & ws & '}')) + def _puppet_resource_override(self, it: PuppetResourceOverride) -> ParseDirective: + parser = ws & self.s(it.resource) & ws & '{' & ws + for param in it.ops: + parser &= self.instanciation_parameter(param) + parser &= ws & '}' + return parser @override - def _puppet_selector(self, it: PuppetSelector) -> MatchObject: - out = self.parser.get(ws & it.resource & ws & '?' & ws & '{') + def _puppet_selector(self, it: PuppetSelector) -> ParseDirective: + parser = ws & self.s(it.resource) & ws & '?' & ws & '{' for key, body in it.cases: - out += self.parser.get(ws & key & ws & '=>' & ws & body & ws & optional(',')) - out += self.parser.get('}') - return MatchObject('', out) + parser &= ws & self.s(key) & ws & '=>' & ws & self.s(body) & ws & optional(s(',')) + parser &= ws & '}' + return parser @override - def _puppet_string(self, it: PuppetString) -> MatchObject: + def _puppet_string(self, it: PuppetString) -> ParseDirective: # get one char to find delimiter # Then read chars until matching delimiter (or parse expected # string) - out: list[MatchObject] = [] - try: - out += self.parser.get(ws & it.s) - except ParseError: - out += self.parser.get(ws) - match self.parser.get(s('"') | "'"): - case [MatchObject(matched="'") as match]: - # Single quoted string - out.append(match) - for c in it.s: - match c: - case "'": - out += self.parser.get(r"\'") - case '\\': - out += self.parser.get(s(r'\\') | '\\') - case _: - out += self.parser.get(c) - # print([str(x) for x in out]) - out += self.parser.get("'") - case [MatchObject(matched='"') as match]: - # Double quoted string - out.append(match) - for c in it.s: - out += self.parser.get(rich_char(c)) + # A string without any delimiters at all + raw_string = s(it.s) + + # A string with ' as delimiter + single_quoted = s("'") & [{"'": r"\'", + '\\': s(r'\\') | 
'\\' + }.get(c, c) for c in it.s] & "'" - out += self.parser.get('"') - case err: - logger.error("Unknown match object: %s", err) + # A string with " as delimiter + double_quoted = s('"') & [rich_char(c) for c in it.s] & '"' - return MatchObject('string', out) + parser = ws & (raw_string | single_quoted | double_quoted) + return tag('string', parser) @override - def _puppet_unary_operator(self, it: PuppetUnaryOperator) -> MatchObject: - return MatchObject('', self.parser.get(ws & it.op & ws & it.x)) + def _puppet_unary_operator(self, it: PuppetUnaryOperator) -> ParseDirective: + return ws & it.op & ws & self.s(it.x) @override - def _puppet_unless(self, it: PuppetUnless) -> MatchObject: - return MatchObject('', self.parser.get( - ws & 'unless' & ws & it.condition & ws & '{' & - ws & it.consequent & ws & '}')) + def _puppet_unless(self, it: PuppetUnless) -> ParseDirective: + return (ws & 'unless' & ws & self.s(it.condition) & ws & '{' & + ws & self.s(it.consequent) & ws & '}') @override - def _puppet_var(self, it: PuppetVar) -> MatchObject: - return MatchObject('', self.parser.get(ws & '$' & it.name)) + def _puppet_var(self, it: PuppetVar) -> ParseDirective: + return name(f'${it.name}', ws & '$' & it.name) @override - def _puppet_virtual_query(self, it: PuppetVirtualQuery) -> MatchObject: - return MatchObject('', self.parser.get(ws & '<|' & ws & it.q & ws & '|>')) + def _puppet_virtual_query(self, it: PuppetVirtualQuery) -> ParseDirective: + return ws & '<|' & ws & self.s(it.q) & ws & '|>' @override - def _puppet_parse_error(self, it: PuppetParseError) -> MatchObject: + def _puppet_parse_error(self, it: PuppetParseError) -> ParseDirective: logger.fatal(it) raise Exception(it) # return MatchObject('', self.parser.get()) diff --git a/muppet/puppet/format/text.py b/muppet/puppet/format/text.py index 2ac3aaab5f447413b032077ed43fddc728b5196c..25f090b3b01e34482bd77351843b8357183b8dd9 100644 --- a/muppet/puppet/format/text.py +++ b/muppet/puppet/format/text.py @@ -302,7 
+302,7 @@ class TextFormatter(Serializer[str]): out += 'else' else: out += f'elsif {self.serialize(testn)}' - out += ' {' + out += ' {\n' for item in bodyn: out += self.ind(1) + self.indent(1).serialize(item) + '\n' out += self.ind() + '}' diff --git a/muppet/puppet/parser.py b/muppet/puppet/parser.py index fb8d14eb84c868ef826ffd755797074d50303811..ca8b5c849ae5e3694e3d8021372c55745615a05a 100644 --- a/muppet/puppet/parser.py +++ b/muppet/puppet/parser.py @@ -51,10 +51,10 @@ def traverse(tree: Any) -> Any: - lists are recursed through - strings (and other stuff) is kept verbatim """ - if type(tree) == str: + if type(tree) is str: # print(tree) return tree - elif type(tree) == dict: + elif type(tree) is dict: # `x in tree` pattern since there may be empty lists (which # are "False") if '#' in tree: @@ -65,7 +65,7 @@ def traverse(tree: Any) -> Any: return [traverse(subtree) for subtree in tree['^']] else: raise Exception('Unexpected dictionary', tree) - elif type(tree) == list: + elif type(tree) is list: return [traverse(branch) for branch in tree] else: return tree diff --git a/muppet/symbols.py b/muppet/symbols.py index d8b0c5211e23a9d83d4403d1e56bf13121fa855f..eb5fffaab0ec7915c675b7e7c3de756919acb60a 100644 --- a/muppet/symbols.py +++ b/muppet/symbols.py @@ -4,7 +4,7 @@ Prettify symbols appearing in puppet code. For example, replace bangs ('!') with negation signs ('¬'). 
""" -symbols: dict[str, str] = { +symbols: dict[str, str] = { # pragma: no cover '=>': '⇒', '!': '¬', '!=': '≠', @@ -18,6 +18,6 @@ symbols: dict[str, str] = { } -def prettify(symb: str) -> str: +def prettify(symb: str) -> str: # pragma: no cover """Either turn the symbol into it's "pretty" variant, or return itself.""" return symbols.get(symb, symb) diff --git a/muppet/syntax_highlight/pygments.py b/muppet/syntax_highlight/pygments.py index 05e8562fad970843a6ce60689a968a1e4cc098b6..4a8fc729c96d474912d313bae90488709d6cbe1e 100644 --- a/muppet/syntax_highlight/pygments.py +++ b/muppet/syntax_highlight/pygments.py @@ -1,4 +1,5 @@ """Syntax highlighting through pygments.""" +# pyright: reportUnboundVariable=false try: from pygments.formatters import HtmlFormatter @@ -11,6 +12,8 @@ except ModuleNotFoundError: def highlight(code: str, language: str) -> str: """Highlight code through pygments.""" + # NOTE possibly propagate error from ModuleNoteFound in the imports + assert available, "Pygmetize not available on this machine" out = pygments.highlight(code, get_lexer_by_name(language), HtmlFormatter(cssclass='highlight-pygments', lineanchors='line', diff --git a/muppet/templates.py b/muppet/templates.py index 492c5019bceae968a466113e95d5ae457638c46d..80254a73b1dfdcd4c41f861a70967e5f59512c32 100644 --- a/muppet/templates.py +++ b/muppet/templates.py @@ -25,7 +25,8 @@ def code_page(*, title: str, content: str, path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None) -> str: + breadcrumbs: Optional[Breadcrumbs] = None + ) -> str: # pragma: no cover """Template for a page containing puppet code.""" template = jinja.get_template('code_page.html') return template.render( @@ -38,7 +39,8 @@ def code_page(*, def content(*, content: str, path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None) -> str: + breadcrumbs: Optional[Breadcrumbs] = None + ) -> str: # pragma: no cover """Template for a page with arbitrary content.""" template = jinja.get_template('content.html') 
return template.render( @@ -51,7 +53,7 @@ def index(*, modules: list[ModuleEntry], path_base: str, breadcrumbs: Optional[Breadcrumbs] = None - ) -> str: + ) -> str: # pragma: no cover """Root index file.""" template = jinja.get_template('index.html') return template.render( @@ -69,7 +71,7 @@ def module_index( doc_files: list[tuple[str, str]], path_base: str, breadcrumbs: Optional[Breadcrumbs] = None, - ) -> str: + ) -> str: # pragma: no cover """Index for a single module.""" template = jinja.get_template('module_index.html') return template.render( diff --git a/setup.cfg b/setup.cfg index 65fe7a14491ab4f4ea73fa9192110f4a31b84aae..0f723393c5388fecdf9a9af7521e1cbf3c817840 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,8 +15,11 @@ classifiers = Environment :: Web Environment Indended Audience :: System Administrators License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+) + Typing :: Typed license = AGPLv3+ license_files = LICENSE +project_urls = + Documentation = https://adrift.space/code/muppet/doc [options.entry_points] console_scripts = diff --git a/tests/test_ast.py b/tests/test_ast.py index 593e0f65d448d4920dc71cbd8f4397ee43e8a594..811e98d7451afd9cceea7a73672c5e89f13498a2 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -33,7 +33,7 @@ from muppet.puppet.ast import ( PuppetUnaryOperator, PuppetArray, PuppetCallMethod, PuppetCase, PuppetDeclarationParameter, PuppetInstanciationParameter, PuppetClass, PuppetConcat, - PuppetCollect, PuppetIf, PuppetUnless, PuppetKeyword, + PuppetCollect, PuppetIfChain, PuppetUnless, PuppetKeyword, PuppetExportedQuery, PuppetVirtualQuery, PuppetFunction, PuppetHash, PuppetHeredoc, PuppetLiteralHeredoc, PuppetVar, PuppetLambda, PuppetQn, PuppetQr, PuppetRegex, @@ -65,7 +65,7 @@ def parse(puppet_source: str) -> Puppet: def ser(ast: Puppet) -> str: return serialize(ast, TextFormatter) - + # from pprint import pprint # def run(x): @@ -283,26 +283,23 @@ if 1 { } """.strip() - r1 = 
PuppetIf(condition=PuppetNumber(x=1), - consequent=[PuppetString(s='a')]) + r1 = PuppetIfChain([(PuppetNumber(x=1), [PuppetString(s='a')])]) - r2 = PuppetIf(condition=PuppetNumber(x=1), - consequent=[PuppetString(s='a')], - alternative=[PuppetString(s='b')]) + r2 = PuppetIfChain([(PuppetNumber(x=1), [PuppetString(s='a')]), + ('else', [PuppetString(s='b')])]) - r3 = PuppetIf(condition=PuppetNumber(x=1), - consequent=[PuppetString(s='a')], - alternative=[PuppetIf(condition=PuppetNumber(x=2), - consequent=[PuppetString(s='b')], - alternative=[PuppetString(s='c')])]) + r3 = PuppetIfChain([(PuppetNumber(x=1), [PuppetString(s='a')]), + (PuppetNumber(x=2), [PuppetString(s='b')]), + ('else', [PuppetString(s='c')])]) assert parse(s1) == r1 assert ser(r1) == s1 + assert parse(s2) == r2 assert ser(r2) == s2 + assert parse(s3) == r3 - # TODO elsif - # assert ser(r3) == s3 + assert ser(r3) == s3 def test_unless():