From 245f44b284eea86888597080460935c8057a7a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se> Date: Wed, 20 Sep 2023 23:41:34 +0200 Subject: [PATCH] Cleanup repeated code. --- muppet/puppet/format/parser.py | 127 +++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 52 deletions(-) diff --git a/muppet/puppet/format/parser.py b/muppet/puppet/format/parser.py index 1a960b8..0607448 100644 --- a/muppet/puppet/format/parser.py +++ b/muppet/puppet/format/parser.py @@ -57,6 +57,7 @@ from muppet.puppet.ast import ( from muppet.parser_combinator import ( MatchObject, ParseDirective, + Parser, ParseError, ParserCombinator, char, @@ -107,6 +108,56 @@ else: return f +def heredoc_declarator(syntax: str | None) -> Parser: + """ + Parser for the start of heredoc. + + .. code-block:: + :caption: Sample Heredoc beginning form. + + @("EOF":json/$) + + :param syntax: + The expected syntax of this entry, `json` in the example above. + + This can technically be ignored, since it always ends with '/' or ')'. + """ + escape_switches = s('/') & many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L') + switches = optional(ws & escape_switches) + + syntax_decl = optional(ws & ':' & ws & syntax) + + unquoted_heredoc = many(complement('):/')) + quoted_heredoc = s('"') & many(complement('"')) & '"' + + return ws & '@(' & ws & (quoted_heredoc | unquoted_heredoc) \ + & syntax_decl & switches & ws & ')' + + +heredoc_end = many(space) & optional(s('|')) & ws & optional(s('-')) \ + & ws & many1(all_(~ space, char)) +"""Parser for the end of a heredoc.""" + + +def interpolated_form(inner: Parser) -> Parser: + """ + Parse a form interpolated into a string. + + .. code-block:: puppet + + "Hello ${inner}" + + In the above example, parse from the ``${`` until ``}``. + + :param inner: + The parser for whatever is inside the braces. 
+ """ + return (many(space) + & '$' & optional(s('{')) + & inner + & ws & optional(s('}'))) + + logger = logging.getLogger(__name__) @@ -359,21 +410,11 @@ class ParserFormatter(Serializer[ParseDirective]): parser = ws & '"' for fragment in it.fragments: match fragment: - case PuppetVar(x): - f = (ws - & '$' - & optional(s('{')) - & ws - & optional(s('$')) - & tag('var', x) - & ws - & optional(s('}'))) - parser &= f case PuppetString(st): for c in st: parser &= rich_char(c) case _: - parser &= many(space) & "${" & ws & self.s(fragment) & ws & "}" + parser &= interpolated_form(self.s(fragment)) parser &= s('"') & ws return parser @@ -486,28 +527,27 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_heredoc(self, it: PuppetHeredoc) -> ParseDirective: - # TODO The header and footer is mostly shared wiht literal - # heredoc. Merge these - parser = ws & '@(' & ws & '"' & ws & many(complement('"')) & '"' - parser &= optional(ws & ':' & ws & it.syntax) - switches = name('switches', many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L')) - parser &= optional(ws & '/' & switches) - parser &= ws & ')' - - # TODO the contents shares muttch with concat, merge these + # This uses ``many(space)`` rather than ``ws``. Our ``ws`` + # implementation also "eats" comments, but inside heredocs + # there are no comments. + + # This is technically incorrect, since heredoc_declarator + # always accepts non-quoted starts. This however doesn't + # matter. + parser = ws & heredoc_declarator(it.syntax) + for fragment in it.fragments: match fragment: - case PuppetVar(x): - f = (many(space) - & '$' - & optional(s('{')) - & ws - & optional(s('$')) - & tag('var', x) - & ws - & optional(s('}'))) - parser &= f case PuppetString(st): + # This discards the given whitespace, and + # re-applies arbitrary whitespace between. + # This is for two reasons: + # 1. 
Leading indentation whitespace is ignored up + # to the '|' line, meaning that we still have + # to check for whitespace before each string. + # 2. If the '-' ending modifier is used then we + # DON'T have a newline at the end, confusing + # matters. word = many(space) for line in st.split('\n'): word &= many(space) @@ -515,37 +555,20 @@ class ParserFormatter(Serializer[ParseDirective]): word &= rich_char(c) word &= optional(s('\n')) parser &= name(repr(st), word) - # parser &= many(space) & st.strip() case _: - parser &= many(space) & "${" & ws & self.s(fragment) & ws & "}" - parser &= many(space) & optional(s('|')) & ws & optional(s('-')) - parser &= ws & many1(all_(~ space, char)) + parser &= interpolated_form(self.s(fragment)) + + parser &= heredoc_end return parser @override def _puppet_literal_heredoc(self, it: PuppetLiteralHeredoc) -> ParseDirective: - escape_switches = s('/') & many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L') - switches = optional(ws & escape_switches) - - if it.syntax: - syntax = ws & ':' & ws & it.syntax - else: - syntax = nop - - unquoted_heredoc = many(complement('):/')) & syntax & switches - quoted_heredoc = s('"') & many(complement('"')) & '"' & syntax & switches - - heredoc_declarator = ws & '@(' & ws & (quoted_heredoc | unquoted_heredoc) & ws & ')' - - # delim = stringify_match(delim_parts) - - parser = heredoc_declarator + parser = ws & heredoc_declarator(it.syntax) for line in it.content.split('\n'): parser &= many(space) & line.strip() - parser &= many(space) & optional(s('|')) & ws & optional(s('-')) - parser &= ws & many1(all_(~ space, char)) + parser &= heredoc_end return parser @override -- GitLab