From 245f44b284eea86888597080460935c8057a7a03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Wed, 20 Sep 2023 23:41:34 +0200
Subject: [PATCH] Cleanup repeated code.

---
 muppet/puppet/format/parser.py | 127 +++++++++++++++++++--------------
 1 file changed, 75 insertions(+), 52 deletions(-)

diff --git a/muppet/puppet/format/parser.py b/muppet/puppet/format/parser.py
index 1a960b8..0607448 100644
--- a/muppet/puppet/format/parser.py
+++ b/muppet/puppet/format/parser.py
@@ -57,6 +57,7 @@ from muppet.puppet.ast import (
 from muppet.parser_combinator import (
     MatchObject,
     ParseDirective,
+    Parser,
     ParseError,
     ParserCombinator,
     char,
@@ -107,6 +108,56 @@ else:
         return f
 
 
+def heredoc_declarator(syntax: str | None) -> Parser:
+    """
+    Parser for the start of a heredoc.
+
+    .. code-block::
+       :caption: Sample Heredoc beginning form.
+
+        @("EOF":json/$)
+
+    :param syntax:
+        The expected syntax of this entry, `json` in the example above.
+
+        This can technically be ignored, since it always ends with '/' or ')'.
+    """
+    escape_switches = s('/') & many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L')
+    switches = optional(ws & escape_switches)
+
+    syntax_decl = optional(ws & ':' & ws & syntax)
+
+    unquoted_heredoc = many(complement('):/'))
+    quoted_heredoc = s('"') & many(complement('"')) & '"'
+
+    return ws & '@(' & ws & (quoted_heredoc | unquoted_heredoc) \
+        & syntax_decl & switches & ws & ')'
+
+
+heredoc_end = many(space) & optional(s('|')) & ws & optional(s('-')) \
+    & ws & many1(all_(~ space, char))
+"""Parser for the end of a heredoc."""
+
+
+def interpolated_form(inner: Parser) -> Parser:
+    """
+    Parse a form interpolated into a string.
+
+    .. code-block:: puppet
+
+        "Hello ${inner}"
+
+    In the above example, parse from the ``${`` through the ``}``; both braces are optional.
+
+    :param inner:
+        The parser for whatever is inside the braces.
+    """
+    return (many(space)
+            & '$' & optional(s('{'))
+            & inner
+            & ws & optional(s('}')))
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -359,21 +410,11 @@ class ParserFormatter(Serializer[ParseDirective]):
         parser = ws & '"'
         for fragment in it.fragments:
             match fragment:
-                case PuppetVar(x):
-                    f = (ws
-                         & '$'
-                         & optional(s('{'))
-                         & ws
-                         & optional(s('$'))
-                         & tag('var', x)
-                         & ws
-                         & optional(s('}')))
-                    parser &= f
                 case PuppetString(st):
                     for c in st:
                         parser &= rich_char(c)
                 case _:
-                    parser &= many(space) & "${" & ws & self.s(fragment) & ws & "}"
+                    parser &= interpolated_form(self.s(fragment))
         parser &= s('"') & ws
         return parser
 
@@ -486,28 +527,27 @@ class ParserFormatter(Serializer[ParseDirective]):
 
     @override
     def _puppet_heredoc(self, it: PuppetHeredoc) -> ParseDirective:
-        # TODO The header and footer is mostly shared wiht literal
-        # heredoc. Merge these
-        parser = ws & '@(' & ws & '"' & ws & many(complement('"')) & '"'
-        parser &= optional(ws & ':' & ws & it.syntax)
-        switches = name('switches', many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L'))
-        parser &= optional(ws & '/' & switches)
-        parser &= ws & ')'
-
-        # TODO the contents shares muttch with concat, merge these
+        # This uses ``many(space)`` rather than ``ws``. Our ``ws``
+        # implementation also "eats" comments, but inside heredocs
+        # there are no comments.
+
+        # This is technically too permissive, since heredoc_declarator
+        # also accepts unquoted start tags. This however doesn't
+        # matter.
+        parser = ws & heredoc_declarator(it.syntax)
+
         for fragment in it.fragments:
             match fragment:
-                case PuppetVar(x):
-                    f = (many(space)
-                         & '$'
-                         & optional(s('{'))
-                         & ws
-                         & optional(s('$'))
-                         & tag('var', x)
-                         & ws
-                         & optional(s('}')))
-                    parser &= f
                 case PuppetString(st):
+                    # This discards the given whitespace, and
+                    # re-applies arbitrary whitespace between.
+                    # This is for two reasons:
+                    # 1. Leading indentation whitespace is ignored up
+                    #    to the '|' line, meaning that we still have
+                    #    to check for whitespace before each string.
+                    # 2. If the '-' ending modifier is used then we
+                    #    DON'T have a newline at the end, confusing
+                    #    matters.
                     word = many(space)
                     for line in st.split('\n'):
                         word &= many(space)
@@ -515,37 +555,20 @@ class ParserFormatter(Serializer[ParseDirective]):
                             word &= rich_char(c)
                         word &= optional(s('\n'))
                     parser &= name(repr(st), word)
-                    # parser &= many(space) & st.strip()
                 case _:
-                    parser &= many(space) & "${" & ws & self.s(fragment) & ws & "}"
-        parser &= many(space) & optional(s('|')) & ws & optional(s('-'))
-        parser &= ws & many1(all_(~ space, char))
+                    parser &= interpolated_form(self.s(fragment))
+
+        parser &= heredoc_end
         return parser
 
     @override
     def _puppet_literal_heredoc(self, it: PuppetLiteralHeredoc) -> ParseDirective:
-        escape_switches = s('/') & many(s('n') | 'r' | 't' | 's' | '$' | 'u' | 'L')
-        switches = optional(ws & escape_switches)
-
-        if it.syntax:
-            syntax = ws & ':' & ws & it.syntax
-        else:
-            syntax = nop
-
-        unquoted_heredoc = many(complement('):/')) & syntax & switches
-        quoted_heredoc = s('"') & many(complement('"')) & '"' & syntax & switches
-
-        heredoc_declarator = ws & '@(' & ws & (quoted_heredoc | unquoted_heredoc) & ws & ')'
-
-        # delim = stringify_match(delim_parts)
-
-        parser = heredoc_declarator
+        parser = ws & heredoc_declarator(it.syntax)
 
         for line in it.content.split('\n'):
             parser &= many(space) & line.strip()
-        parser &= many(space) & optional(s('|')) & ws & optional(s('-'))
-        parser &= ws & many1(all_(~ space, char))
 
+        parser &= heredoc_end
         return parser
 
     @override
-- 
GitLab