From 4892081b9950d760bea325e6afc2047ca1913219 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Mon, 25 Sep 2023 16:08:32 +0200
Subject: [PATCH] Start splitting output back into modules.

The difference between this split, and the old, is that everything is
now clearly under output.
---
 doc/muppet.rst                           |   3 +-
 muppet/{output.py => output/__init__.py} | 783 +++--------------------
 muppet/output/docstring.py               | 258 ++++++++
 muppet/output/puppet_source.py           | 418 ++++++++++++
 muppet/output/util.py                    |  44 ++
 5 files changed, 808 insertions(+), 698 deletions(-)
 rename muppet/{output.py => output/__init__.py} (58%)
 create mode 100644 muppet/output/docstring.py
 create mode 100644 muppet/output/puppet_source.py
 create mode 100644 muppet/output/util.py

diff --git a/doc/muppet.rst b/doc/muppet.rst
index 5a724a0..88d90ba 100644
--- a/doc/muppet.rst
+++ b/doc/muppet.rst
@@ -7,6 +7,7 @@ Subpackages
 .. toctree::
    :maxdepth: 4
 
+   muppet.output
    muppet.puppet
    muppet.syntax_highlight
 
@@ -21,11 +22,9 @@ Submodules
    muppet.intersperse
    muppet.lookup
    muppet.markdown
-   muppet.output
    muppet.parser_combinator
    muppet.symbols
    muppet.tabs
-   muppet.templates
    muppet.util
 
 Module contents
diff --git a/muppet/output.py b/muppet/output/__init__.py
similarity index 58%
rename from muppet/output.py
rename to muppet/output/__init__.py
index 2d3440a..3e1b59a 100644
--- a/muppet/output.py
+++ b/muppet/output/__init__.py
@@ -11,8 +11,8 @@ import os.path
 import pathlib
 import re
 import json
+from glob import glob
 from typing import (
-    Any,
     Optional,
     Protocol,
     Sequence,
@@ -28,18 +28,7 @@ from muppet.puppet.strings import (
     DataTypeAlias,
     DefinedType,
     DocString,
-    DocStringApiTag,
-    DocStringAuthorTag,
-    DocStringExampleTag,
-    DocStringOptionTag,
-    DocStringOverloadTag,
     DocStringParamTag,
-    DocStringRaiseTag,
-    DocStringReturnTag,
-    DocStringSeeTag,
-    DocStringSinceTag,
-    DocStringSummaryTag,
-    DocStringTag,
     Function,
     PuppetClass,
     PuppetStrings,
@@ -48,20 +37,16 @@ from muppet.puppet.strings import (
     puppet_strings_cached,
 )
 from muppet.parser_combinator import (
-    ParserCombinator,
     ParseError,
-    MatchCompound,
-    MatchObject,
 )
 from muppet.markdown import markdown
 from muppet.breadcrumbs import Breadcrumbs, breadcrumbs
 from muppet.util import group_by, partition
 from muppet.cache import AbstractCache
 
-from muppet.puppet.parser import puppet_parser
 
-from muppet.puppet.ast import build_ast
-from muppet.puppet.format.parser import ParserFormatter
+from .docstring import format_docstring
+from .puppet_source import hyperlink_puppet_source
 
 
 jinja = Environment(
@@ -73,11 +58,32 @@ jinja = Environment(
 logger = logging.getLogger(__name__)
 
 
-param_doc: dict[str, str] = {}
+class HtmlSerializable(Protocol):
+    """Classes which can be serialized as HTML."""
+
+    def to_html(self) -> str:  # pragma: no cover
+        """Return HTML string."""
+        ...
+
+    def to_html_list(self) -> str:  # pragma: no cover
+        """Return HTML suitable for a list."""
+        ...
 
 
 class Templates:
-    """Namespace for templates."""
+    """
+    Namespace for templates.
+
+    Almost all of these methods take these values:
+
+    :param path_base:
+        Prefix added to all links within output
+        TODO shouldn't this be bound to the object, since it should
+        NEVER change (during a run).
+
+    :param breadcrumbs:
+        Breadcrumb to current page.
+    """
 
     def __init__(self) -> None:
         self.jinja = Environment(
@@ -90,7 +96,15 @@ class Templates:
                   path_base: str,
                   breadcrumbs: Optional[Breadcrumbs] = None
                   ) -> str:  # pragma: no cover
-        """Template for a page containing puppet code."""
+        """
+        Template for a page containing puppet code.
+
+        :param title:
+            Top level h1 tag of page.
+
+        :param content:
+            Free form string content, being the main body of the page.
+        """
         template = self.jinja.get_template('code_page.html')
         return template.render(
                 title=title,
@@ -103,7 +117,12 @@ class Templates:
                 path_base: str,
                 breadcrumbs: Optional[Breadcrumbs] = None
                 ) -> str:  # pragma: no cover
-        """Template for a page with arbitrary content."""
+        """
+        Template for a page with arbitrary content.
+
+        :param content:
+            Arbitrary content
+        """
         template = self.jinja.get_template('content.html')
         return template.render(
                 content=content,
@@ -115,7 +134,12 @@ class Templates:
               path_base: str,
               breadcrumbs: Optional[Breadcrumbs] = None
               ) -> str:  # pragma: no cover
-        """Root index file."""
+        """
+        Environment index file.
+
+        :param modules:
+            List of PuppetModules to include in the environment index.
+        """
         template = self.jinja.get_template('index.html')
         return template.render(
                 path_base=path_base,
@@ -124,7 +148,7 @@ class Templates:
 
     def module_index(self, *,
                      # content: list[],  # something with to_html_list and to_html
-                     content: list[Any],  # TODO something with to_html_list and to_html
+                     content: Sequence[HtmlSerializable],
                      module_author: str,
                      module_name: str,
                      doc_files: list[tuple[str, str]],
@@ -133,7 +157,33 @@ class Templates:
                      left_sidebar: Optional[str] = None,
                      right_sidebar: Optional[str] = None,
                      ) -> str:  # pragma: no cover
-        """Index for a single module."""
+        """
+        Index for a single module.
+
+        :param content:
+            Main content of the page.
+        :param module_author:
+            Author of the module, as it appears in the metadata.json file.
+        :param module_name:
+            Name of the module, without the author component.
+        :param doc_files:
+            The free-form documentation files bundled with the module.
+
+            Each element should be a pair of
+            - The idealized name of the file
+            - The relative path to the document inside the output
+              (the HTML generated version)
+
+        :param left_sidebar:
+            Free form content of the left sidebar.
+
+            This is assumed to be a list of modules in the environment.
+        :param right_sidebar:
+            Free form content of the right sidebar.
+
+            This is assumed to be a table of contents of the module,
+            really similar to the actual body contents.
+        """
         template = self.jinja.get_template('module_index.html')
         return template.render(
                 content=content,
@@ -149,18 +199,6 @@ class Templates:
 templates = Templates()
 
 
-class HtmlSerializable(Protocol):
-    """Classes which can be serialized as HTML."""
-
-    def to_html(self) -> str:  # pragma: no cover
-        """Return HTML string."""
-        ...
-
-    def to_html_list(self) -> str:  # pragma: no cover
-        """Return HTML suitable for a list."""
-        ...
-
-
 @dataclass
 class ResourceTypeOutput:
     """Basic HTML implementation."""
@@ -381,8 +419,10 @@ class PuppetModule:
         self.toc = self._build_module_toc()
         self.output_prefix = output_prefix
 
-        # TODO
-        self.doc_files: list[str] = []
+        abspath = os.path.abspath(self.path)
+        self.doc_files: list[str] = \
+            glob(os.path.join(abspath, '*.md')) + \
+            glob(os.path.join(abspath, 'LICENSE'))
 
         try:
             with open(os.path.join(self.path, 'metadata.json')) as f:
@@ -390,7 +430,7 @@ class PuppetModule:
         except FileNotFoundError:
             self.metadata = {}
 
-    def _build_module_toc(self) -> list[ResourceIndex | IndexCategory]:
+    def _build_module_toc(self) -> Sequence[ResourceIndex | IndexCategory]:
         """Build the TOC of the module."""
         content: list[ResourceIndex | IndexCategory] = []
 
@@ -482,8 +522,8 @@ class PuppetModule:
                 breadcrumbs=crumbs,
                 content=toc,
                 path_base=self.output_prefix,
-                # doc_files=list(doc_files.items())
                 # TODO
+                # doc_files=self.doc_files,
                 doc_files=[],
                 # left_sidebar=(),
                 right_sidebar=''.join([
@@ -872,158 +912,6 @@ def index_item(obj: PuppetClass | DefinedType) -> IndexItem:
     return out
 
 
-def format_docstring(name: str, docstring: DocString) -> tuple[str, str]:
-    """
-    Format docstrings as they appear in some puppet types.
-
-    Those types being:
-
-    * puppet_classes,
-    * puppet_type_aliases, and
-    * defined_types
-    """
-    global param_doc
-
-    # The api tag is ignored, since it instead is shown from context
-
-    out = ''
-
-    param_doc = {tag.name: tag.text or ''
-                 for tag in docstring.tags
-                 if isinstance(tag, DocStringParamTag)}
-
-    grouped_tags = GroupedTags.from_taglist(docstring.tags)
-
-    # --------------------------------------------------
-
-    out += '<a href="#code">Jump to Code</a><br/>'
-
-    if tags := grouped_tags.summary_:
-        out += '<em class="summary">'
-        for tag in tags:
-            out += html.escape(tag.text)
-        out += '</em>'
-
-    out += '<div class="description">'
-    # TODO "TODO" highlighting
-    out += markdown(docstring.text)
-    out += '</div>'
-
-    # TODO proper handling of multiple @see tags
-    if sees := grouped_tags.see_:
-        out += '<b>See</b> '
-        for see in sees:
-            link: str
-            m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name)
-            assert m, "Regex always matched"
-            if m['url']:
-                link = f'<a href="{see.name}">{see.name}</a>'
-                out += link
-            elif m['man']:
-                page = see.name[:-3]
-                section = see.name[-2]
-                # TODO man providers
-                link = f"https://manned.org/man/{page}.{section}"
-                out += link
-            else:
-                if '::' in m['other']:
-                    # TODO
-                    pass
-                else:
-                    # TODO
-                    # link = see
-                    pass
-                out += m['other']
-            out += ' ' + see.text
-
-    if authors := grouped_tags.author_:
-        out += '<div class="author">'
-        out += "<em>Written by </em>"
-        if len(authors) == 1:
-            out += parse_author(authors[0].text)
-        else:
-            out += '<ul>'
-            for author in authors:
-                out += f'<li>{parse_author(author.text)}</li>'
-            out += '</ul>'
-        out += '</div>'
-
-    out += '<hr/>'
-
-    t: DocStringTag
-
-    for t in grouped_tags .example_:
-        out += '<div class="code-example">'
-
-        if name := t.name:
-            # TODO markup for title
-            out += f'<div class="code-example-header">{html.escape(name)}</div>\n'
-        # TODO highlight?
-        # Problem is that we don't know what language the example
-        # is in. Pygemntize however seems to do a reasonable job
-        # treating anything as puppet code
-        text = html.escape(t.text)
-        out += f'<pre><code class="puppet">{text}</code></pre>\n'
-        out += '</div>'
-
-    out += '<hr/>'
-
-    out += '<dl>'
-    for t in grouped_tags.param_:
-        name = html.escape(t.name)
-        out += f'<dt><span id="{name}" class="variable">{name}</span>'
-        match t.types:
-            case [x]:
-                # TODO highlight type?
-                out += f': <code>{html.escape(x)}</code>'
-            case [_, *_]:
-                raise ValueError("How did you get multiple types onto a parameter?")
-
-        # TODO Fetch default values from puppet strings output
-        # Then in javascript query Hiera to get the true "default"
-        # values for a given machine (somewhere have a setting for
-        # selecting machine).
-        out += '</dt>'
-
-        if text := t.text:
-            text = re.sub(r'(NOTE|TODO)',
-                          r'<mark>\1</mark>',
-                          markdown(text))
-
-            if options := grouped_tags.option_.get(t.name):
-                text += '<dl>'
-                for option in options:
-                    text += '<dt>'
-                    text += html.escape(option.opt_name)
-                    match option.opt_types:
-                        case [x]:
-                            text += f' [<code>{html.escape(x)}</code>]'
-                        case [_, *_]:
-                            raise ValueError("How did you get multiple types onto an option?")
-                    text += '</dt>'
-                    text += '<dd>'
-                    if option.opt_text:
-                        text += re.sub(r'(NOTE|TODO)',
-                                       r'<mark>\1</mark>',
-                                       markdown(option.opt_text))
-                    text += '</dd>'
-                text += '</dl>'
-
-            out += f"<dd>{text}</dd>"
-        else:
-            out += '<dd><em>Undocumented</em></dd>'
-    out += '</dl>'
-
-    # TODO remaining tags
-    # "overload"
-    # raise
-    # return
-    # since
-    # _other
-
-    return (name, out)
-
-
 def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]:
     """Format Puppet class."""
     out = ''
@@ -1033,7 +921,7 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]:
     out += body
 
     # ------ Old ---------------------------------------
-    # t = parse_puppet(d_type.source)
+    # t = hyperlink_puppet_source(d_type.source)
     # data = parse(t, 0, ['root'])
     # renderer = HTMLRenderer(build_param_dict(d_type.docstring))
     # out += render(renderer, data)
@@ -1047,7 +935,8 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]:
             in_parameters.append(cast(DocStringParamTag, tag).name)
 
     try:
-        result = parse_puppet(d_type.source, d_type.name, in_parameters)
+        # Calculation beforehand, for "atomic" formatting
+        result = hyperlink_puppet_source(d_type.source, d_type.name, in_parameters)
         out += '<pre class="highlight-muppet"><code class="puppet">'
         out += result
         out += '</code></pre>'
@@ -1068,53 +957,6 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]:
     return name, out
 
 
-@dataclass
-class GroupedTags:
-    """
-    All tags from a class (or similar) docstring.
-
-    Most fields are simply lists of tags. The reason for trailing
-    underscores on each entry is since some tag names collide with
-    python keywords (e.g. ``raise``).
-    """
-
-    param_:     list[DocStringParamTag]             = field(default_factory=list)  # noqa: E221
-    example_:   list[DocStringExampleTag]           = field(default_factory=list)  # noqa: E221
-    overload_:  list[DocStringOverloadTag]          = field(default_factory=list)  # noqa: E221
-    option_:    dict[str, list[DocStringOptionTag]] = field(default_factory=dict)  # noqa: E221
-    """
-    Options document Hash parameters valid values.
-
-    Each key is the corresponding parameter, and the value is the list
-    of registered options for that hash.
-    """
-
-    author_:    list[DocStringAuthorTag]            = field(default_factory=list)  # noqa: E221
-    api_:       list[DocStringApiTag]               = field(default_factory=list)  # noqa: E221
-    raise_:     list[DocStringRaiseTag]             = field(default_factory=list)  # noqa: E221
-    return_:    list[DocStringReturnTag]            = field(default_factory=list)  # noqa: E221
-    since_:     list[DocStringSinceTag]             = field(default_factory=list)  # noqa: E221
-    summary_:   list[DocStringSummaryTag]           = field(default_factory=list)  # noqa: E221
-    see_:       list[DocStringSeeTag]               = field(default_factory=list)  # noqa: E221
-    other_:     list[DocStringTag]                  = field(default_factory=list)  # noqa: E221
-    """All tags of unknown type."""
-
-    @classmethod
-    def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags':
-        """Group a list of tags."""
-        grouped_tags = cls()
-        for tag in tags:
-            if tag.tag_name == 'option':
-                tag = cast(DocStringOptionTag, tag)
-                grouped_tags.option_.setdefault(tag.parent, []).append(tag)
-            elif tag.tag_name in {'param', 'example', 'overload', 'author', 'api',
-                                  'raise', 'return', 'since', 'summary', 'see'}:
-                getattr(grouped_tags, tag.tag_name + '_').append(tag)
-            else:
-                grouped_tags.other_.append(tag)
-        return grouped_tags
-
-
 def build_param_dict(docstring: DocString) -> dict[str, str]:
     """
     Extract all parameter documentation from a docstring dict.
@@ -1155,7 +997,7 @@ def format_type_alias(d_type: DataTypeAlias, file: str) -> tuple[str, str]:
     out += '\n'
     out += '<pre class="highlight-muppet"><code class="puppet">'
     try:
-        out += parse_puppet(d_type.alias_of, file, [])
+        out += hyperlink_puppet_source(d_type.alias_of, file, [])
     except ParseError as e:
         logger.error("Parsing %(name)s failed: %(err)s",
                      {'name': d_type.alias_of, 'err': e})
@@ -1175,7 +1017,7 @@ def format_defined_type(d_type: DefinedType, file: str) -> tuple[str, str]:
 
     out += '<pre class="highlight-muppet"><code class="puppet">'
     try:
-        out += parse_puppet(d_type.source, file, [])
+        out += hyperlink_puppet_source(d_type.source, file, [])
     except ParseError as e:
         logger.error("Parsing %(name)s failed: %(err)s",
                      {'name': d_type.source, 'err': e})
@@ -1240,7 +1082,7 @@ def format_puppet_function(function: Function, file: str) -> str:
     elif t == 'puppet':
         out += '<pre class="highlight-muppet"><code class="puppet">'
         try:
-            out += parse_puppet(function.source, file, [])
+            out += hyperlink_puppet_source(function.source, file, [])
         except ParseError as e:
             logger.error("Parsing %(name)s failed: %(err)s",
                          {'name': function.source, 'err': e})
@@ -1261,454 +1103,3 @@ def format_puppet_task() -> str:
 def format_puppet_plan() -> str:
     """Format Puppet plan."""
     return 'TODO format_puppet_plan not implemented'
-
-
-def inner_text(obj: MatchObject | list[MatchObject]) -> str:
-    """
-    Extract the text content from a set of MatchObjects.
-
-    This is really similar to HTML's inner_text.
-
-    Empty whitespace tags are expanded into nothing, non-empty
-    whitespace tags becomes a single space (note that this discards
-    commets).
-
-    This only works properly if no function was mapped over the parser
-    return values in tree, see :func:`muppet.parser_combinator.fmap`.
-
-    :param obj:
-        Match Objects to search.
-    """
-    match obj:
-        case str(s):
-            return s
-        case MatchCompound(type='ws', matched=[]):
-            return ''
-        case MatchCompound(type='ws'):
-            return ' '
-        case MatchCompound(matched=xs):
-            return ''.join(inner_text(x) for x in xs)
-        case [*xs]:
-            return ''.join(inner_text(x) for x in xs)
-        case _:
-            raise ValueError('How did we get here')
-
-
-def name_to_url(name: str) -> tuple[str | None, str]:
-    """
-    Resolve a class or resource name into an url.
-
-    :param name:
-        The name of a class or resource, surch as "example::resource".
-    :return:
-        A tuple consisting of
-
-        - One of
-          - An internal link to the definition of that type
-          - A link to the official puppet documentation
-          - ``None``, if `name` is "class"
-        - A string indicating extra HTML classes for this url.
-          This is mostly so external references can be marked properly.
-    """
-    if name in built_in_types:
-        return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc')
-    elif name == 'class':
-        return (None, '')
-    else:
-        # TODO special cases for puppet's built in types.
-        # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
-        module, *items = name.lstrip(':').split('::')
-        # TODO get prefix from the command line/config file
-        return ('/code/muppet-strings/output/'
-                + '/'.join([module, 'manifests', *(items if items else ['init'])]),
-                '')
-
-
-puppet_doc_base = 'https://www.puppet.com/docs/puppet/7'
-lang_facts_builtin_variables = (f'{puppet_doc_base}/lang_facts_builtin_variables'
-                                '#lang_facts_builtin_variables')
-server_variables = f'{lang_facts_builtin_variables}-server-variables'
-compiler_variables = f'{lang_facts_builtin_variables}-compiler-variables'
-trusted_facts = f'{lang_facts_builtin_variables}-trusted-facts'
-server_facts = f'{lang_facts_builtin_variables}-server-facts'
-
-built_in_variables = {
-    'facts': 'https://google.com',
-    # clientcert, clientversion, puppetversion, clientnoop,
-    # agent_specified_environment:
-    # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts
-    'trusted': trusted_facts,
-    'server_facts': server_facts,
-    'environment': server_variables,
-    'servername': server_variables,
-    'serverip': server_variables,
-    'serverversion': server_variables,
-    'module_name': compiler_variables,
-    'caller_module_name': compiler_variables,
-
-    # Also note the special variable $title and $name
-    # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name
-}
-
-
-def parse_author(author: str) -> str:
-    """
-    Format author tags' content.
-
-    :param author:
-        The contents of the author tag. If the string is on the
-        regular "author" format of ``"Firstname Lastname
-        <first.last@example.com>"`` then the email will be formatted
-        and hyperlinked. Otherwise the string is returned verbatim.
-    :return:
-        An HTML safe string, possibly including tags.
-    """
-    m = re.match(r'(?P<author>.*) (<(?P<email>.*)>)|(?P<any>.*)', author)
-    assert m, "The above regex can't fail"
-    if m['author'] and m['email']:
-        author = html.escape(m['author'])
-        email = html.escape(m['email'])
-        return f'{author} <a class="email" href="mailto:{email}">&lt;{email}&gt</a>;'
-    else:
-        return html.escape(m['any'])
-
-
-# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
-# https://www.puppet.com/docs/puppet/7/types/file.html
-# ...
-built_in_types = {
-    'package',
-    'file',
-    'service',
-    'notify',
-    'exec',
-    'user',
-    'group',
-}
-
-# https://www.puppet.com/docs/puppet/7/function.html#{}
-built_in_functions = {
-    'abs',
-    'alert',
-    'all',
-    'annotate',
-    'any',
-    'assert_type',
-    'binary_file',
-    'break',
-    'call',
-    'camelcase',
-    'capitalize',
-    'ceiling',
-    'chomp',
-    'chop',
-    'compare',
-    'contain',
-    'convert_to',
-    'create_resources',
-    'crit',
-    'debug',
-    'defined',
-    'dig',
-    'digest',
-    'downcase',
-    'each',
-    'emerg',
-    'empty',
-    'epp',
-    'err',
-    'eyaml_lookup_key',
-    'fail',
-    'file',
-    'filter',
-    'find_file',
-    'find_template',
-    'flatten',
-    'floor',
-    'fqdn_rand',
-    'generate',
-    'get',
-    'getvar',
-    'group_by',
-    'hiera',
-    'hiera_array',
-    'hiera_hash',
-    'hiera_include',
-    'hocon_data',
-    'import',
-    'include',
-    'index',
-    'info',
-    'inline_epp',
-    'inline_template',
-    'join',
-    'json_data',
-    'keys',
-    'length',
-    'lest',
-    'lookup',
-    'lstrip',
-    'map',
-    'match',
-    'max',
-    'md5',
-    'min',
-    'module_directory',
-    'new',
-    'next',
-    'notice',
-    'partition',
-    'realize',
-    'reduce',
-    'regsubst',
-    'require',
-    'return',
-    'reverse_each',
-    'round',
-    'rstrip',
-    'scanf',
-    'sha1',
-    'sha256',
-    'shellquote',
-    'size',
-    'slice',
-    'sort',
-    'split',
-    'sprintf',
-    'step',
-    'strftime',
-    'strip',
-    'tag',
-    'tagged',
-    'template',
-    'then',
-    'tree_each',
-    'type',
-    'unique',
-    'unwrap',
-    'upcase',
-    'values',
-    'versioncmp',
-    'warning',
-    'with',
-    'yaml_data',
-}
-
-
-def find_declarations(objs: list[MatchObject]) -> list[str]:
-    """
-    Find all local variable declarations.
-
-    Searches the code for all local variable declarations, returing a
-    list of variable names.
-
-    Note that the same variable might appear multiple times, for example:
-
-    .. code-block:: puppet
-        :caption: The same variable being declared twice
-
-        if $something {
-            $x = 10
-        } else {
-            $x = 20
-        }
-    """
-    declarations = []
-    for obj in objs:
-        match obj:
-            case MatchCompound(type='declaration', matched=xs):
-                for x in xs:
-                    match x:
-                        case MatchCompound(type='var', matched=ys):
-                            declarations.append(inner_text(ys))
-    return declarations
-
-
-class Reserializer:
-    """
-    Context for reserializing parsed data back into code.
-
-    :param local_vars:
-        Variables declared within this file. Used when resolving
-        hyperlinks.
-    """
-
-    def __init__(self, local_vars: list[str]):
-        self.local_vars: list[str] = local_vars
-
-    def reserialize(self,
-                    obj: MatchObject | Sequence[MatchObject]) -> str:
-        """
-        Reconstruct puppet code after parsing it.
-
-        After building the parser, and parsing the puppet code into a tree
-        of MatchObjects; this procedure returns it into puppet code.
-        Difference being that we now have metadata, meaning that syntax
-        highlighting and variable hyperlinks can be inserted.
-
-        :param obj:
-            Should be assumed to be a list of MatchObject's, or something similar.
-
-            MatchCompound objects are serialized as
-
-            .. code-block:: html
-
-                <span class="{type}">{body}</span>
-
-            strings as themselves, and lists have reserialize mapped over them.
-
-        """
-        out: list[str] = []
-        # logger.info("obj = %a", obj)
-
-        # TODO hyperlink functions.
-        # The problem is that a function can either be implemented in
-        # Puppet, or in Ruby. And Ruby functions' names aren't bound
-        # by the directory layout.
-        match obj:
-            case str(s):
-                out.append(html.escape(s))
-
-            case MatchCompound(type='resource-name', matched=xs):
-                name = inner_text(xs)
-                url, cls = name_to_url(name)
-                if url:
-                    out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>')
-                else:
-                    # TODO this is class, but the class name should
-                    # also be hyperlinked
-                    out.append(f'<span class="resource-name {cls}">{name}</span>')
-
-            case MatchCompound(type='invoke', matched=xs):
-                function = None
-                for x in xs:
-                    match x:
-                        case MatchCompound(type='qn', matched=ys):
-                            if function is None:
-                                function = inner_text(ys)
-                                if function in built_in_functions:
-                                    # class="qn"
-                                    url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}"  # noqa: E501
-                                    tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>'  # noqa: E501
-                                    out.append(tag)
-                                else:
-                                    # TODO function to url
-                                    out.append(f'<span class="qn">{self.reserialize(ys)}</span>')
-                            else:
-                                if function == 'include':
-                                    url, cls = name_to_url(inner_text(ys))
-                                    # class="qn"
-                                    tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>'  # noqa: E501
-                                    out.append(tag)
-                                else:
-                                    out.append(self.reserialize(ys))
-                        case _:
-                            out.append(self.reserialize(x))
-
-            case MatchCompound(type='declaration', matched=xs):
-                for x in xs:
-                    match x:
-                        case MatchCompound(type='var', matched=ys):
-                            inner = ''.join(self.reserialize(y) for y in ys)
-                            out.append(f'<span id="{inner_text(ys)}">{inner}</span>')
-                        case _:
-                            out.append(self.reserialize(x))
-
-            case MatchCompound(type='var', matched=xs):
-                out.append(self.var_to_url(inner_text(xs)))
-
-            case MatchCompound(type=type, matched=xs):
-                body = ''.join(self.reserialize(x) for x in xs)
-                out.append(f'<span class="{type}">{body}</span>')
-
-            case [*xs]:
-                out.extend(self.reserialize(x) for x in xs)
-
-            case rest:
-                logger.error("Unknown type: %a", rest)
-
-        return ''.join(out)
-
-    def var_to_url(self, var: str) -> str:
-        """
-        Format variable, adding hyperlink to its definition.
-
-        TODO these can refer to both defined types (`manifests/*.pp`),
-        as well as resource types (`lib/puppet/provider/*/*.rb` /
-        `lib/tpuppet/type/*.rb`)
-
-        Same goes for functions (`functions/*.pp`),
-        (`lib/puppet/functions.rb`).
-
-        :param var:
-            Name of the variable.
-
-        :return:
-            An HTML anchor element.
-        """
-        match var.split('::'):
-            case [name]:
-                # Either a local or global variable
-                # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html
-
-                href = None
-                cls = ''
-                if name in self.local_vars:
-                    href = f'#{html.escape(var)}'
-                elif name in built_in_variables:
-                    href = html.escape(built_in_variables[name])
-                    cls = 'puppet-doc'
-
-                if href:
-                    return f'<a class="var {cls}" href="{href}">{var}</a>'
-                else:
-                    # `name` refers to a global fact.
-                    return f'<span class="var">{var}</span>'
-
-            case ['', name]:
-                # A global variable
-                if name in built_in_variables:
-                    href = html.escape(built_in_variables[name])
-                    img = '<img src="/code/muppet-strings/output/static/favicon.ico" />'
-                    return f'<a class="var" href="{href}">{var}{img}</a>'
-                else:
-                    return f'<span class="var">{var}</span>'
-
-            # Note the "special module" 'settings',
-            # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables
-            case ['', module, *items, name]:
-                s = '/code/muppet-strings/output/' \
-                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
-                s += f'#{name}'
-                return f'<a class="var" href="{s}">{var}</a>'
-            case [module, *items, name]:
-                s = '/code/muppet-strings/output/' \
-                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
-                s += f'#{name}'
-                return f'<a class="var" href="{s}">{var}</a>'
-            case _:
-                raise ValueError()
-
-
-def parse_puppet(source: str, file: str, in_parameters: list[str]) -> str:
-    """
-    Parse and syntax highlight the given puppet source.
-
-    :returns: An HTML string
-    """
-    # Run the upstream puppet parser,
-    # then masage the tree into a usable form.
-    ast = build_ast(puppet_parser(source))
-
-    # From the ast, build a parser combinator parser.
-    # This parser will attach sufficient metadata to allow syntax
-    # highlighting and hyperlinking
-    parser = ParserFormatter().serialize(ast)
-
-    # Run the generated parser, giving us a list of match objects.
-    match_objects = ParserCombinator(source, file).get(parser)
-
-    # Reserialize the matched data back into puppet code, realizing
-    # the syntax highlighting and hyperlinks.
-    return Reserializer(find_declarations(match_objects) + (in_parameters)) \
-        .reserialize(match_objects)
diff --git a/muppet/output/docstring.py b/muppet/output/docstring.py
new file mode 100644
index 0000000..b85676c
--- /dev/null
+++ b/muppet/output/docstring.py
@@ -0,0 +1,258 @@
+"""
+Generate output for Puppet Docstrings.
+
+Docstrings are the comments preceding any top level puppet
+declaration (such as classes, resource definitions, ...). These have a
+number of "magic" tags for attaching metadata, along with usually
+being Markdown formatted. This module assumes that they all are
+Markdown formatted, which unfortunately leads to some (minor) errors.
+
+(The final output also contains the original source, allowing these
+errors to be overlooked).
+"""
+
+from dataclasses import dataclass, field
+import html
+import re
+from typing import cast
+from muppet.markdown import markdown
+from muppet.puppet.strings import (
+    DocString,
+    DocStringApiTag,
+    DocStringAuthorTag,
+    DocStringExampleTag,
+    DocStringOptionTag,
+    DocStringOverloadTag,
+    DocStringParamTag,
+    DocStringRaiseTag,
+    DocStringReturnTag,
+    DocStringSeeTag,
+    DocStringSinceTag,
+    DocStringSummaryTag,
+    DocStringTag,
+)
+
+
+# TODO what even is this for? Rebuilt (param name -> text) by each format_docstring call.
+param_doc: dict[str, str] = {}
+
+
+@dataclass
+class GroupedTags:
+    """
+    Docstring tags of one class (or similar), bucketed by tag name.
+
+    Field names carry a trailing underscore, since some tag names
+    (e.g. ``raise``) are reserved words in python.
+    """
+
+    param_:     list[DocStringParamTag]             = field(default_factory=list)  # noqa: E221
+    example_:   list[DocStringExampleTag]           = field(default_factory=list)  # noqa: E221
+    overload_:  list[DocStringOverloadTag]          = field(default_factory=list)  # noqa: E221
+    option_:    dict[str, list[DocStringOptionTag]] = field(default_factory=dict)  # noqa: E221
+    """
+    Options, which document the valid values of Hash parameters.
+
+    Keyed on the name of the parameter, each value being the list
+    of options registered for that hash.
+    """
+
+    author_:    list[DocStringAuthorTag]            = field(default_factory=list)  # noqa: E221
+    api_:       list[DocStringApiTag]               = field(default_factory=list)  # noqa: E221
+    raise_:     list[DocStringRaiseTag]             = field(default_factory=list)  # noqa: E221
+    return_:    list[DocStringReturnTag]            = field(default_factory=list)  # noqa: E221
+    since_:     list[DocStringSinceTag]             = field(default_factory=list)  # noqa: E221
+    summary_:   list[DocStringSummaryTag]           = field(default_factory=list)  # noqa: E221
+    see_:       list[DocStringSeeTag]               = field(default_factory=list)  # noqa: E221
+    other_:     list[DocStringTag]                  = field(default_factory=list)  # noqa: E221
+    """Tags whose name isn't any of the above."""
+
+    @classmethod
+    def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags':
+        """Sort each tag in `tags` into the field matching its tag name."""
+        result = cls()
+        for entry in tags:
+            match entry.tag_name:
+                case 'option':
+                    entry = cast(DocStringOptionTag, entry)
+                    result.option_.setdefault(entry.parent, []).append(entry)
+                case ('param' | 'example' | 'overload' | 'author' | 'api'
+                      | 'raise' | 'return' | 'since' | 'summary' | 'see'):
+                    getattr(result, entry.tag_name + '_').append(entry)
+                case _:
+                    result.other_.append(entry)
+        return result
+
+
+def parse_author(author: str) -> str:
+    """
+    Format author tags' content.
+
+    :param author:
+        The contents of the author tag. If the whole string is on the
+        regular "author" format of ``"Firstname Lastname
+        <first.last@example.com>"`` then the email will be formatted
+        and hyperlinked. Otherwise the string is returned HTML escaped.
+    :return:
+        An HTML safe string, possibly including tags.
+    """
+    m = re.match(r'(?P<author>.*) (<(?P<email>.*)>)$|(?P<any>.*)', author)
+    assert m, "The above regex can't fail"
+    if m['author'] and m['email']:
+        author = html.escape(m['author'])
+        email = html.escape(m['email'])
+        return f'{author} <a class="email" href="mailto:{email}">&lt;{email}&gt;</a>'
+    else:
+        return html.escape(author if m['any'] is None else m['any'])
+
+
+def format_docstring(name: str, docstring: DocString) -> tuple[str, str]:
+    """
+    Format docstrings as they appear in some puppet types.
+
+    Those types being puppet_classes, puppet_type_aliases, and
+    defined_types.
+
+    Also replaces the module level ``param_doc`` with the parameter
+    descriptions found in `docstring`.
+
+    :param name: Name of the documented item, returned unchanged.
+    :param docstring: The parsed docstring to render.
+    :return: A tuple of `name` and the rendered HTML.
+    """
+    global param_doc
+    # The api tag is ignored, since it instead is shown from context
+    out = ''
+
+    param_doc = {tag.name: tag.text or ''
+                 for tag in docstring.tags
+                 if isinstance(tag, DocStringParamTag)}
+
+    grouped_tags = GroupedTags.from_taglist(docstring.tags)
+
+    out += '<a href="#code">Jump to Code</a><br/>'
+
+    if tags := grouped_tags.summary_:
+        out += '<em class="summary">'
+        for tag in tags:
+            out += html.escape(tag.text)
+        out += '</em>'
+
+    out += '<div class="description">'
+    # TODO "TODO" highlighting
+    out += markdown(docstring.text)
+    out += '</div>'
+
+    # TODO proper handling of multiple @see tags
+    if sees := grouped_tags.see_:
+        out += '<b>See</b> '
+        for see in sees:
+            link: str
+            m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name)
+            assert m, "Regex always matched"
+            # The reference is free text from the docstring, so it
+            # has to be escaped before ending up in the HTML.
+            target = html.escape(see.name)
+            if m['url']:
+                link = f'<a href="{target}">{target}</a>'
+            elif m['man']:
+                # A man page reference on the form "page(N)".
+                page = html.escape(see.name[:-3])
+                section = html.escape(see.name[-2])
+                # TODO man providers
+                link = f'<a href="https://manned.org/man/{page}.{section}">{target}</a>'
+            else:
+                # TODO hyperlink references to puppet items (names
+                # containing '::'), and figure out what to do with
+                # the remaining free form references.
+                link = html.escape(m['other'])
+            out += link
+            if see.text:
+                out += ' ' + html.escape(see.text)
+
+    if authors := grouped_tags.author_:
+        out += '<div class="author">'
+        out += "<em>Written by </em>"
+        if len(authors) == 1:
+            out += parse_author(authors[0].text)
+        else:
+            out += '<ul>'
+            for author in authors:
+                out += f'<li>{parse_author(author.text)}</li>'
+            out += '</ul>'
+        out += '</div>'
+
+    out += '<hr/>'
+
+    t: DocStringTag
+
+    for t in grouped_tags.example_:
+        out += '<div class="code-example">'
+
+        if title := t.name:
+            # TODO markup for title
+            out += f'<div class="code-example-header">{html.escape(title)}</div>\n'
+        # TODO highlight?
+        # Problem is that we don't know what language the example
+        # is in. Pygmentize however seems to do a reasonable job
+        # treating anything as puppet code
+        text = html.escape(t.text)
+        out += f'<pre><code class="puppet">{text}</code></pre>\n'
+        out += '</div>'
+
+    out += '<hr/>'
+
+    out += '<dl>'
+    for t in grouped_tags.param_:
+        param_name = html.escape(t.name)
+        out += f'<dt><span id="{param_name}" class="variable">{param_name}</span>'
+        match t.types:
+            case [x]:
+                # TODO highlight type?
+                out += f': <code>{html.escape(x)}</code>'
+            case [_, *_]:
+                raise ValueError("How did you get multiple types onto a parameter?")
+
+        # TODO Fetch default values from puppet strings output
+        # Then in javascript query Hiera to get the true "default"
+        # values for a given machine (somewhere have a setting for
+        # selecting machine).
+        out += '</dt>'
+
+        if text := t.text:
+            text = re.sub(r'(NOTE|TODO)',
+                          r'<mark>\1</mark>',
+                          markdown(text))
+
+            if options := grouped_tags.option_.get(t.name):
+                text += '<dl>'
+                for option in options:
+                    text += '<dt>'
+                    text += html.escape(option.opt_name)
+                    match option.opt_types:
+                        case [x]:
+                            text += f' [<code>{html.escape(x)}</code>]'
+                        case [_, *_]:
+                            raise ValueError("How did you get multiple types onto an option?")
+                    text += '</dt>'
+                    text += '<dd>'
+                    if option.opt_text:
+                        text += re.sub(r'(NOTE|TODO)',
+                                       r'<mark>\1</mark>',
+                                       markdown(option.opt_text))
+                    text += '</dd>'
+                text += '</dl>'
+
+            out += f"<dd>{text}</dd>"
+        else:
+            out += '<dd><em>Undocumented</em></dd>'
+    out += '</dl>'
+
+    # TODO remaining tags
+    # "overload"
+    # raise
+    # return
+    # since
+    # _other
+
+    return (name, out)
diff --git a/muppet/output/puppet_source.py b/muppet/output/puppet_source.py
new file mode 100644
index 0000000..d3b9f4d
--- /dev/null
+++ b/muppet/output/puppet_source.py
@@ -0,0 +1,418 @@
+"""Generate output for Puppet Source code."""
+
+import html
+import logging
+from typing import Sequence
+
+from muppet.parser_combinator import (
+    ParserCombinator,
+    MatchCompound,
+    MatchObject,
+)
+from muppet.puppet.ast import build_ast
+from muppet.puppet.parser import puppet_parser
+from muppet.puppet.format.parser import ParserFormatter
+
+from .util import inner_text
+
+
+logger = logging.getLogger(__name__)
+
+
+_puppet_doc_base = 'https://www.puppet.com/docs/puppet/7'
+_lang_facts_builtin_variables = (f'{_puppet_doc_base}/lang_facts_builtin_variables'
+                                 '#lang_facts_builtin_variables')
+_server_variables = f'{_lang_facts_builtin_variables}-server-variables'
+_compiler_variables = f'{_lang_facts_builtin_variables}-compiler-variables'
+_trusted_facts = f'{_lang_facts_builtin_variables}-trusted-facts'
+_server_facts = f'{_lang_facts_builtin_variables}-server-facts'
+
+# Built in variables, mapped to where the official documentation describes them.
+_built_in_variables = {
+    'facts': _lang_facts_builtin_variables,
+    # clientcert, clientversion, puppetversion, clientnoop,
+    # agent_specified_environment:
+    # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts
+    'trusted': _trusted_facts,
+    'server_facts': _server_facts,
+    'environment': _server_variables,
+    'servername': _server_variables,
+    'serverip': _server_variables,
+    'serverversion': _server_variables,
+    'module_name': _compiler_variables,
+    'caller_module_name': _compiler_variables,
+    # Also note the special variable $title and $name
+    # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name
+}
+
+
+# Resource types which name_to_url links to the official puppet documentation.
+# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
+# https://www.puppet.com/docs/puppet/7/types/file.html
+_built_in_types = {
+    'package',
+    'file',
+    'service',
+    'notify',
+    'exec',
+    'user',
+    'group',
+}
+
+# Built in functions; docs: https://www.puppet.com/docs/puppet/7/function.html#{name}
+_built_in_functions = {
+    'abs',
+    'alert',
+    'all',
+    'annotate',
+    'any',
+    'assert_type',
+    'binary_file',
+    'break',
+    'call',
+    'camelcase',
+    'capitalize',
+    'ceiling',
+    'chomp',
+    'chop',
+    'compare',
+    'contain',
+    'convert_to',
+    'create_resources',
+    'crit',
+    'debug',
+    'defined',
+    'dig',
+    'digest',
+    'downcase',
+    'each',
+    'emerg',
+    'empty',
+    'epp',
+    'err',
+    'eyaml_lookup_key',
+    'fail',
+    'file',
+    'filter',
+    'find_file',
+    'find_template',
+    'flatten',
+    'floor',
+    'fqdn_rand',
+    'generate',
+    'get',
+    'getvar',
+    'group_by',
+    'hiera',
+    'hiera_array',
+    'hiera_hash',
+    'hiera_include',
+    'hocon_data',
+    'import',
+    'include',
+    'index',
+    'info',
+    'inline_epp',
+    'inline_template',
+    'join',
+    'json_data',
+    'keys',
+    'length',
+    'lest',
+    'lookup',
+    'lstrip',
+    'map',
+    'match',
+    'max',
+    'md5',
+    'min',
+    'module_directory',
+    'new',
+    'next',
+    'notice',
+    'partition',
+    'realize',
+    'reduce',
+    'regsubst',
+    'require',
+    'return',
+    'reverse_each',
+    'round',
+    'rstrip',
+    'scanf',
+    'sha1',
+    'sha256',
+    'shellquote',
+    'size',
+    'slice',
+    'sort',
+    'split',
+    'sprintf',
+    'step',
+    'strftime',
+    'strip',
+    'tag',
+    'tagged',
+    'template',
+    'then',
+    'tree_each',
+    'type',
+    'unique',
+    'unwrap',
+    'upcase',
+    'values',
+    'versioncmp',
+    'warning',
+    'with',
+    'yaml_data',
+}
+
+
+def _find_declarations(objs: list[MatchObject]) -> list[str]:
+    """
+    Collect the names of the variables declared in `objs`.
+
+    Each 'declaration' compound in `objs` contributes the inner text
+    of its 'var' children, in the order they are encountered.
+
+    A name may be listed more than once, since the same variable can
+    be declared in several places:
+
+    .. code-block:: puppet
+        :caption: The same variable being declared twice
+
+        if $something {
+            $x = 10
+        } else {
+            $x = 20
+        }
+    """
+    names: list[str] = []
+    # Only compounds directly in `objs` are inspected, without recursion.
+    for node in objs:
+        if isinstance(node, MatchCompound) and node.type == 'declaration':
+            names.extend(inner_text(part.matched)
+                         for part in node.matched
+                         if isinstance(part, MatchCompound) and part.type == 'var')
+    return names
+
+
+class _PuppetReserializer:
+    """
+    Reserializes parsed puppet code back into puppet code.
+
+    This allows syntax highlighting, and hyperlinking to be added to the code.
+
+    :param local_vars:
+        Variables declared within this file. Used when resolving
+        hyperlinks.
+    """
+
+    def __init__(self, local_vars: list[str]):
+        self.local_vars: list[str] = local_vars
+
+    def reserialize(self, obj: MatchObject | Sequence[MatchObject]) -> str:
+        """
+        Reconstruct puppet code after parsing it.
+
+        After building the parser, and parsing the puppet code into a tree
+        of MatchObjects; this procedure returns it into puppet code.
+        Difference being that we now have metadata, meaning that syntax
+        highlighting and variable hyperlinks can be inserted.
+
+        :param obj:
+            Should be assumed to be a list of MatchObject's, or something similar.
+
+            MatchCompound objects are by default serialized as
+
+            .. code-block:: html
+
+                <span class="{type}">{body}</span>
+
+            strings as themselves (HTML escaped), and lists have
+            reserialize mapped over them.
+        """
+        out: list[str] = []
+        # logger.info("obj = %a", obj)
+
+        # TODO hyperlink functions.
+        # The problem is that a function can either be implemented in
+        # Puppet, or in Ruby. And Ruby functions' names aren't bound
+        # by the directory layout.
+        match obj:
+            case str(s):
+                out.append(html.escape(s))
+
+            case MatchCompound(type='resource-name', matched=xs):
+                name = inner_text(xs)
+                url, cls = name_to_url(name)
+                if url:
+                    out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>')
+                else:
+                    # TODO this is class, but the class name should
+                    # also be hyperlinked
+                    out.append(f'<span class="resource-name {cls}">{name}</span>')
+
+            case MatchCompound(type='invoke', matched=xs):
+                function = None
+                for x in xs:
+                    match x:
+                        case MatchCompound(type='qn', matched=ys):
+                            if function is None:
+                                function = inner_text(ys)
+                                if function in _built_in_functions:
+                                    # class="qn"
+                                    url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}"  # noqa: E501
+                                    tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>'  # noqa: E501
+                                    out.append(tag)
+                                else:
+                                    # TODO function to url
+                                    out.append(f'<span class="qn">{self.reserialize(ys)}</span>')
+                            else:
+                                if function == 'include':
+                                    url, cls = name_to_url(inner_text(ys))
+                                    # class="qn"
+                                    tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>'  # noqa: E501
+                                    out.append(tag)
+                                else:
+                                    out.append(self.reserialize(ys))
+                        case _:
+                            out.append(self.reserialize(x))
+
+            case MatchCompound(type='declaration', matched=xs):
+                for x in xs:
+                    match x:
+                        case MatchCompound(type='var', matched=ys):
+                            inner = ''.join(self.reserialize(y) for y in ys)
+                            out.append(f'<span id="{inner_text(ys)}">{inner}</span>')
+                        case _:
+                            out.append(self.reserialize(x))
+
+            case MatchCompound(type='var', matched=xs):
+                out.append(self.var_to_url(inner_text(xs)))
+
+            case MatchCompound(type=type, matched=xs):
+                body = ''.join(self.reserialize(x) for x in xs)
+                out.append(f'<span class="{type}">{body}</span>')
+
+            case [*xs]:
+                out.extend(self.reserialize(x) for x in xs)
+
+            case rest:
+                logger.error("Unknown type: %a", rest)
+
+        return ''.join(out)
+
+    def var_to_url(self, var: str) -> str:
+        """
+        Format variable, adding hyperlink to its definition.
+
+        TODO these can refer to both defined types (`manifests/*.pp`),
+        as well as resource types (`lib/puppet/provider/*/*.rb` /
+        `lib/puppet/type/*.rb`)
+
+        Same goes for functions (`functions/*.pp`),
+        (`lib/puppet/functions.rb`).
+
+        :param var:
+            Name of the variable.
+
+        :return:
+            An HTML anchor element, or a span if no target is known.
+        """
+        match var.split('::'):
+            case [name]:
+                # Either a local or global variable
+                # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html
+
+                href = None
+                cls = ''
+                if name in self.local_vars:
+                    href = f'#{html.escape(var)}'
+                elif name in _built_in_variables:
+                    href = html.escape(_built_in_variables[name])
+                    cls = 'puppet-doc'
+
+                if href:
+                    return f'<a class="var {cls}" href="{href}">{var}</a>'
+                else:
+                    # `name` refers to a global fact.
+                    return f'<span class="var">{var}</span>'
+
+            case ['', name]:
+                # A global variable
+                if name in _built_in_variables:
+                    href = html.escape(_built_in_variables[name])
+                    img = '<img src="/code/muppet-strings/output/static/favicon.ico" />'
+                    return f'<a class="var" href="{href}">{var}{img}</a>'
+                else:
+                    return f'<span class="var">{var}</span>'
+
+            # Note the "special module" 'settings',
+            # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables
+            case ['', module, *items, name]:
+                s = '/code/muppet-strings/output/' \
+                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
+                s += f'#{name}'
+                return f'<a class="var" href="{s}">{var}</a>'
+            case [module, *items, name]:
+                s = '/code/muppet-strings/output/' \
+                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
+                s += f'#{name}'
+                return f'<a class="var" href="{s}">{var}</a>'
+            case _:
+                raise ValueError()
+
+
+def hyperlink_puppet_source(source: str, file: str, in_parameters: list[str]) -> str:
+    """
+    Turn puppet source code into highlighted and hyperlinked HTML.
+
+    :param source: The puppet code to render.
+    :param file: Handed to the parser combinator along with `source`.
+    :param in_parameters:
+        Names treated as local variables, in addition to the
+        declarations found in `source`.
+    :returns: An HTML string
+    """
+    # The upstream puppet parser gives a tree, which is massaged
+    # into our own ast. That ast is in turn serialized into a parser
+    # combinator parser, carrying enough metadata for syntax
+    # highlighting and hyperlinking.
+    tree = build_ast(puppet_parser(source))
+    grammar = ParserFormatter().serialize(tree)
+    # Running that parser over the source gives the match objects,
+    # which finally are written back out as marked up puppet code.
+    matches = ParserCombinator(source, file).get(grammar)
+    known_vars = _find_declarations(matches) + (in_parameters)
+    return _PuppetReserializer(known_vars).reserialize(matches)
+
+
+def name_to_url(name: str) -> tuple[str | None, str]:
+    """
+    Resolve a class or resource name into an url.
+
+    :param name:
+        The name of a class or resource, such as "example::resource".
+        Leading colons are stripped when building internal links.
+    :return:
+        A tuple of an url, and a string of extra HTML classes.
+
+        The url is a link to the official puppet documentation for
+        built in types, ``None`` if `name` is "class", and otherwise
+        an internal link to the definition of that type.
+
+        The extra classes are mostly there so that external
+        references can be marked properly.
+    """
+    if name in _built_in_types:
+        return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc')
+    if name == 'class':
+        return (None, '')
+
+    # TODO special cases for puppet's built in types.
+    # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
+    module, *items = name.lstrip(':').split('::')
+    path = [module, 'manifests', *(items or ['init'])]
+    # TODO get prefix from the command line/config file
+    return ('/code/muppet-strings/output/' + '/'.join(path), '')
diff --git a/muppet/output/util.py b/muppet/output/util.py
new file mode 100644
index 0000000..b1d69f5
--- /dev/null
+++ b/muppet/output/util.py
@@ -0,0 +1,44 @@
+"""
+Misc utilities for the final output.
+
+These don't really belong to any sub-system, even though some are more
+useful than others.
+
+The aim is to only have pure functions here.
+"""
+
+from muppet.parser_combinator import (
+    MatchCompound,
+    MatchObject,
+)
+
+
+def inner_text(obj: MatchObject | list[MatchObject]) -> str:
+    """
+    Flatten a tree of MatchObjects into the text it contains.
+
+    This is really similar to HTML's inner_text.
+
+    Strings are returned as they are, and the contents of compounds
+    and lists are concatenated. Whitespace ('ws') compounds are the
+    exception: an empty one gives the empty string, while a non-empty
+    one gives a single space (note that this discards comments).
+
+    This only works properly if no function was mapped over the parser
+    return values in tree, see :func:`muppet.parser_combinator.fmap`.
+
+    :param obj:
+        Match Objects to search.
+    :raises ValueError:
+        If something which is neither of the above is encountered.
+    """
+    match obj:
+        case str(text):
+            return text
+        case MatchCompound(type='ws', matched=[]):
+            return ''
+        case MatchCompound(type='ws'):
+            return ' '
+        case MatchCompound(matched=children) | [*children]:
+            return ''.join(map(inner_text, children))
+    raise ValueError('How did we get here')
-- 
GitLab