From 4892081b9950d760bea325e6afc2047ca1913219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se> Date: Mon, 25 Sep 2023 16:08:32 +0200 Subject: [PATCH] Start splitting output back into modules. The difference between this split, and the old, is that everything is now clearly under output. --- doc/muppet.rst | 3 +- muppet/{output.py => output/__init__.py} | 783 +++-------------------- muppet/output/docstring.py | 258 ++++++++ muppet/output/puppet_source.py | 418 ++++++++++++ muppet/output/util.py | 44 ++ 5 files changed, 808 insertions(+), 698 deletions(-) rename muppet/{output.py => output/__init__.py} (58%) create mode 100644 muppet/output/docstring.py create mode 100644 muppet/output/puppet_source.py create mode 100644 muppet/output/util.py diff --git a/doc/muppet.rst b/doc/muppet.rst index 5a724a0..88d90ba 100644 --- a/doc/muppet.rst +++ b/doc/muppet.rst @@ -7,6 +7,7 @@ Subpackages .. toctree:: :maxdepth: 4 + muppet.output muppet.puppet muppet.syntax_highlight @@ -21,11 +22,9 @@ Submodules muppet.intersperse muppet.lookup muppet.markdown - muppet.output muppet.parser_combinator muppet.symbols muppet.tabs - muppet.templates muppet.util Module contents diff --git a/muppet/output.py b/muppet/output/__init__.py similarity index 58% rename from muppet/output.py rename to muppet/output/__init__.py index 2d3440a..3e1b59a 100644 --- a/muppet/output.py +++ b/muppet/output/__init__.py @@ -11,8 +11,8 @@ import os.path import pathlib import re import json +from glob import glob from typing import ( - Any, Optional, Protocol, Sequence, @@ -28,18 +28,7 @@ from muppet.puppet.strings import ( DataTypeAlias, DefinedType, DocString, - DocStringApiTag, - DocStringAuthorTag, - DocStringExampleTag, - DocStringOptionTag, - DocStringOverloadTag, DocStringParamTag, - DocStringRaiseTag, - DocStringReturnTag, - DocStringSeeTag, - DocStringSinceTag, - DocStringSummaryTag, - DocStringTag, Function, PuppetClass, PuppetStrings, @@ -48,20 +37,16 @@ from 
muppet.puppet.strings import ( puppet_strings_cached, ) from muppet.parser_combinator import ( - ParserCombinator, ParseError, - MatchCompound, - MatchObject, ) from muppet.markdown import markdown from muppet.breadcrumbs import Breadcrumbs, breadcrumbs from muppet.util import group_by, partition from muppet.cache import AbstractCache -from muppet.puppet.parser import puppet_parser -from muppet.puppet.ast import build_ast -from muppet.puppet.format.parser import ParserFormatter +from .docstring import format_docstring +from .puppet_source import hyperlink_puppet_source jinja = Environment( @@ -73,11 +58,32 @@ jinja = Environment( logger = logging.getLogger(__name__) -param_doc: dict[str, str] = {} +class HtmlSerializable(Protocol): + """Classes which can be serialized as HTML.""" + + def to_html(self) -> str: # pragma: no cover + """Return HTML string.""" + ... + + def to_html_list(self) -> str: # pragma: no cover + """Return HTML suitable for a list.""" + ... class Templates: - """Namespace for templates.""" + """ + Namespace for templates. + + Almost all of these methods take these values: + + :param path_base: + Prefix added to all links within output + TODO shouldn't this be bound to the object, since it should + NEVER change (during a run). + + :param breadcrumbs: + Breadcrumb to current page. + """ def __init__(self) -> None: self.jinja = Environment( @@ -90,7 +96,15 @@ class Templates: path_base: str, breadcrumbs: Optional[Breadcrumbs] = None ) -> str: # pragma: no cover - """Template for a page containing puppet code.""" + """ + Template for a page containing puppet code. + + :param title: + Top level h1 tag of page. + + :param content: + Free form string content, being the main body of the page.
+ """ template = self.jinja.get_template('code_page.html') return template.render( title=title, @@ -103,7 +117,12 @@ class Templates: path_base: str, breadcrumbs: Optional[Breadcrumbs] = None ) -> str: # pragma: no cover - """Template for a page with arbitrary content.""" + """ + Template for a page with arbitrary content. + + :param content: + Arbitrary content + """ template = self.jinja.get_template('content.html') return template.render( content=content, @@ -115,7 +134,12 @@ class Templates: path_base: str, breadcrumbs: Optional[Breadcrumbs] = None ) -> str: # pragma: no cover - """Root index file.""" + """ + Environment index file. + + :param modules: + List of PuppetModules to include in the environment index. + """ template = self.jinja.get_template('index.html') return template.render( path_base=path_base, @@ -124,7 +148,7 @@ class Templates: def module_index(self, *, # content: list[], # something with to_html_list and to_html - content: list[Any], # TODO something with to_html_list and to_html + content: Sequence[HtmlSerializable], module_author: str, module_name: str, doc_files: list[tuple[str, str]], @@ -133,7 +157,33 @@ class Templates: left_sidebar: Optional[str] = None, right_sidebar: Optional[str] = None, ) -> str: # pragma: no cover - """Index for a single module.""" + """ + Index for a single module. + + :param content: + Main content of the page. + :param module_author: + Author of the module, as it appears in the metadata.json file. + :param module_name: + Name of the module, without the author component. + :param doc_files: + The free-form documentation files bundled with the module. + + Each element should be a pair of + - The idealized name of the file + - The relative path to the document inside the output + (the HTML generated version) + + :param left_sidebar: + Free form content of the left sidebar. + + This is assumed to be a list of modules in the environment. + :param right_sidebar: + Free form content of the right sidebar. 
+ + This is assumed to be a table of contents of the module, + really similar to the actual body contents. + """ template = self.jinja.get_template('module_index.html') return template.render( content=content, @@ -149,18 +199,6 @@ class Templates: templates = Templates() -class HtmlSerializable(Protocol): - """Classes which can be serialized as HTML.""" - - def to_html(self) -> str: # pragma: no cover - """Return HTML string.""" - ... - - def to_html_list(self) -> str: # pragma: no cover - """Return HTML suitable for a list.""" - ... - - @dataclass class ResourceTypeOutput: """Basic HTML implementation.""" @@ -381,8 +419,10 @@ class PuppetModule: self.toc = self._build_module_toc() self.output_prefix = output_prefix - # TODO - self.doc_files: list[str] = [] + abspath = os.path.abspath(self.path) + self.doc_files: list[str] = \ + glob(os.path.join(abspath, '*.md')) + \ + glob(os.path.join(abspath, 'LICENSE')) try: with open(os.path.join(self.path, 'metadata.json')) as f: @@ -390,7 +430,7 @@ class PuppetModule: except FileNotFoundError: self.metadata = {} - def _build_module_toc(self) -> list[ResourceIndex | IndexCategory]: + def _build_module_toc(self) -> Sequence[ResourceIndex | IndexCategory]: """Build the TOC of the module.""" content: list[ResourceIndex | IndexCategory] = [] @@ -482,8 +522,8 @@ class PuppetModule: breadcrumbs=crumbs, content=toc, path_base=self.output_prefix, - # doc_files=list(doc_files.items()) # TODO + # doc_files=self.doc_files, doc_files=[], # left_sidebar=(), right_sidebar=''.join([ @@ -872,158 +912,6 @@ def index_item(obj: PuppetClass | DefinedType) -> IndexItem: return out -def format_docstring(name: str, docstring: DocString) -> tuple[str, str]: - """ - Format docstrings as they appear in some puppet types. 
- - Those types being: - - * puppet_classes, - * puppet_type_aliases, and - * defined_types - """ - global param_doc - - # The api tag is ignored, since it instead is shown from context - - out = '' - - param_doc = {tag.name: tag.text or '' - for tag in docstring.tags - if isinstance(tag, DocStringParamTag)} - - grouped_tags = GroupedTags.from_taglist(docstring.tags) - - # -------------------------------------------------- - - out += '<a href="#code">Jump to Code</a><br/>' - - if tags := grouped_tags.summary_: - out += '<em class="summary">' - for tag in tags: - out += html.escape(tag.text) - out += '</em>' - - out += '<div class="description">' - # TODO "TODO" highlighting - out += markdown(docstring.text) - out += '</div>' - - # TODO proper handling of multiple @see tags - if sees := grouped_tags.see_: - out += '<b>See</b> ' - for see in sees: - link: str - m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name) - assert m, "Regex always matched" - if m['url']: - link = f'<a href="{see.name}">{see.name}</a>' - out += link - elif m['man']: - page = see.name[:-3] - section = see.name[-2] - # TODO man providers - link = f"https://manned.org/man/{page}.{section}" - out += link - else: - if '::' in m['other']: - # TODO - pass - else: - # TODO - # link = see - pass - out += m['other'] - out += ' ' + see.text - - if authors := grouped_tags.author_: - out += '<div class="author">' - out += "<em>Written by </em>" - if len(authors) == 1: - out += parse_author(authors[0].text) - else: - out += '<ul>' - for author in authors: - out += f'<li>{parse_author(author.text)}</li>' - out += '</ul>' - out += '</div>' - - out += '<hr/>' - - t: DocStringTag - - for t in grouped_tags .example_: - out += '<div class="code-example">' - - if name := t.name: - # TODO markup for title - out += f'<div class="code-example-header">{html.escape(name)}</div>\n' - # TODO highlight? - # Problem is that we don't know what language the example - # is in. 
Pygemntize however seems to do a reasonable job - # treating anything as puppet code - text = html.escape(t.text) - out += f'<pre><code class="puppet">{text}</code></pre>\n' - out += '</div>' - - out += '<hr/>' - - out += '<dl>' - for t in grouped_tags.param_: - name = html.escape(t.name) - out += f'<dt><span id="{name}" class="variable">{name}</span>' - match t.types: - case [x]: - # TODO highlight type? - out += f': <code>{html.escape(x)}</code>' - case [_, *_]: - raise ValueError("How did you get multiple types onto a parameter?") - - # TODO Fetch default values from puppet strings output - # Then in javascript query Hiera to get the true "default" - # values for a given machine (somewhere have a setting for - # selecting machine). - out += '</dt>' - - if text := t.text: - text = re.sub(r'(NOTE|TODO)', - r'<mark>\1</mark>', - markdown(text)) - - if options := grouped_tags.option_.get(t.name): - text += '<dl>' - for option in options: - text += '<dt>' - text += html.escape(option.opt_name) - match option.opt_types: - case [x]: - text += f' [<code>{html.escape(x)}</code>]' - case [_, *_]: - raise ValueError("How did you get multiple types onto an option?") - text += '</dt>' - text += '<dd>' - if option.opt_text: - text += re.sub(r'(NOTE|TODO)', - r'<mark>\1</mark>', - markdown(option.opt_text)) - text += '</dd>' - text += '</dl>' - - out += f"<dd>{text}</dd>" - else: - out += '<dd><em>Undocumented</em></dd>' - out += '</dl>' - - # TODO remaining tags - # "overload" - # raise - # return - # since - # _other - - return (name, out) - - def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]: """Format Puppet class.""" out = '' @@ -1033,7 +921,7 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]: out += body # ------ Old --------------------------------------- - # t = parse_puppet(d_type.source) + # t = hyperlink_puppet_source(d_type.source) # data = parse(t, 0, ['root']) # renderer = 
HTMLRenderer(build_param_dict(d_type.docstring)) # out += render(renderer, data) @@ -1047,7 +935,8 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]: in_parameters.append(cast(DocStringParamTag, tag).name) try: - result = parse_puppet(d_type.source, d_type.name, in_parameters) + # Calculation beforehand, for "atomic" formatting + result = hyperlink_puppet_source(d_type.source, d_type.name, in_parameters) out += '<pre class="highlight-muppet"><code class="puppet">' out += result out += '</code></pre>' @@ -1068,53 +957,6 @@ def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]: return name, out -@dataclass -class GroupedTags: - """ - All tags from a class (or similar) docstring. - - Most fields are simply lists of tags. The reason for trailing - underscores on each entry is since some tag names collide with - python keywords (e.g. ``raise``). - """ - - param_: list[DocStringParamTag] = field(default_factory=list) # noqa: E221 - example_: list[DocStringExampleTag] = field(default_factory=list) # noqa: E221 - overload_: list[DocStringOverloadTag] = field(default_factory=list) # noqa: E221 - option_: dict[str, list[DocStringOptionTag]] = field(default_factory=dict) # noqa: E221 - """ - Options document Hash parameters valid values. - - Each key is the corresponding parameter, and the value is the list - of registered options for that hash. 
- """ - - author_: list[DocStringAuthorTag] = field(default_factory=list) # noqa: E221 - api_: list[DocStringApiTag] = field(default_factory=list) # noqa: E221 - raise_: list[DocStringRaiseTag] = field(default_factory=list) # noqa: E221 - return_: list[DocStringReturnTag] = field(default_factory=list) # noqa: E221 - since_: list[DocStringSinceTag] = field(default_factory=list) # noqa: E221 - summary_: list[DocStringSummaryTag] = field(default_factory=list) # noqa: E221 - see_: list[DocStringSeeTag] = field(default_factory=list) # noqa: E221 - other_: list[DocStringTag] = field(default_factory=list) # noqa: E221 - """All tags of unknown type.""" - - @classmethod - def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags': - """Group a list of tags.""" - grouped_tags = cls() - for tag in tags: - if tag.tag_name == 'option': - tag = cast(DocStringOptionTag, tag) - grouped_tags.option_.setdefault(tag.parent, []).append(tag) - elif tag.tag_name in {'param', 'example', 'overload', 'author', 'api', - 'raise', 'return', 'since', 'summary', 'see'}: - getattr(grouped_tags, tag.tag_name + '_').append(tag) - else: - grouped_tags.other_.append(tag) - return grouped_tags - - def build_param_dict(docstring: DocString) -> dict[str, str]: """ Extract all parameter documentation from a docstring dict. 
@@ -1155,7 +997,7 @@ def format_type_alias(d_type: DataTypeAlias, file: str) -> tuple[str, str]: out += '\n' out += '<pre class="highlight-muppet"><code class="puppet">' try: - out += parse_puppet(d_type.alias_of, file, []) + out += hyperlink_puppet_source(d_type.alias_of, file, []) except ParseError as e: logger.error("Parsing %(name)s failed: %(err)s", {'name': d_type.alias_of, 'err': e}) @@ -1175,7 +1017,7 @@ def format_defined_type(d_type: DefinedType, file: str) -> tuple[str, str]: out += '<pre class="highlight-muppet"><code class="puppet">' try: - out += parse_puppet(d_type.source, file, []) + out += hyperlink_puppet_source(d_type.source, file, []) except ParseError as e: logger.error("Parsing %(name)s failed: %(err)s", {'name': d_type.source, 'err': e}) @@ -1240,7 +1082,7 @@ def format_puppet_function(function: Function, file: str) -> str: elif t == 'puppet': out += '<pre class="highlight-muppet"><code class="puppet">' try: - out += parse_puppet(function.source, file, []) + out += hyperlink_puppet_source(function.source, file, []) except ParseError as e: logger.error("Parsing %(name)s failed: %(err)s", {'name': function.source, 'err': e}) @@ -1261,454 +1103,3 @@ def format_puppet_task() -> str: def format_puppet_plan() -> str: """Format Puppet plan.""" return 'TODO format_puppet_plan not implemented' - - -def inner_text(obj: MatchObject | list[MatchObject]) -> str: - """ - Extract the text content from a set of MatchObjects. - - This is really similar to HTML's inner_text. - - Empty whitespace tags are expanded into nothing, non-empty - whitespace tags becomes a single space (note that this discards - commets). - - This only works properly if no function was mapped over the parser - return values in tree, see :func:`muppet.parser_combinator.fmap`. - - :param obj: - Match Objects to search. 
- """ - match obj: - case str(s): - return s - case MatchCompound(type='ws', matched=[]): - return '' - case MatchCompound(type='ws'): - return ' ' - case MatchCompound(matched=xs): - return ''.join(inner_text(x) for x in xs) - case [*xs]: - return ''.join(inner_text(x) for x in xs) - case _: - raise ValueError('How did we get here') - - -def name_to_url(name: str) -> tuple[str | None, str]: - """ - Resolve a class or resource name into an url. - - :param name: - The name of a class or resource, surch as "example::resource". - :return: - A tuple consisting of - - - One of - - An internal link to the definition of that type - - A link to the official puppet documentation - - ``None``, if `name` is "class" - - A string indicating extra HTML classes for this url. - This is mostly so external references can be marked properly. - """ - if name in built_in_types: - return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc') - elif name == 'class': - return (None, '') - else: - # TODO special cases for puppet's built in types. 
- # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html - module, *items = name.lstrip(':').split('::') - # TODO get prefix from the command line/config file - return ('/code/muppet-strings/output/' - + '/'.join([module, 'manifests', *(items if items else ['init'])]), - '') - - -puppet_doc_base = 'https://www.puppet.com/docs/puppet/7' -lang_facts_builtin_variables = (f'{puppet_doc_base}/lang_facts_builtin_variables' - '#lang_facts_builtin_variables') -server_variables = f'{lang_facts_builtin_variables}-server-variables' -compiler_variables = f'{lang_facts_builtin_variables}-compiler-variables' -trusted_facts = f'{lang_facts_builtin_variables}-trusted-facts' -server_facts = f'{lang_facts_builtin_variables}-server-facts' - -built_in_variables = { - 'facts': 'https://google.com', - # clientcert, clientversion, puppetversion, clientnoop, - # agent_specified_environment: - # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts - 'trusted': trusted_facts, - 'server_facts': server_facts, - 'environment': server_variables, - 'servername': server_variables, - 'serverip': server_variables, - 'serverversion': server_variables, - 'module_name': compiler_variables, - 'caller_module_name': compiler_variables, - - # Also note the special variable $title and $name - # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name -} - - -def parse_author(author: str) -> str: - """ - Format author tags' content. - - :param author: - The contents of the author tag. If the string is on the - regular "author" format of ``"Firstname Lastname - <first.last@example.com>"`` then the email will be formatted - and hyperlinked. Otherwise the string is returned verbatim. - :return: - An HTML safe string, possibly including tags. 
- """ - m = re.match(r'(?P<author>.*) (<(?P<email>.*)>)|(?P<any>.*)', author) - assert m, "The above regex can't fail" - if m['author'] and m['email']: - author = html.escape(m['author']) - email = html.escape(m['email']) - return f'{author} <a class="email" href="mailto:{email}"><{email}></a>;' - else: - return html.escape(m['any']) - - -# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html -# https://www.puppet.com/docs/puppet/7/types/file.html -# ... -built_in_types = { - 'package', - 'file', - 'service', - 'notify', - 'exec', - 'user', - 'group', -} - -# https://www.puppet.com/docs/puppet/7/function.html#{} -built_in_functions = { - 'abs', - 'alert', - 'all', - 'annotate', - 'any', - 'assert_type', - 'binary_file', - 'break', - 'call', - 'camelcase', - 'capitalize', - 'ceiling', - 'chomp', - 'chop', - 'compare', - 'contain', - 'convert_to', - 'create_resources', - 'crit', - 'debug', - 'defined', - 'dig', - 'digest', - 'downcase', - 'each', - 'emerg', - 'empty', - 'epp', - 'err', - 'eyaml_lookup_key', - 'fail', - 'file', - 'filter', - 'find_file', - 'find_template', - 'flatten', - 'floor', - 'fqdn_rand', - 'generate', - 'get', - 'getvar', - 'group_by', - 'hiera', - 'hiera_array', - 'hiera_hash', - 'hiera_include', - 'hocon_data', - 'import', - 'include', - 'index', - 'info', - 'inline_epp', - 'inline_template', - 'join', - 'json_data', - 'keys', - 'length', - 'lest', - 'lookup', - 'lstrip', - 'map', - 'match', - 'max', - 'md5', - 'min', - 'module_directory', - 'new', - 'next', - 'notice', - 'partition', - 'realize', - 'reduce', - 'regsubst', - 'require', - 'return', - 'reverse_each', - 'round', - 'rstrip', - 'scanf', - 'sha1', - 'sha256', - 'shellquote', - 'size', - 'slice', - 'sort', - 'split', - 'sprintf', - 'step', - 'strftime', - 'strip', - 'tag', - 'tagged', - 'template', - 'then', - 'tree_each', - 'type', - 'unique', - 'unwrap', - 'upcase', - 'values', - 'versioncmp', - 'warning', - 'with', - 'yaml_data', -} - - -def find_declarations(objs: 
list[MatchObject]) -> list[str]: - """ - Find all local variable declarations. - - Searches the code for all local variable declarations, returing a - list of variable names. - - Note that the same variable might appear multiple times, for example: - - .. code-block:: puppet - :caption: The same variable being declared twice - - if $something { - $x = 10 - } else { - $x = 20 - } - """ - declarations = [] - for obj in objs: - match obj: - case MatchCompound(type='declaration', matched=xs): - for x in xs: - match x: - case MatchCompound(type='var', matched=ys): - declarations.append(inner_text(ys)) - return declarations - - -class Reserializer: - """ - Context for reserializing parsed data back into code. - - :param local_vars: - Variables declared within this file. Used when resolving - hyperlinks. - """ - - def __init__(self, local_vars: list[str]): - self.local_vars: list[str] = local_vars - - def reserialize(self, - obj: MatchObject | Sequence[MatchObject]) -> str: - """ - Reconstruct puppet code after parsing it. - - After building the parser, and parsing the puppet code into a tree - of MatchObjects; this procedure returns it into puppet code. - Difference being that we now have metadata, meaning that syntax - highlighting and variable hyperlinks can be inserted. - - :param obj: - Should be assumed to be a list of MatchObject's, or something similar. - - MatchCompound objects are serialized as - - .. code-block:: html - - <span class="{type}">{body}</span> - - strings as themselves, and lists have reserialize mapped over them. - - """ - out: list[str] = [] - # logger.info("obj = %a", obj) - - # TODO hyperlink functions. - # The problem is that a function can either be implemented in - # Puppet, or in Ruby. And Ruby functions' names aren't bound - # by the directory layout. 
- match obj: - case str(s): - out.append(html.escape(s)) - - case MatchCompound(type='resource-name', matched=xs): - name = inner_text(xs) - url, cls = name_to_url(name) - if url: - out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>') - else: - # TODO this is class, but the class name should - # also be hyperlinked - out.append(f'<span class="resource-name {cls}">{name}</span>') - - case MatchCompound(type='invoke', matched=xs): - function = None - for x in xs: - match x: - case MatchCompound(type='qn', matched=ys): - if function is None: - function = inner_text(ys) - if function in built_in_functions: - # class="qn" - url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}" # noqa: E501 - tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>' # noqa: E501 - out.append(tag) - else: - # TODO function to url - out.append(f'<span class="qn">{self.reserialize(ys)}</span>') - else: - if function == 'include': - url, cls = name_to_url(inner_text(ys)) - # class="qn" - tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>' # noqa: E501 - out.append(tag) - else: - out.append(self.reserialize(ys)) - case _: - out.append(self.reserialize(x)) - - case MatchCompound(type='declaration', matched=xs): - for x in xs: - match x: - case MatchCompound(type='var', matched=ys): - inner = ''.join(self.reserialize(y) for y in ys) - out.append(f'<span id="{inner_text(ys)}">{inner}</span>') - case _: - out.append(self.reserialize(x)) - - case MatchCompound(type='var', matched=xs): - out.append(self.var_to_url(inner_text(xs))) - - case MatchCompound(type=type, matched=xs): - body = ''.join(self.reserialize(x) for x in xs) - out.append(f'<span class="{type}">{body}</span>') - - case [*xs]: - out.extend(self.reserialize(x) for x in xs) - - case rest: - logger.error("Unknown type: %a", rest) - - return ''.join(out) - - def var_to_url(self, var: str) -> str: - """ - Format variable, adding hyperlink to its definition. 
- - TODO these can refer to both defined types (`manifests/*.pp`), - as well as resource types (`lib/puppet/provider/*/*.rb` / - `lib/tpuppet/type/*.rb`) - - Same goes for functions (`functions/*.pp`), - (`lib/puppet/functions.rb`). - - :param var: - Name of the variable. - - :return: - An HTML anchor element. - """ - match var.split('::'): - case [name]: - # Either a local or global variable - # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html - - href = None - cls = '' - if name in self.local_vars: - href = f'#{html.escape(var)}' - elif name in built_in_variables: - href = html.escape(built_in_variables[name]) - cls = 'puppet-doc' - - if href: - return f'<a class="var {cls}" href="{href}">{var}</a>' - else: - # `name` refers to a global fact. - return f'<span class="var">{var}</span>' - - case ['', name]: - # A global variable - if name in built_in_variables: - href = html.escape(built_in_variables[name]) - img = '<img src="/code/muppet-strings/output/static/favicon.ico" />' - return f'<a class="var" href="{href}">{var}{img}</a>' - else: - return f'<span class="var">{var}</span>' - - # Note the "special module" 'settings', - # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables - case ['', module, *items, name]: - s = '/code/muppet-strings/output/' \ - + '/'.join([module, 'manifests', *(items if items else ['init'])]) - s += f'#{name}' - return f'<a class="var" href="{s}">{var}</a>' - case [module, *items, name]: - s = '/code/muppet-strings/output/' \ - + '/'.join([module, 'manifests', *(items if items else ['init'])]) - s += f'#{name}' - return f'<a class="var" href="{s}">{var}</a>' - case _: - raise ValueError() - - -def parse_puppet(source: str, file: str, in_parameters: list[str]) -> str: - """ - Parse and syntax highlight the given puppet source. - - :returns: An HTML string - """ - # Run the upstream puppet parser, - # then masage the tree into a usable form. 
- ast = build_ast(puppet_parser(source)) - - # From the ast, build a parser combinator parser. - # This parser will attach sufficient metadata to allow syntax - # highlighting and hyperlinking - parser = ParserFormatter().serialize(ast) - - # Run the generated parser, giving us a list of match objects. - match_objects = ParserCombinator(source, file).get(parser) - - # Reserialize the matched data back into puppet code, realizing - # the syntax highlighting and hyperlinks. - return Reserializer(find_declarations(match_objects) + (in_parameters)) \ - .reserialize(match_objects) diff --git a/muppet/output/docstring.py b/muppet/output/docstring.py new file mode 100644 index 0000000..b85676c --- /dev/null +++ b/muppet/output/docstring.py @@ -0,0 +1,258 @@ +""" +Generate output for Puppet Docstrings. + +Docstrings are the comments preceding any top level puppet +declaration (such as classes, resource definitions, ...). These have a +number of "magic" tags for attaching metadata, along with usually +being Markdown formatted. This module assumes that they all are +Markdown formatted, which unfortunately leads to some (minor) errors. + +(The final output also contains the original source, allowing these +errors to be overlooked). +""" + +from dataclasses import dataclass, field +import html +import re +from typing import cast +from muppet.markdown import markdown +from muppet.puppet.strings import ( + DocString, + DocStringApiTag, + DocStringAuthorTag, + DocStringExampleTag, + DocStringOptionTag, + DocStringOverloadTag, + DocStringParamTag, + DocStringRaiseTag, + DocStringReturnTag, + DocStringSeeTag, + DocStringSinceTag, + DocStringSummaryTag, + DocStringTag, +) + + +# TODO what even is this for? +param_doc: dict[str, str] = {} + + +@dataclass +class GroupedTags: + """ + All tags from a class (or similar) docstring. + + Most fields are simply lists of tags. The reason for trailing + underscores on each entry is since some tag names collide with + python keywords (e.g.
``raise``). + """ + + param_: list[DocStringParamTag] = field(default_factory=list) # noqa: E221 + example_: list[DocStringExampleTag] = field(default_factory=list) # noqa: E221 + overload_: list[DocStringOverloadTag] = field(default_factory=list) # noqa: E221 + option_: dict[str, list[DocStringOptionTag]] = field(default_factory=dict) # noqa: E221 + """ + Options document Hash parameters valid values. + + Each key is the corresponding parameter, and the value is the list + of registered options for that hash. + """ + + author_: list[DocStringAuthorTag] = field(default_factory=list) # noqa: E221 + api_: list[DocStringApiTag] = field(default_factory=list) # noqa: E221 + raise_: list[DocStringRaiseTag] = field(default_factory=list) # noqa: E221 + return_: list[DocStringReturnTag] = field(default_factory=list) # noqa: E221 + since_: list[DocStringSinceTag] = field(default_factory=list) # noqa: E221 + summary_: list[DocStringSummaryTag] = field(default_factory=list) # noqa: E221 + see_: list[DocStringSeeTag] = field(default_factory=list) # noqa: E221 + other_: list[DocStringTag] = field(default_factory=list) # noqa: E221 + """All tags of unknown type.""" + + @classmethod + def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags': + """Group a list of tags.""" + grouped_tags = cls() + for tag in tags: + if tag.tag_name == 'option': + tag = cast(DocStringOptionTag, tag) + grouped_tags.option_.setdefault(tag.parent, []).append(tag) + elif tag.tag_name in {'param', 'example', 'overload', 'author', 'api', + 'raise', 'return', 'since', 'summary', 'see'}: + getattr(grouped_tags, tag.tag_name + '_').append(tag) + else: + grouped_tags.other_.append(tag) + return grouped_tags + + +def parse_author(author: str) -> str: + """ + Format author tags' content. + + :param author: + The contents of the author tag. If the string is on the + regular "author" format of ``"Firstname Lastname + <first.last@example.com>"`` then the email will be formatted + and hyperlinked. 
Otherwise the string is returned verbatim. + :return: + An HTML safe string, possibly including tags. + """ + m = re.match(r'(?P<author>.*) (<(?P<email>.*)>)|(?P<any>.*)', author) + assert m, "The above regex can't fail" + if m['author'] and m['email']: + author = html.escape(m['author']) + email = html.escape(m['email']) + return f'{author} <a class="email" href="mailto:{email}"><{email}></a>;' + else: + return html.escape(m['any']) + + +def format_docstring(name: str, docstring: DocString) -> tuple[str, str]: + """ + Format docstrings as they appear in some puppet types. + + Those types being: + + * puppet_classes, + * puppet_type_aliases, and + * defined_types + """ + global param_doc + + # The api tag is ignored, since it instead is shown from context + + out = '' + + param_doc = {tag.name: tag.text or '' + for tag in docstring.tags + if isinstance(tag, DocStringParamTag)} + + grouped_tags = GroupedTags.from_taglist(docstring.tags) + + # -------------------------------------------------- + + out += '<a href="#code">Jump to Code</a><br/>' + + if tags := grouped_tags.summary_: + out += '<em class="summary">' + for tag in tags: + out += html.escape(tag.text) + out += '</em>' + + out += '<div class="description">' + # TODO "TODO" highlighting + out += markdown(docstring.text) + out += '</div>' + + # TODO proper handling of multiple @see tags + if sees := grouped_tags.see_: + out += '<b>See</b> ' + for see in sees: + link: str + m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name) + assert m, "Regex always matched" + if m['url']: + link = f'<a href="{see.name}">{see.name}</a>' + out += link + elif m['man']: + page = see.name[:-3] + section = see.name[-2] + # TODO man providers + link = f"https://manned.org/man/{page}.{section}" + out += link + else: + if '::' in m['other']: + # TODO + pass + else: + # TODO + # link = see + pass + out += m['other'] + out += ' ' + see.text + + if authors := grouped_tags.author_: + out += '<div 
class="author">' + out += "<em>Written by </em>" + if len(authors) == 1: + out += parse_author(authors[0].text) + else: + out += '<ul>' + for author in authors: + out += f'<li>{parse_author(author.text)}</li>' + out += '</ul>' + out += '</div>' + + out += '<hr/>' + + t: DocStringTag + + for t in grouped_tags .example_: + out += '<div class="code-example">' + + if name := t.name: + # TODO markup for title + out += f'<div class="code-example-header">{html.escape(name)}</div>\n' + # TODO highlight? + # Problem is that we don't know what language the example + # is in. Pygmentize however seems to do a reasonable job + # treating anything as puppet code + text = html.escape(t.text) + out += f'<pre><code class="puppet">{text}</code></pre>\n' + out += '</div>' + + out += '<hr/>' + + out += '<dl>' + for t in grouped_tags.param_: + name = html.escape(t.name) + out += f'<dt><span id="{name}" class="variable">{name}</span>' + match t.types: + case [x]: + # TODO highlight type? + out += f': <code>{html.escape(x)}</code>' + case [_, *_]: + raise ValueError("How did you get multiple types onto a parameter?") + + # TODO Fetch default values from puppet strings output + # Then in javascript query Hiera to get the true "default" + # values for a given machine (somewhere have a setting for + # selecting machine).
+ out += '</dt>' + + if text := t.text: + text = re.sub(r'(NOTE|TODO)', + r'<mark>\1</mark>', + markdown(text)) + + if options := grouped_tags.option_.get(t.name): + text += '<dl>' + for option in options: + text += '<dt>' + text += html.escape(option.opt_name) + match option.opt_types: + case [x]: + text += f' [<code>{html.escape(x)}</code>]' + case [_, *_]: + raise ValueError("How did you get multiple types onto an option?") + text += '</dt>' + text += '<dd>' + if option.opt_text: + text += re.sub(r'(NOTE|TODO)', + r'<mark>\1</mark>', + markdown(option.opt_text)) + text += '</dd>' + text += '</dl>' + + out += f"<dd>{text}</dd>" + else: + out += '<dd><em>Undocumented</em></dd>' + out += '</dl>' + + # TODO remaining tags + # "overload" + # raise + # return + # since + # _other + + return (name, out) diff --git a/muppet/output/puppet_source.py b/muppet/output/puppet_source.py new file mode 100644 index 0000000..d3b9f4d --- /dev/null +++ b/muppet/output/puppet_source.py @@ -0,0 +1,418 @@ +"""Generate output for Puppet Source code.""" + +import html +import logging +from typing import Sequence + +from muppet.parser_combinator import ( + ParserCombinator, + MatchCompound, + MatchObject, +) +from muppet.puppet.ast import build_ast +from muppet.puppet.parser import puppet_parser +from muppet.puppet.format.parser import ParserFormatter + +from .util import inner_text + + +logger = logging.getLogger(__name__) + + +_puppet_doc_base = 'https://www.puppet.com/docs/puppet/7' +_lang_facts_builtin_variables = (f'{_puppet_doc_base}/lang_facts_builtin_variables' + '#lang_facts_builtin_variables') +_server_variables = f'{_lang_facts_builtin_variables}-server-variables' +_compiler_variables = f'{_lang_facts_builtin_variables}-compiler-variables' +_trusted_facts = f'{_lang_facts_builtin_variables}-trusted-facts' +_server_facts = f'{_lang_facts_builtin_variables}-server-facts' + +_built_in_variables = { + 'facts': 'https://google.com', + # clientcert, clientversion, puppetversion, 
clientnoop, + # agent_specified_environment: + # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts + 'trusted': _trusted_facts, + 'server_facts': _server_facts, + 'environment': _server_variables, + 'servername': _server_variables, + 'serverip': _server_variables, + 'serverversion': _server_variables, + 'module_name': _compiler_variables, + 'caller_module_name': _compiler_variables, + + # Also note the special variable $title and $name + # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name +} + + +# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html +# https://www.puppet.com/docs/puppet/7/types/file.html +# ... +_built_in_types = { + 'package', + 'file', + 'service', + 'notify', + 'exec', + 'user', + 'group', +} + +# https://www.puppet.com/docs/puppet/7/function.html#{} +_built_in_functions = { + 'abs', + 'alert', + 'all', + 'annotate', + 'any', + 'assert_type', + 'binary_file', + 'break', + 'call', + 'camelcase', + 'capitalize', + 'ceiling', + 'chomp', + 'chop', + 'compare', + 'contain', + 'convert_to', + 'create_resources', + 'crit', + 'debug', + 'defined', + 'dig', + 'digest', + 'downcase', + 'each', + 'emerg', + 'empty', + 'epp', + 'err', + 'eyaml_lookup_key', + 'fail', + 'file', + 'filter', + 'find_file', + 'find_template', + 'flatten', + 'floor', + 'fqdn_rand', + 'generate', + 'get', + 'getvar', + 'group_by', + 'hiera', + 'hiera_array', + 'hiera_hash', + 'hiera_include', + 'hocon_data', + 'import', + 'include', + 'index', + 'info', + 'inline_epp', + 'inline_template', + 'join', + 'json_data', + 'keys', + 'length', + 'lest', + 'lookup', + 'lstrip', + 'map', + 'match', + 'max', + 'md5', + 'min', + 'module_directory', + 'new', + 'next', + 'notice', + 'partition', + 'realize', + 'reduce', + 'regsubst', + 'require', + 'return', + 'reverse_each', + 'round', + 'rstrip', + 'scanf', + 'sha1', + 'sha256', + 'shellquote', + 'size', + 'slice', + 'sort', + 'split', 
+ 'sprintf', + 'step', + 'strftime', + 'strip', + 'tag', + 'tagged', + 'template', + 'then', + 'tree_each', + 'type', + 'unique', + 'unwrap', + 'upcase', + 'values', + 'versioncmp', + 'warning', + 'with', + 'yaml_data', +} + + +def _find_declarations(objs: list[MatchObject]) -> list[str]: + """ + Find all local variable declarations. + + Searches the code for all local variable declarations, returning a + list of variable names. + + Note that the same variable might appear multiple times, for example: + + .. code-block:: puppet + :caption: The same variable being declared twice + + if $something { + $x = 10 + } else { + $x = 20 + } + """ + declarations = [] + for obj in objs: + match obj: + case MatchCompound(type='declaration', matched=xs): + for x in xs: + match x: + case MatchCompound(type='var', matched=ys): + declarations.append(inner_text(ys)) + return declarations + + +class _PuppetReserializer: + """ + Reserializes parsed puppet code back into puppet code. + + This allows syntax highlighting, and hyperlinking to be added to the code. + + :param local_vars: + Variables declared within this file. Used when resolving + hyperlinks. + """ + + def __init__(self, local_vars: list[str]): + self.local_vars: list[str] = local_vars + + def reserialize(self, obj: MatchObject | Sequence[MatchObject]) -> str: + """ + Reconstruct puppet code after parsing it. + + After building the parser, and parsing the puppet code into a tree + of MatchObjects; this procedure returns it into puppet code. + Difference being that we now have metadata, meaning that syntax + highlighting and variable hyperlinks can be inserted. + + :param obj: + Should be assumed to be a list of MatchObject's, or something similar. + + MatchCompound objects are serialized as + + .. code-block:: html + + <span class="{type}">{body}</span> + + strings as themselves, and lists have reserialize mapped over them. + + """ + out: list[str] = [] + # logger.info("obj = %a", obj) + + # TODO hyperlink functions.
+ # The problem is that a function can either be implemented in + # Puppet, or in Ruby. And Ruby functions' names aren't bound + # by the directory layout. + match obj: + case str(s): + out.append(html.escape(s)) + + case MatchCompound(type='resource-name', matched=xs): + name = inner_text(xs) + url, cls = name_to_url(name) + if url: + out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>') + else: + # TODO this is class, but the class name should + # also be hyperlinked + out.append(f'<span class="resource-name {cls}">{name}</span>') + + case MatchCompound(type='invoke', matched=xs): + function = None + for x in xs: + match x: + case MatchCompound(type='qn', matched=ys): + if function is None: + function = inner_text(ys) + if function in _built_in_functions: + # class="qn" + url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}" # noqa: E501 + tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>' # noqa: E501 + out.append(tag) + else: + # TODO function to url + out.append(f'<span class="qn">{self.reserialize(ys)}</span>') + else: + if function == 'include': + url, cls = name_to_url(inner_text(ys)) + # class="qn" + tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>' # noqa: E501 + out.append(tag) + else: + out.append(self.reserialize(ys)) + case _: + out.append(self.reserialize(x)) + + case MatchCompound(type='declaration', matched=xs): + for x in xs: + match x: + case MatchCompound(type='var', matched=ys): + inner = ''.join(self.reserialize(y) for y in ys) + out.append(f'<span id="{inner_text(ys)}">{inner}</span>') + case _: + out.append(self.reserialize(x)) + + case MatchCompound(type='var', matched=xs): + out.append(self.var_to_url(inner_text(xs))) + + case MatchCompound(type=type, matched=xs): + body = ''.join(self.reserialize(x) for x in xs) + out.append(f'<span class="{type}">{body}</span>') + + case [*xs]: + out.extend(self.reserialize(x) for x in xs) + + case rest: + logger.error("Unknown type: 
%a", rest) + + return ''.join(out) + + def var_to_url(self, var: str) -> str: + """ + Format variable, adding hyperlink to its definition. + + TODO these can refer to both defined types (`manifests/*.pp`), + as well as resource types (`lib/puppet/provider/*/*.rb` / + `lib/puppet/type/*.rb`) + + Same goes for functions (`functions/*.pp`), + (`lib/puppet/functions.rb`). + + :param var: + Name of the variable. + + :return: + An HTML anchor element. + """ + match var.split('::'): + case [name]: + # Either a local or global variable + # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html + + href = None + cls = '' + if name in self.local_vars: + href = f'#{html.escape(var)}' + elif name in _built_in_variables: + href = html.escape(_built_in_variables[name]) + cls = 'puppet-doc' + + if href: + return f'<a class="var {cls}" href="{href}">{var}</a>' + else: + # `name` refers to a global fact. + return f'<span class="var">{var}</span>' + + case ['', name]: + # A global variable + if name in _built_in_variables: + href = html.escape(_built_in_variables[name]) + img = '<img src="/code/muppet-strings/output/static/favicon.ico" />' + return f'<a class="var" href="{href}">{var}{img}</a>' + else: + return f'<span class="var">{var}</span>' + + # Note the "special module" 'settings', + # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables + case ['', module, *items, name]: + s = '/code/muppet-strings/output/' \ + + '/'.join([module, 'manifests', *(items if items else ['init'])]) + s += f'#{name}' + return f'<a class="var" href="{s}">{var}</a>' + case [module, *items, name]: + s = '/code/muppet-strings/output/' \ + + '/'.join([module, 'manifests', *(items if items else ['init'])]) + s += f'#{name}' + return f'<a class="var" href="{s}">{var}</a>' + case _: + raise ValueError() + + +def hyperlink_puppet_source(source: str, file: str, in_parameters: list[str]) -> str: + """ + Parse and syntax highlight the
given puppet source. + + :returns: An HTML string + """ + # Run the upstream puppet parser, + # then massage the tree into a usable form. + ast = build_ast(puppet_parser(source)) + + # From the ast, build a parser combinator parser. + # This parser will attach sufficient metadata to allow syntax + # highlighting and hyperlinking + parser = ParserFormatter().serialize(ast) + + # Run the generated parser, giving us a list of match objects. + match_objects = ParserCombinator(source, file).get(parser) + + # Reserialize the matched data back into puppet code, realizing + # the syntax highlighting and hyperlinks. + return _PuppetReserializer(_find_declarations(match_objects) + (in_parameters)) \ + .reserialize(match_objects) + + +def name_to_url(name: str) -> tuple[str | None, str]: + """ + Resolve a class or resource name into an url. + + :param name: + The name of a class or resource, such as "example::resource". + :return: + A tuple consisting of + + - One of + - An internal link to the definition of that type + - A link to the official puppet documentation + - ``None``, if `name` is "class" + - A string indicating extra HTML classes for this url. + This is mostly so external references can be marked properly. + """ + if name in _built_in_types: + return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc') + elif name == 'class': + return (None, '') + else: + # TODO special cases for puppet's built in types. + # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html + module, *items = name.lstrip(':').split('::') + # TODO get prefix from the command line/config file + return ('/code/muppet-strings/output/' + + '/'.join([module, 'manifests', *(items if items else ['init'])]), + '') diff --git a/muppet/output/util.py b/muppet/output/util.py new file mode 100644 index 0000000..b1d69f5 --- /dev/null +++ b/muppet/output/util.py @@ -0,0 +1,44 @@ +""" +Misc utilities for the final output.
+ +These don't really belong to any sub-system, even though some are more +useful than others. + +The aim is to only have pure functions here. +""" + +from muppet.parser_combinator import ( + MatchCompound, + MatchObject, +) + + +def inner_text(obj: MatchObject | list[MatchObject]) -> str: + """ + Extract the text content from a set of MatchObjects. + + This is really similar to HTML's inner_text. + + Empty whitespace tags are expanded into nothing, non-empty + whitespace tags become a single space (note that this discards + comments). + + This only works properly if no function was mapped over the parser + return values in tree, see :func:`muppet.parser_combinator.fmap`. + + :param obj: + Match Objects to search. + """ + match obj: + case str(s): + return s + case MatchCompound(type='ws', matched=[]): + return '' + case MatchCompound(type='ws'): + return ' ' + case MatchCompound(matched=xs): + return ''.join(inner_text(x) for x in xs) + case [*xs]: + return ''.join(inner_text(x) for x in xs) + case _: + raise ValueError('How did we get here') -- GitLab