Skip to content
Snippets Groups Projects
Commit 4892081b authored by Hugo Hörnquist's avatar Hugo Hörnquist
Browse files

Start splitting output back into modules.

The difference between this split, and the old, is that everything is
now clearly under output.
parent 030b736f
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@ Subpackages
.. toctree::
:maxdepth: 4
muppet.output
muppet.puppet
muppet.syntax_highlight
......@@ -21,11 +22,9 @@ Submodules
muppet.intersperse
muppet.lookup
muppet.markdown
muppet.output
muppet.parser_combinator
muppet.symbols
muppet.tabs
muppet.templates
muppet.util
Module contents
......
This diff is collapsed.
"""
Generate output for Puppet Docstrings.
Docstrings are the comments preceding any top level puppet
declaration (such as classes, resource definitions, ...). These have a
number of "magic" tags for attaching metadata, along with usually
being Markdown formatted. This module assumes that they all are
Markdown formatted, which unfortunately leads to some (minor) errors.
(The final output also contains the original source, allowing these
errors to be overlooked).
"""
from dataclasses import dataclass, field
import html
import re
from typing import cast
from muppet.markdown import markdown
from muppet.puppet.strings import (
DocString,
DocStringApiTag,
DocStringAuthorTag,
DocStringExampleTag,
DocStringOptionTag,
DocStringOverloadTag,
DocStringParamTag,
DocStringRaiseTag,
DocStringReturnTag,
DocStringSeeTag,
DocStringSinceTag,
DocStringSummaryTag,
DocStringTag,
)
# TODO what even is this for?
# Maps parameter name to its documentation text. It is rebound (not
# merged) by every call to ``format_docstring``, so it always reflects the
# ``@param`` tags of the docstring that was formatted most recently.
param_doc: dict[str, str] = {}
@dataclass
class GroupedTags:
"""
All tags from a class (or similar) docstring.
Most fields are simply lists of tags. The reason for trailing
underscores on each entry is since some tag names collide with
python keywords (e.g. ``raise``).
"""
param_: list[DocStringParamTag] = field(default_factory=list) # noqa: E221
example_: list[DocStringExampleTag] = field(default_factory=list) # noqa: E221
overload_: list[DocStringOverloadTag] = field(default_factory=list) # noqa: E221
option_: dict[str, list[DocStringOptionTag]] = field(default_factory=dict) # noqa: E221
"""
Options document Hash parameters valid values.
Each key is the corresponding parameter, and the value is the list
of registered options for that hash.
"""
author_: list[DocStringAuthorTag] = field(default_factory=list) # noqa: E221
api_: list[DocStringApiTag] = field(default_factory=list) # noqa: E221
raise_: list[DocStringRaiseTag] = field(default_factory=list) # noqa: E221
return_: list[DocStringReturnTag] = field(default_factory=list) # noqa: E221
since_: list[DocStringSinceTag] = field(default_factory=list) # noqa: E221
summary_: list[DocStringSummaryTag] = field(default_factory=list) # noqa: E221
see_: list[DocStringSeeTag] = field(default_factory=list) # noqa: E221
other_: list[DocStringTag] = field(default_factory=list) # noqa: E221
"""All tags of unknown type."""
@classmethod
def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags':
"""Group a list of tags."""
grouped_tags = cls()
for tag in tags:
if tag.tag_name == 'option':
tag = cast(DocStringOptionTag, tag)
grouped_tags.option_.setdefault(tag.parent, []).append(tag)
elif tag.tag_name in {'param', 'example', 'overload', 'author', 'api',
'raise', 'return', 'since', 'summary', 'see'}:
getattr(grouped_tags, tag.tag_name + '_').append(tag)
else:
grouped_tags.other_.append(tag)
return grouped_tags
def parse_author(author: str) -> str:
    """
    Format author tags' content.

    :param author:
        The contents of the author tag. If the string is on the
        regular "author" format of ``"Firstname Lastname
        <first.last@example.com>"`` then the email will be formatted
        and hyperlinked. Otherwise the string is returned verbatim
        (but HTML escaped).
    :return:
        An HTML safe string, possibly including tags.
    """
    m = re.match(r'(?P<author>.*) <(?P<email>.*)>', author)
    # Both parts must be non-empty for the string to count as being on
    # the "Name <email>" form. Everything else (including " <email>" and
    # "Name <>") falls through and is escaped in full.
    if m and m['author'] and m['email']:
        name = html.escape(m['author'])
        email = html.escape(m['email'])
        return f'{name} <a class="email" href="mailto:{email}">&lt;{email}&gt;</a>'
    return html.escape(author)
def format_docstring(name: str, docstring: DocString) -> tuple[str, str]:
"""
Format docstrings as they appear in some puppet types.
Those types being:
* puppet_classes,
* puppet_type_aliases, and
* defined_types
"""
global param_doc
# The api tag is ignored, since it instead is shown from context
out = ''
param_doc = {tag.name: tag.text or ''
for tag in docstring.tags
if isinstance(tag, DocStringParamTag)}
grouped_tags = GroupedTags.from_taglist(docstring.tags)
# --------------------------------------------------
out += '<a href="#code">Jump to Code</a><br/>'
if tags := grouped_tags.summary_:
out += '<em class="summary">'
for tag in tags:
out += html.escape(tag.text)
out += '</em>'
out += '<div class="description">'
# TODO "TODO" highlighting
out += markdown(docstring.text)
out += '</div>'
# TODO proper handling of multiple @see tags
if sees := grouped_tags.see_:
out += '<b>See</b> '
for see in sees:
link: str
m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name)
assert m, "Regex always matched"
if m['url']:
link = f'<a href="{see.name}">{see.name}</a>'
out += link
elif m['man']:
page = see.name[:-3]
section = see.name[-2]
# TODO man providers
link = f"https://manned.org/man/{page}.{section}"
out += link
else:
if '::' in m['other']:
# TODO
pass
else:
# TODO
# link = see
pass
out += m['other']
out += ' ' + see.text
if authors := grouped_tags.author_:
out += '<div class="author">'
out += "<em>Written by </em>"
if len(authors) == 1:
out += parse_author(authors[0].text)
else:
out += '<ul>'
for author in authors:
out += f'<li>{parse_author(author.text)}</li>'
out += '</ul>'
out += '</div>'
out += '<hr/>'
t: DocStringTag
for t in grouped_tags .example_:
out += '<div class="code-example">'
if name := t.name:
# TODO markup for title
out += f'<div class="code-example-header">{html.escape(name)}</div>\n'
# TODO highlight?
# Problem is that we don't know what language the example
# is in. Pygemntize however seems to do a reasonable job
# treating anything as puppet code
text = html.escape(t.text)
out += f'<pre><code class="puppet">{text}</code></pre>\n'
out += '</div>'
out += '<hr/>'
out += '<dl>'
for t in grouped_tags.param_:
name = html.escape(t.name)
out += f'<dt><span id="{name}" class="variable">{name}</span>'
match t.types:
case [x]:
# TODO highlight type?
out += f': <code>{html.escape(x)}</code>'
case [_, *_]:
raise ValueError("How did you get multiple types onto a parameter?")
# TODO Fetch default values from puppet strings output
# Then in javascript query Hiera to get the true "default"
# values for a given machine (somewhere have a setting for
# selecting machine).
out += '</dt>'
if text := t.text:
text = re.sub(r'(NOTE|TODO)',
r'<mark>\1</mark>',
markdown(text))
if options := grouped_tags.option_.get(t.name):
text += '<dl>'
for option in options:
text += '<dt>'
text += html.escape(option.opt_name)
match option.opt_types:
case [x]:
text += f' [<code>{html.escape(x)}</code>]'
case [_, *_]:
raise ValueError("How did you get multiple types onto an option?")
text += '</dt>'
text += '<dd>'
if option.opt_text:
text += re.sub(r'(NOTE|TODO)',
r'<mark>\1</mark>',
markdown(option.opt_text))
text += '</dd>'
text += '</dl>'
out += f"<dd>{text}</dd>"
else:
out += '<dd><em>Undocumented</em></dd>'
out += '</dl>'
# TODO remaining tags
# "overload"
# raise
# return
# since
# _other
return (name, out)
"""Generate output for Puppet Source code."""
import html
import logging
from typing import Sequence
from muppet.parser_combinator import (
ParserCombinator,
MatchCompound,
MatchObject,
)
from muppet.puppet.ast import build_ast
from muppet.puppet.parser import puppet_parser
from muppet.puppet.format.parser import ParserFormatter
from .util import inner_text
logger = logging.getLogger(__name__)
# Root of the official Puppet 7 documentation.
_puppet_doc_base = 'https://www.puppet.com/docs/puppet/7'
# Prefix shared by all section anchors on the "facts and built-in
# variables" page; a section name is appended to form a full link.
_lang_facts_builtin_variables = (f'{_puppet_doc_base}/lang_facts_builtin_variables'
                                 '#lang_facts_builtin_variables')
_server_variables = f'{_lang_facts_builtin_variables}-server-variables'
_compiler_variables = f'{_lang_facts_builtin_variables}-compiler-variables'
_trusted_facts = f'{_lang_facts_builtin_variables}-trusted-facts'
_server_facts = f'{_lang_facts_builtin_variables}-server-facts'

# Maps the name of each built in variable (without any leading `$` or
# `::`) to the documentation it should be hyperlinked to.
_built_in_variables = {
    # No dedicated section is known for the `$facts` hash, so link to the
    # top of the facts and built-in variables page.
    'facts': f'{_puppet_doc_base}/lang_facts_builtin_variables',
    # clientcert, clientversion, puppetversion, clientnoop,
    # agent_specified_environment:
    # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts
    'trusted': _trusted_facts,
    'server_facts': _server_facts,
    'environment': _server_variables,
    'servername': _server_variables,
    'serverip': _server_variables,
    'serverversion': _server_variables,
    'module_name': _compiler_variables,
    'caller_module_name': _compiler_variables,
    # Also note the special variable $title and $name
    # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name
}
# Names of resource types which `name_to_url` links to the official
# Puppet documentation
# (https://www.puppet.com/docs/puppet/7/types/<name>.html) rather than
# to locally generated output.
# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
# https://www.puppet.com/docs/puppet/7/types/file.html
# ...
_built_in_types = {
    'package',
    'file',
    'service',
    'notify',
    'exec',
    'user',
    'group',
}
# Names of functions which, when invoked, are hyperlinked to the
# official function reference. The function name is used as the URL
# fragment:
# https://www.puppet.com/docs/puppet/7/function.html#{}
_built_in_functions = {
    'abs',
    'alert',
    'all',
    'annotate',
    'any',
    'assert_type',
    'binary_file',
    'break',
    'call',
    'camelcase',
    'capitalize',
    'ceiling',
    'chomp',
    'chop',
    'compare',
    'contain',
    'convert_to',
    'create_resources',
    'crit',
    'debug',
    'defined',
    'dig',
    'digest',
    'downcase',
    'each',
    'emerg',
    'empty',
    'epp',
    'err',
    'eyaml_lookup_key',
    'fail',
    'file',
    'filter',
    'find_file',
    'find_template',
    'flatten',
    'floor',
    'fqdn_rand',
    'generate',
    'get',
    'getvar',
    'group_by',
    'hiera',
    'hiera_array',
    'hiera_hash',
    'hiera_include',
    'hocon_data',
    'import',
    'include',
    'index',
    'info',
    'inline_epp',
    'inline_template',
    'join',
    'json_data',
    'keys',
    'length',
    'lest',
    'lookup',
    'lstrip',
    'map',
    'match',
    'max',
    'md5',
    'min',
    'module_directory',
    'new',
    'next',
    'notice',
    'partition',
    'realize',
    'reduce',
    'regsubst',
    'require',
    'return',
    'reverse_each',
    'round',
    'rstrip',
    'scanf',
    'sha1',
    'sha256',
    'shellquote',
    'size',
    'slice',
    'sort',
    'split',
    'sprintf',
    'step',
    'strftime',
    'strip',
    'tag',
    'tagged',
    'template',
    'then',
    'tree_each',
    'type',
    'unique',
    'unwrap',
    'upcase',
    'values',
    'versioncmp',
    'warning',
    'with',
    'yaml_data',
}
def _find_declarations(objs: list[MatchObject]) -> list[str]:
    """
    Collect the names of the variables declared by `objs`.

    Only the elements of `objs` themselves are inspected: each one
    that is a ``declaration`` compound contributes the text of every
    ``var`` compound directly inside it.

    A name can show up more than once in the result, for example:

    .. code-block:: puppet
        :caption: The same variable being declared twice

        if $something {
            $x = 10
        } else {
            $x = 20
        }

    :param objs:
        Match objects to look through.
    :return:
        Variable names, in the order they were found.
    """
    names: list[str] = []
    for candidate in objs:
        if not (isinstance(candidate, MatchCompound) and candidate.type == 'declaration'):
            continue
        names.extend(inner_text(child.matched)
                     for child in candidate.matched
                     if isinstance(child, MatchCompound) and child.type == 'var')
    return names
class _PuppetReserializer:
    """
    Reserializes parsed puppet code back into puppet code.

    This allows syntax highlighting, and hyperlinking to be added to the code.

    :param local_vars:
        Variables declared within this file. Used when resolving
        hyperlinks.
    """

    def __init__(self, local_vars: list[str]):
        self.local_vars: list[str] = local_vars

    def reserialize(self, obj: MatchObject | Sequence[MatchObject]) -> str:
        """
        Reconstruct puppet code after parsing it.

        After building the parser, and parsing the puppet code into a tree
        of MatchObjects; this procedure returns it into puppet code.
        Difference being that we now have metadata, meaning that syntax
        highlighting and variable hyperlinks can be inserted.

        :param obj:
            Should be assumed to be a list of MatchObject's, or something similar.

            MatchCompound objects are serialized as

            .. code-block:: html

                <span class="{type}">{body}</span>

            Strings as themselves (HTML escaped), and lists have
            reserialize mapped over them.

            The compound types ``resource-name``, ``invoke``,
            ``declaration`` and ``var`` are handled specially, in order
            to attach hyperlinks and link targets.
        :return:
            An HTML string. Anything which isn't recognized is logged
            as an error, and contributes nothing to the result.
        """
        out: list[str] = []

        # logger.info("obj = %a", obj)

        # TODO hyperlink functions.
        # The problem is that a function can either be implemented in
        # Puppet, or in Ruby. And Ruby functions' names aren't bound
        # by the directory layout.

        # The order of the cases matters: the specific compound types
        # must be tried before the generic MatchCompound case.
        match obj:
            case str(s):
                out.append(html.escape(s))

            case MatchCompound(type='resource-name', matched=xs):
                name = inner_text(xs)
                url, cls = name_to_url(name)
                # name_to_url only returns a None url for the name "class".
                if url:
                    out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>')
                else:
                    # TODO this is class, but the class name should
                    # also be hyperlinked
                    out.append(f'<span class="resource-name {cls}">{name}</span>')

            case MatchCompound(type='invoke', matched=xs):
                # Name of the invoked function. Set from the first 'qn'
                # child, all later 'qn' children are then handled by
                # the else branch below.
                function = None
                for x in xs:
                    match x:
                        case MatchCompound(type='qn', matched=ys):
                            if function is None:
                                function = inner_text(ys)
                                if function in _built_in_functions:
                                    # class="qn"
                                    url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}"  # noqa: E501
                                    tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>'  # noqa: E501
                                    out.append(tag)
                                else:
                                    # TODO function to url
                                    out.append(f'<span class="qn">{self.reserialize(ys)}</span>')
                            else:
                                if function == 'include':
                                    # NOTE(review): name_to_url returns a
                                    # None url for the name "class", which
                                    # would be rendered as href="None" here.
                                    url, cls = name_to_url(inner_text(ys))
                                    # class="qn"
                                    tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>'  # noqa: E501
                                    out.append(tag)
                                else:
                                    out.append(self.reserialize(ys))
                        case _:
                            out.append(self.reserialize(x))

            case MatchCompound(type='declaration', matched=xs):
                for x in xs:
                    match x:
                        case MatchCompound(type='var', matched=ys):
                            # The declared variable gets an id, which is
                            # what the '#...' links produced by var_to_url
                            # for local variables point at.
                            inner = ''.join(self.reserialize(y) for y in ys)
                            out.append(f'<span id="{inner_text(ys)}">{inner}</span>')
                        case _:
                            out.append(self.reserialize(x))

            case MatchCompound(type='var', matched=xs):
                out.append(self.var_to_url(inner_text(xs)))

            case MatchCompound(type=type, matched=xs):
                # Any other compound: wrap the body in a span with the
                # compound's type as the HTML class.
                body = ''.join(self.reserialize(x) for x in xs)
                out.append(f'<span class="{type}">{body}</span>')

            case [*xs]:
                out.extend(self.reserialize(x) for x in xs)

            case rest:
                logger.error("Unknown type: %a", rest)

        return ''.join(out)

    def var_to_url(self, var: str) -> str:
        """
        Format variable, adding hyperlink to its definition.

        TODO these can refer to both defined types (`manifests/*.pp`),
        as well as resource types (`lib/puppet/provider/*/*.rb` /
        `lib/puppet/type/*.rb`)

        Same goes for functions (`functions/*.pp`),
        (`lib/puppet/functions.rb`).

        :param var:
            Name of the variable.

        :return:
            An HTML anchor element, or a span element if no link
            target is known for the variable.
        """
        # The name is split on the namespace separator, and the shape
        # of the result decides which kind of variable this is.
        match var.split('::'):
            case [name]:
                # Either a local or global variable
                # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html

                href = None
                cls = ''
                # Local variables take precedence over built in ones.
                if name in self.local_vars:
                    href = f'#{html.escape(var)}'
                elif name in _built_in_variables:
                    href = html.escape(_built_in_variables[name])
                    cls = 'puppet-doc'

                if href:
                    return f'<a class="var {cls}" href="{href}">{var}</a>'
                else:
                    # `name` refers to a global fact.
                    return f'<span class="var">{var}</span>'

            case ['', name]:
                # A global variable
                if name in _built_in_variables:
                    href = html.escape(_built_in_variables[name])
                    img = '<img src="/code/muppet-strings/output/static/favicon.ico" />'
                    return f'<a class="var" href="{href}">{var}{img}</a>'
                else:
                    return f'<span class="var">{var}</span>'

            # Note the "special module" 'settings',
            # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables

            case ['', module, *items, name]:
                # Fully qualified name with a leading '::'. Links to the
                # output page of the containing class ('init' when
                # directly in the module), with the variable as fragment.
                s = '/code/muppet-strings/output/' \
                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
                s += f'#{name}'
                return f'<a class="var" href="{s}">{var}</a>'
            case [module, *items, name]:
                # Same as above, but without the leading '::'.
                s = '/code/muppet-strings/output/' \
                    + '/'.join([module, 'manifests', *(items if items else ['init'])])
                s += f'#{name}'
                return f'<a class="var" href="{s}">{var}</a>'
            case _:
                # NOTE(review): str.split never returns an empty list, so
                # this looks unreachable.
                raise ValueError()
def hyperlink_puppet_source(source: str, file: str, in_parameters: list[str]) -> str:
    """
    Turn puppet source code into highlighted and hyperlinked HTML.

    :param source:
        The puppet source code.
    :param file:
        Passed along to the parser combinator together with `source`.
    :param in_parameters:
        Further variable names to treat as declared in this file, in
        addition to the declarations found in `source`.
    :returns: An HTML string
    """
    # The upstream puppet parser gives us a tree, which is massaged
    # into a more usable ast.
    tree = build_ast(puppet_parser(source))

    # That ast is turned into a parser combinator parser, which
    # attaches the metadata needed for highlighting and hyperlinking.
    generated_parser = ParserFormatter().serialize(tree)

    # Running the generated parser on the source yields match objects.
    matches = ParserCombinator(source, file).get(generated_parser)

    # Finally, the match objects are written back out as puppet code,
    # now with the highlighting and hyperlinks in place.
    known_variables = _find_declarations(matches) + in_parameters
    serializer = _PuppetReserializer(known_variables)
    return serializer.reserialize(matches)
def name_to_url(name: str) -> tuple[str | None, str]:
    """
    Find the url documenting a class or resource.

    :param name:
        Name of the class or resource, for example "example::resource".
        Leading colons are ignored.
    :return:
        A pair of

        - the url, which is one of

          - a link into the official puppet documentation, for built
            in types,
          - ``None``, when `name` is "class",
          - an internal link to where that type is defined, otherwise;

        - extra HTML classes for the link, as a string. Mainly there
          so that external references can be marked as such.
    """
    if name in _built_in_types:
        return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc')

    if name == 'class':
        return (None, '')

    # TODO special cases for puppet's built in types.
    # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html
    components = name.lstrip(':').split('::')
    module = components[0]
    # A bare module name refers to the module's init manifest.
    manifest = components[1:] or ['init']
    # TODO get prefix from the command line/config file
    location = '/'.join([module, 'manifests'] + manifest)
    return ('/code/muppet-strings/output/' + location, '')
"""
Misc utilities for the final output.
These don't really belong to any sub-system, even though some are more
useful than others.
The aim is to only have pure functions here.
"""
from muppet.parser_combinator import (
MatchCompound,
MatchObject,
)
def inner_text(obj: MatchObject | list[MatchObject]) -> str:
"""
Extract the text content from a set of MatchObjects.
This is really similar to HTML's inner_text.
Empty whitespace tags are expanded into nothing, non-empty
whitespace tags becomes a single space (note that this discards
commets).
This only works properly if no function was mapped over the parser
return values in tree, see :func:`muppet.parser_combinator.fmap`.
:param obj:
Match Objects to search.
"""
match obj:
case str(s):
return s
case MatchCompound(type='ws', matched=[]):
return ''
case MatchCompound(type='ws'):
return ' '
case MatchCompound(matched=xs):
return ''.join(inner_text(x) for x in xs)
case [*xs]:
return ''.join(inner_text(x) for x in xs)
case _:
raise ValueError('How did we get here')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment