From 030b736ffaf1766fafbae2505a16bfae65bc3d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se> Date: Sun, 24 Sep 2023 19:47:15 +0200 Subject: [PATCH] Merge a bunch of files into one. The files - format - gather - templates - output have now all been merged into `output`. The difference between `format` and `output` was always unclear. `gather` could be separate, but it was still so tightly linked to the output. `templates` could have been kept, but it started depending on types from `output`, making it easier to merge (at least as of now). --- Makefile | 2 +- doc/muppet.rst | 2 - muppet/__main__.py | 50 +- muppet/cache.py | 132 +- muppet/format.py | 349 ---- muppet/gather.py | 141 -- muppet/output.py | 1734 +++++++++++++---- muppet/puppet/format/parser.py | 79 +- muppet/puppet/strings/__init__.py | 72 +- muppet/templates.py | 83 - muppet/util.py | 18 + static-src/style.scss | 39 +- templates/base.html | 3 +- templates/code_page.html | 1 - templates/module_index.html | 9 +- .../snippets/ResourceType-index-entry.html | 7 +- 16 files changed, 1718 insertions(+), 1003 deletions(-) delete mode 100644 muppet/format.py delete mode 100644 muppet/gather.py delete mode 100644 muppet/templates.py diff --git a/Makefile b/Makefile index 372fc39..6cb8138 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ all: output DOC_OUTPUT = doc.rendered OUTPUT_FLAGS = --path-base /code/muppet-strings/output \ - --env ~/puppet/generated-environments/production/modules/ + --env ~/puppet/generated-environments/production/ SCSS = scss diff --git a/doc/muppet.rst b/doc/muppet.rst index 326920e..5a724a0 100644 --- a/doc/muppet.rst +++ b/doc/muppet.rst @@ -18,8 +18,6 @@ Submodules muppet.breadcrumbs muppet.cache - muppet.format - muppet.gather muppet.intersperse muppet.lookup muppet.markdown diff --git a/muppet/__main__.py b/muppet/__main__.py index db069cd..a63df52 100644 --- a/muppet/__main__.py +++ b/muppet/__main__.py @@ -5,10 +5,12 @@ import pathlib 
import os import logging import colorlog +import subprocess +import shutil from .cache import Cache -from .gather import get_module, get_modules, ModuleEntry -from .output import setup_index, setup_module +# from .gather import get_module, get_modules, ModuleEntry +from .output import PuppetEnvironment # Our parser combinator system builds heavily on recursing, # overflowing Pythons' stack. Simply cranking up the stack size solves @@ -45,7 +47,8 @@ def __main() -> None: ''') # If deploying to http://example.com/~user/muppet then this should # be set to `~/user/muppet` - parser.add_argument('--path-base', action='store', default='', help=''' + parser.add_argument('--path-base', action='store', metavar='path_base', + default='', help=''' Prefix to web path the pages will be displayed under. ''') parser.add_argument('modules', nargs='*', type=pathlib.Path, help=''' @@ -55,26 +58,31 @@ def __main() -> None: args = parser.parse_args() - env = args.env - cache = Cache('/home/hugo/.cache/muppet-strings') - modules: list[ModuleEntry] - if args.modules != []: - modules = [get_module(cache, mod) - for mod in args.modules] - else: - modules = get_modules(cache, env) - - setup_index('output', modules, path_base=args.path_base) - - for module in modules: - # print(module) - logger.info('Handling %s', module.name) - setup_module('output', module, path_base=args.path_base) - - os.system('make -C static-src --silent install-full PREFIX=$PWD/output') - os.system("cp -r static/* output/static/") + output_directory = 'output' + + # modules: list[ModuleEntry] + # if args.modules != []: + # modules = [get_module(cache, mod) + # for mod in args.modules] + # else: + # modules = get_modules(cache, env) + + env = PuppetEnvironment(source_path=args.env, + output_prefix=args.path_base, + cache=cache) + env.output(output_directory) + + # TODO do this properly + subprocess.run(['make', + '-C', 'static-src', + '--silent', + 'install-full', + f'PREFIX={os.path.join(os.getcwd(), 
output_directory)}'], + check=True) + shutil.copytree('static', os.path.join(output_directory, 'static'), + dirs_exist_ok=True) if __name__ == '__main__': diff --git a/muppet/cache.py b/muppet/cache.py index a9279d1..dccc1b9 100644 --- a/muppet/cache.py +++ b/muppet/cache.py @@ -3,13 +3,109 @@ from typing import ( Callable, Optional, + final, ) import os.path import pathlib import hashlib -class Cache: +class AbstractCache: + """ + Abstract base class for caches. + + Caches allows storing data under arbitrary keys, and later look up + their values. + + These procedures use strings for the keys, since they are easy to + manipulate in python. Individual implementations might however + want to encode them into UTF-8 (or something else). + + Values are bytes, since anything should be storable. + """ + + def __init__(self) -> None: + raise NotImplementedError() + + def put(self, key: str, value: bytes) -> None: + """ + Store data into the cache. + + Multiple writes to the same key are assumed to overwrite each + other. + + :param key: + Name to associate with data + :param value: + Actuall data. + """ + raise NotImplementedError() + + def get(self, key: str) -> Optional[bytes]: + """ + Retrieve data from the cache. + + :param key: + Name which was (hopefully) previously associated with some + data. + + :return: + The previously stored data, if available, otherwise + nothing. + """ + raise NotImplementedError() + + @final + def memoize_function( + self, + prefix: str, + func: Callable[[bytes], bytes]) -> Callable[[bytes], bytes]: + """ + Return a memoized version of the given function. + + A new function is created, which will first check the cache + for the same input, and either return that, or it will run the + original function. + + .. code-block:: python + :caption: How the cache key will be generated. + + key = prefix + hashlib.sha1(data).hexdigest() + + :param prefix: + String to prefix cache keys with. 
Should be unique among all memoized functions + + :param func: + The function to memoize. + """ + def inner(data: bytes) -> bytes: + key = prefix + hashlib.sha1(data).hexdigest() + if value := self.get(key): + return value + else: + value = func(data) + self.put(key, value) + return value + return inner + + @final + def memoize(self, prefix: str) -> Callable[[Callable[[bytes], bytes]], + Callable[[bytes], bytes]]: + """ + :func:`memoize_function`, but as a decorator. + + .. code-block:: python + :caption: Example usage + + @memoize('my_expansive_function') + def my_expansive_function(in: bytes) -> bytes: + ... + """ + return lambda func: self.memoize_function(prefix, func) + + +# TODO rename this to fs-cache +class Cache(AbstractCache): """ A simple cache. @@ -35,21 +131,21 @@ class Cache: except FileNotFoundError: return None - def memoize_function(self, - prefix: str, - func: Callable[[bytes], bytes]) -> Callable[[bytes], bytes]: - """Return a new function identical to the one given, but memoized.""" - def inner(data: bytes) -> bytes: - key = prefix + hashlib.sha1(data).hexdigest() - if value := self.get(key): - return value - else: - value = func(data) - self.put(key, value) - return value - return inner - def memoize(self, prefix: str) -> Callable[[Callable[[bytes], bytes]], - Callable[[bytes], bytes]]: - """memoize_function, but as a decorator.""" - return lambda func: self.memoize_function(prefix, func) +class UnCache(AbstractCache): + """ + A non-caching cache. + + Stores are no-ops, and lookups always fail. + + This is mostly useful for testing. + """ + + def __init__(self) -> None: + pass + + def put(self, key: str, value: bytes) -> None: # noqa: D102 + return None + + def get(self, key: str) -> Optional[bytes]: # noqa: D102 + return None diff --git a/muppet/format.py b/muppet/format.py deleted file mode 100644 index b55d397..0000000 --- a/muppet/format.py +++ /dev/null @@ -1,349 +0,0 @@ -""" -Pretty print a complete puppet documentation. 
- -An ``output.json``, as per produced by ``./merge-json.py`` should be -provided as the first element. This program goes through every -definition in it, and outputs a complete index.html. -""" - -from .markdown import markdown -import html -import re -from typing import ( - Tuple, - Any, -) -import types - -from .puppet.parser import puppet_parser -import logging - -from .puppet.strings import ( - DataTypeAlias, - DefinedType, - DocString, - Function, - PuppetClass, - ResourceType, - DocStringParamTag, - DocStringExampleTag, -) -from muppet.puppet.ast import build_ast -# from muppet.puppet.format import to_string -from muppet.parser_combinator import ( - ParserCombinator, - ParseError, - MatchCompound, -) -from muppet.puppet.format.parser import ParserFormatter - - -logger = logging.getLogger(__name__) - -# parse_puppet = puppet_parser - -param_doc: dict[str, str] = {} - - -def reserialize(obj: Any) -> str: - """ - Reconstruct puppet code after parsing it. - - After building the parser, and parsing the puppet code into a tree - of MatchObjects; this procedure returns it into puppet code. - Difference being that we now have metadata, meaning that syntax - highlighting and variable hyperlinks can be inserted. - - :param obj: - Should be assumed to be a list of MatchObject's, or something similar. - - MatchCompound objects are serialized as - - .. code-block:: html - - <span class="{type}">{body}</span> - - strings as themselves, and lists have reserialize mapped over them. 
- - """ - out: list[str] = [] - # logger.info("obj = %a", obj) - match obj: - case str(s): - out += [s] - case MatchCompound(type=type, matched=xs): - # logger.warning("xs = %a", xs) - body = ''.join(reserialize(x) for x in xs) - out += [f'<span class="{type}">{body}</span>'] - case [*xs]: - out += [reserialize(x for x in xs)] - case rest: - if isinstance(rest, types.GeneratorType): - out += [reserialize(x) for x in rest] - else: - logger.error("Unknown type: %a", rest) - - return ''.join(out) - - -def parse_puppet(source: str, file: str) -> str: - """ - Parse and syntax highlight the given puppet source. - - :returns: An HTML string - """ - # logger.debug("source: %a", source) - # Run the upstream puppet parser, - # then masage the tree into a usable form. - ast = build_ast(puppet_parser(source)) - # logger.info("ast: %a", ast) - # From the ast, build a parser combinator parser. - parser = ParserFormatter().serialize(ast) - # logger.warning("parser: %a", parser) - # Run the generatefd parser, giving us a list of match objects - match_objects = ParserCombinator(source, file).get(parser) - # logger.error("match_objects: %a", match_objects) - return reserialize(match_objects) - -# -------------------------------------------------- - - -def format_docstring(name: str, docstring: DocString) -> Tuple[str, str]: - """ - Format docstrings as they appear in some puppet types. 
- - Those types being: - - * puppet_classes, - * puppet_type_aliases, and - * defined_types - """ - global param_doc - - out = '' - - param_doc = {tag.name: tag.text or '' - for tag in docstring.tags - if isinstance(tag, DocStringParamTag)} - tags = docstring.tags - - # print(param_doc, file=sys.stderr) - - # param_defaults = d_type['defaults'] - - for t in tags: - text = html.escape(t.text) - if t.tag_name == 'summary': - out += '<em class="summary">' - out += text - out += '</em>' - - for t in tags: - text = html.escape(t.text) - if isinstance(t, DocStringExampleTag): - if name := t.name: - out += f'<h3>{name}</h3>\n' - # TODO highlight? - out += f'<pre class="example"><code class="puppet">{text}</code></pre>\n' - - # out += '<dl>' - # for t in tags: - # if t['tag_name'] == 'param': - # out += f"<dt>{t['name']}</dt>" - # if text := t.get('text'): - # text = re.sub(r'(NOTE|TODO)', - # r'<mark>\1</mark>', - # markdown(text)) - # out += f"<dd>{text}</dd>" - # out += '</dl>' - - out += '<div>' - out += markdown(docstring.text) - out += '</div>' - - return (name, out) - - -# TODO @option tags -def build_param_dict(docstring: DocString) -> dict[str, str]: - """ - Extract all parameter documentation from a docstring dict. - - :param docstring: - The object present under 'docstring' in the information about - a single object (class, resource, ...) in the output of - `puppet strings`. - - :returns: - A dictionary where the keys are the variables which have - documentation, and the value is the (formatted) documentation - for that key. Undocumented keys (even those with the tag, but - no text) are ommitted from the resulting dictionary. 
- """ - obj = {} - for t in docstring.tags: - if isinstance(t, DocStringParamTag): - obj[t.name] = re.sub(r'(NOTE|TODO)', - r'<mark>\1</mark>', - markdown(t.text)) - return obj - - -def format_class(d_type: DefinedType | PuppetClass) -> Tuple[str, str]: - """Format Puppet class.""" - out = '' - logger.info("Formatting class %s", d_type.name) - # print(name, file=sys.stderr) - name, body = format_docstring(d_type.name, d_type.docstring) - out += body - - # ------ Old --------------------------------------- - # t = parse_puppet(d_type.source) - # data = parse(t, 0, ['root']) - # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) - # out += render(renderer, data) - # ------ New --------------------------------------- - try: - result = parse_puppet(d_type.source, d_type.name) - out += '<pre class="highlight-muppet"><code class="puppet">' - out += result - out += '</code></pre>' - except ParseError as e: - logger.error("Parsing %(name)s failed: %(err)s", - {'name': d_type.name, 'err': e}) - out += f'<div class="error">{html.escape(str(e))}</div>' - out += '<pre><code class="puppet">' - if e.pos: - out += d_type.source[:e.pos] - out += '<span class="error">' - out += d_type.source[e.pos] - out += '</span>' - out += d_type.source[e.pos+1:] - else: - out += d_type.source - out += '</code></pre>' - return name, out - - -def format_type() -> str: - """Format Puppet type.""" - return 'TODO format_type not implemented' - - -def format_type_alias(d_type: DataTypeAlias, file: str) -> Tuple[str, str]: - """Format Puppet type alias.""" - out = '' - name = d_type.name - logger.info("Formatting type alias %s", name) - # print(name, file=sys.stderr) - title, body = format_docstring(name, d_type.docstring) - out += body - out += '\n' - out += '<pre class="highlight-muppet"><code class="puppet">' - try: - out += parse_puppet(d_type.alias_of, file) - except ParseError as e: - logger.error("Parsing %(name)s failed: %(err)s", - {'name': d_type.alias_of, 'err': e}) - out += 
'</code></pre>\n' - return title, out - - -def format_defined_type(d_type: DefinedType, file: str) -> Tuple[str, str]: - """Format Puppet defined type.""" - # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) - out = '' - name = d_type.name - logger.info("Formatting defined type %s", name) - # print(name, file=sys.stderr) - title, body = format_docstring(name, d_type.docstring) - out += body - - out += '<pre class="highlight-muppet"><code class="puppet">' - try: - out += parse_puppet(d_type.source, file) - except ParseError as e: - logger.error("Parsing %(name)s failed: %(err)s", - {'name': d_type.source, 'err': e}) - out += '</code></pre>\n' - return title, out - - -def format_resource_type(r_type: ResourceType) -> str: - """Format Puppet resource type.""" - name = r_type.name - logger.info("Formatting resource type %s", name) - out = '' - out += f'<h2>{name}</h2>\n' - out += str(r_type.docstring) - - out += '<h3>Properties</h3>\n' - if props := r_type.properties: - out += '<ul>\n' - for property in props: - out += f'<li>{property.name}</li>\n' - # description, values, default - out += '</ul>\n' - else: - out += '<em>No providers</em>' - - out += '<h3>Parameters</h3>\n' - out += '<ul>\n' - for parameter in r_type.parameters: - out += f'<li>{parameter.name}</li>\n' - # description - # Optional[isnamevar] - out += '</ul>\n' - - out += '<h3>Providers</h3>\n' - if providers := r_type.providers: - for provider in providers: - out += f'<h4>{provider.name}</h4>\n' - # TODO - else: - print('<em>No providers</em>') - - return out - - -def format_puppet_function(function: Function, file: str) -> str: - """Format Puppet function.""" - out = '' - name = function.name - logger.info("Formatting puppet function %s", name) - out += f'<h2>{name}</h2>\n' - t = function.type - # docstring = function.docstring - for signature in function.signatures: - signature.signature - signature.docstring - if t in ['ruby3x', 'ruby4x']: - # TODO syntax highlighting - s = '<pre 
class="highlight-muppet"><code class="ruby">' - s += function.source - s += '</code></pre>\n' - out += s - elif t == 'puppet': - out += '<pre class="highlight-muppet"><code class="puppet">' - try: - out += parse_puppet(function.source, file) - except ParseError as e: - logger.error("Parsing %(name)s failed: %(err)s", - {'name': function.source, 'err': e}) - - out += '</code></pre>\n' - else: - # TODO do something - pass - - return out - - -def format_puppet_task() -> str: - """Format Puppet task.""" - return 'TODO format_puppet_task not implemented' - - -def format_puppet_plan() -> str: - """Format Puppet plan.""" - return 'TODO format_puppet_plan not implemented' diff --git a/muppet/gather.py b/muppet/gather.py deleted file mode 100644 index f0a43d9..0000000 --- a/muppet/gather.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Methods for gathering data. - -Gathers information about all puppet modules, including which are -present in our environment, their metadata, and their output of -``puppet strings``. -""" - -from dataclasses import dataclass -from typing import ( - Any, - Optional, -) -import json -import os.path -import hashlib -from glob import glob -from .puppet.strings import puppet_strings, PuppetStrings -from .cache import Cache -import logging - - -logger = logging.getLogger(__name__) - - -@dataclass -class ModuleEntry: - """ - One entry in a module. - - :param name: - Local name of the module, should always be the basename of path - :param path: - Absolute path in the filesystem where the module can be found. - :param strings_output: - Output of ``puppet strings``. - :param metadata: - Parsed contents of the modules ``metadata.json``. - :param doc_files: - List of absolute paths to documentation files provided by the - upstream module. 
- """ - - name: str - path: str - strings_output: Optional[PuppetStrings] - metadata: dict[str, Any] - doc_files: list[str] - - def file(self, path: str) -> str: - """Return the absolute path of a path inside the module.""" - return os.path.join(self.path, path) - - -def get_puppet_strings(cache: Cache, path: str) -> Optional[PuppetStrings]: - """ - Run puppet string, but check cache first. - - The cache uses the contents of metadata.json as its key, - so any updates without an updated metadata.json wont't be - detected. - - Hashing the entire contents of the module was tested, but was to - slow. - """ - try: - with open(os.path.join(path, 'metadata.json'), 'rb') as f: - data = f.read() - key = 'puppet-strings' + hashlib.sha1(data).hexdigest() - if parsed := cache.get(key): - result = parsed - else: - result = puppet_strings(path) - cache.put(key, result) - logger.info('Deserializing %s', path) - return PuppetStrings.from_json(json.loads(result)) - except FileNotFoundError: - # TODO actually run puppet strings again. - # This is just since without a metadata.json we always get a - # cache miss, which is slow. - logger.info("Running 'puppet-strings %s'", - os.path.basename(path)) - result = puppet_strings(path) - return PuppetStrings.from_json(json.loads(result)) - return None - - # try: - # with open(module.file('.git/FETCH_HEAD')) as f: - # st = os.stat(f.fileno()) - # st.st_mtime - # except FileNotFoundError: - # pass - - -def get_module(cache: Cache, - path: str) -> ModuleEntry: - """ - Return the metadata of a given module. - - :param cache: - Cache objcet for modules, see python module configuration. - - :param path: - Path of the given module. 
- """ - name = os.path.basename(path) - strings_data = get_puppet_strings(cache, path) - - try: - with open(os.path.join(path, 'metadata.json')) as f: - metadata = json.load(f) - except FileNotFoundError: - metadata = {} - - doc_files = glob(os.path.join(os.path.abspath(path), '*.md')) \ - + glob(os.path.join(os.path.abspath(path), 'LICENSE')) - - return ModuleEntry(name=name, - path=path, - strings_output=strings_data, - metadata=metadata, - doc_files=doc_files) - - -def get_modules(cache: Cache, dir: str) -> list[ModuleEntry]: - """ - Return all modules present in a given directory. - - The directory should be the modules subdirectory of an environment, - e.g. /etc/puppetlabs/code/environments/production/modules. - """ - modules: list[ModuleEntry] = [] - - for entry in sorted(list(os.scandir(dir)), key=lambda d: d.name): - # TODO Logging - # print('- entry', entry, file=sys.stderr) - path = os.path.join(dir, entry) - - modules.append(get_module(cache, path)) - - return modules diff --git a/muppet/output.py b/muppet/output.py index 08239c9..2d3440a 100644 --- a/muppet/output.py +++ b/muppet/output.py @@ -1,65 +1,162 @@ """ -Functions for actually generating output. +Generate all output files. -Both generates output strings, and writes them to disk. +The primary entry point of this module is the class +:class:`PuppetEnvironment`. 
""" - -import os +from dataclasses import dataclass, field +import html +import logging import os.path import pathlib -# import json -import html -from .gather import ModuleEntry -from jinja2 import ( - Environment, - FileSystemLoader, -) -# from .lookup import lookup, Ref -from .markdown import markdown -from .format import ( - format_class, - format_type_alias, -) +import re +import json from typing import ( + Any, Optional, Protocol, + Sequence, + cast, ) -from .util import group_by -from .puppet.strings import ( - isprivate, - PuppetStrings, - ResourceType, - DefinedType, +from jinja2 import ( + Environment, + FileSystemLoader, +) + +from muppet.syntax_highlight import highlight +from muppet.puppet.strings import ( DataTypeAlias, + DefinedType, + DocString, + DocStringApiTag, + DocStringAuthorTag, + DocStringExampleTag, + DocStringOptionTag, + DocStringOverloadTag, + DocStringParamTag, + DocStringRaiseTag, + DocStringReturnTag, + DocStringSeeTag, + DocStringSinceTag, + DocStringSummaryTag, + DocStringTag, + Function, PuppetClass, + PuppetStrings, + ResourceType, + isprivate, + puppet_strings_cached, ) -import logging - -from .breadcrumbs import breadcrumbs -from .syntax_highlight import highlight -from dataclasses import dataclass, field -from . 
import templates +from muppet.parser_combinator import ( + ParserCombinator, + ParseError, + MatchCompound, + MatchObject, +) +from muppet.markdown import markdown +from muppet.breadcrumbs import Breadcrumbs, breadcrumbs +from muppet.util import group_by, partition +from muppet.cache import AbstractCache +from muppet.puppet.parser import puppet_parser -logger = logging.getLogger(__name__) +from muppet.puppet.ast import build_ast +from muppet.puppet.format.parser import ParserFormatter -# TODO replace 'output' with base, or put this somewhere else -pathlib.Path('output').mkdir(exist_ok=True) jinja = Environment( loader=FileSystemLoader('templates'), autoescape=False, ) +logger = logging.getLogger(__name__) + + +param_doc: dict[str, str] = {} + + +class Templates: + """Namespace for templates.""" + + def __init__(self) -> None: + self.jinja = Environment( + loader=FileSystemLoader('templates'), + autoescape=False) + + def code_page(self, *, + title: str, + content: str, + path_base: str, + breadcrumbs: Optional[Breadcrumbs] = None + ) -> str: # pragma: no cover + """Template for a page containing puppet code.""" + template = self.jinja.get_template('code_page.html') + return template.render( + title=title, + content=content, + path_base=path_base, + breadcrumbs=breadcrumbs) + + def content(self, *, + content: str, + path_base: str, + breadcrumbs: Optional[Breadcrumbs] = None + ) -> str: # pragma: no cover + """Template for a page with arbitrary content.""" + template = self.jinja.get_template('content.html') + return template.render( + content=content, + path_base=path_base, + breadcrumbs=breadcrumbs) + + def index(self, *, + modules: list['PuppetModule'], + path_base: str, + breadcrumbs: Optional[Breadcrumbs] = None + ) -> str: # pragma: no cover + """Root index file.""" + template = self.jinja.get_template('index.html') + return template.render( + path_base=path_base, + modules=modules, + breadcrumbs=breadcrumbs) + + def module_index(self, *, + # content: list[], # 
something with to_html_list and to_html + content: list[Any], # TODO something with to_html_list and to_html + module_author: str, + module_name: str, + doc_files: list[tuple[str, str]], + path_base: str, + breadcrumbs: Optional[Breadcrumbs] = None, + left_sidebar: Optional[str] = None, + right_sidebar: Optional[str] = None, + ) -> str: # pragma: no cover + """Index for a single module.""" + template = self.jinja.get_template('module_index.html') + return template.render( + content=content, + module_author=module_author, + module_name=module_name, + doc_files=doc_files, + path_base=path_base, + breadcrumbs=breadcrumbs, + left_sidebar=left_sidebar, + right_sidebar=right_sidebar) + + +templates = Templates() + + class HtmlSerializable(Protocol): """Classes which can be serialized as HTML.""" - def to_html(self) -> str: + def to_html(self) -> str: # pragma: no cover """Return HTML string.""" ... - def to_html_list(self) -> str: + def to_html_list(self) -> str: # pragma: no cover """Return HTML suitable for a list.""" ... @@ -69,6 +166,7 @@ class ResourceTypeOutput: """Basic HTML implementation.""" title: str + module_name: str children: list['HtmlSerializable'] = field(default_factory=list) link: Optional[str] = None summary: Optional[str] = None @@ -90,7 +188,9 @@ class ResourceTypeOutput: # self.__class__.__name__ return jinja \ .get_template('snippets/ResourceType-index-entry.html') \ - .render(item=self) + .render(item=self, + module_name=self.module_name, + prefix='/code/muppet-strings/output') def to_html_list(self) -> str: """Return HTML suitable for a list.""" @@ -99,22 +199,6 @@ class ResourceTypeOutput: .render(item=self) -def setup_index(base: str, modules: list[ModuleEntry], *, path_base: str) -> None: - """ - Create the main index.html file. - - :param base: - Path to output directory. - :param modules: - Modules to include in the index. 
- :param path_base: - Web path where this module will be deployed - """ - with open(os.path.join(base, 'index.html'), 'w') as f: - f.write(templates.index(modules=modules, - path_base=path_base)) - - @dataclass class IndexItem: """ @@ -170,7 +254,8 @@ class IndexSubcategory: def to_html(self) -> str: """Convert subcategory to an HTML string.""" out: str = '' - out += f'<h3>{html.escape(self.title)}</h3><dl class="overview-list">' + out += f'<h3>{html.escape(self.title)}</h3>' + out += '<dl class="overview-list">' for item in self.list: out += item.to_html() @@ -230,48 +315,448 @@ class IndexCategory: return out -def index_item(obj: PuppetClass | DefinedType) -> IndexItem: +@dataclass +class ResourceIndex: + """Placeholder.""" + + title: str + children: list[HtmlSerializable] + + def to_html(self) -> str: + """Return something.""" + out: str = '' + out += f'<h2>{self.title}</h2>' + out += '<dl>' + for child in self.children: + out += child.to_html() + out += '</dl>' + return out + + def to_html_list(self) -> str: + """Return something.""" + out: str = '' + out += f'<li>{self.title}<ul>' + for child in self.children: + out += child.to_html_list() + out += '</ul></li>' + return out + + +class PuppetModule: """ - Format a puppet type declaration into an index entry. + A representation of an entire Puppet module. - :param obj: - A dictionary at least containing the keys 'name' and 'file', - and optionally containing 'docstring'. If docstring is present - then a summary tag is searched for, and added to the resulting - object. + :param name: + Name the module, without the author information. + This is also used as the published named. 
+ :param path: + :param strings_output: + :param metadata: + :param doc_files: """ - name = obj.name - out: IndexItem = IndexItem( - file=os.path.splitext(obj.file)[0], - name=name, - ) + # name: str + # path: str + # strings_output: PuppetStrings + # metadata: dict[str, Any] + # doc_files: list[str] - for tag in obj.docstring.tags: - if tag.tag_name == 'summary': - out.summary = markdown(tag.text) - break + def __init__(self, path: str, output_prefix: str, puppet_strings: PuppetStrings): + """ + Construct a new instance. - return out + :param path: + Path to the source of this module. + + :param output_prefix: + Which web path the pages should be outputed as. + + :param puppet_strings: + (Re-formatted) output of ``puppet strings``. Taken as a + paärameter to enable caching outside. + """ + self.path = path + self.name = os.path.basename(path) + self.strings_output = puppet_strings + self.toc = self._build_module_toc() + self.output_prefix = output_prefix + + # TODO + self.doc_files: list[str] = [] + + try: + with open(os.path.join(self.path, 'metadata.json')) as f: + self.metadata = json.load(f) + except FileNotFoundError: + self.metadata = {} + + def _build_module_toc(self) -> list[ResourceIndex | IndexCategory]: + """Build the TOC of the module.""" + content: list[ResourceIndex | IndexCategory] = [] + + if puppet_classes := self.strings_output.puppet_classes: + content.append(class_index(puppet_classes)) + + # data_types + if _ := self.strings_output.data_types: + content.append(IndexCategory( + title='Data types not yet implmented', + list=[])) + + if data_type_aliases := self.strings_output.data_type_aliases: + content.append(type_aliases_index(data_type_aliases)) + + if defined_types := self.strings_output.defined_types: + content.append(defined_types_index(defined_types)) + + if resource_types := self.strings_output.resource_types: + content.append(ResourceIndex( + title='Resource Types', + children=resource_type_index( + resource_types, + self.name))) + + # 
providers + if _ := self.strings_output.providers: + content.append(IndexCategory( + title='Providers not yet implmented', + list=[])) + + # puppet_functions + if _ := self.strings_output.puppet_functions: + content.append(IndexCategory( + title='Puppet Functions not yet implmented', + list=[])) + + # templates/ + # files/ + # examples or tests/ + # (spec)/ + # lib/puppet_x/ + # lib/facter/ + # facts.d/ + # data/ + # hiera.yaml + + # puppet_tasks + if _ := self.strings_output.puppet_tasks: + content.append(IndexCategory( + title='Puppet Tasks not yet implmented', + list=[])) + + # puppet_plans + if _ := self.strings_output.puppet_plans: + content.append(IndexCategory( + title='Puppet Plans not yet implmented', + list=[], + )) + + return content + + def file(self, path: str) -> str: + """Return the absolute path of a path inside the module.""" + return os.path.join(self.path, path) + + def index_page(self, destination: str) -> None: + """ + Generate the index file for a specific module. + + :param destination: + Path which this module should be created in, such as + '/var/www/muppet/{env}/{module_name}. + """ + crumbs = breadcrumbs( + ('Environment', ''), + self.name, + ) + + # TODO left sidebar should contain list of modules in + # environment. 
+ + toc = self._build_module_toc() + + with open(os.path.join(destination, 'index.html'), 'w') as f: + f.write(templates.module_index( + module_name=self.name, + module_author='TODO', # module.metadata['author'], + breadcrumbs=crumbs, + content=toc, + path_base=self.output_prefix, + # doc_files=list(doc_files.items()) + # TODO + doc_files=[], + # left_sidebar=(), + right_sidebar=''.join([ + '<ul class="toc">', + *(e.to_html_list() for e in toc), + '</ul>', + ]))) + + def _generate_classes(self, destination: str) -> None: + """TODO test document private.""" + for puppet_class in self.strings_output.puppet_classes \ + + self.strings_output.defined_types: + logger.info('Formamting %s', puppet_class.name) + # localpath = puppet_class['name'].split('::') + localpath, _ = os.path.splitext(puppet_class.file) + dir = os.path.join(destination, localpath) + pathlib.Path(dir).mkdir(parents=True, exist_ok=True) + # puppet_class['docstring'] + # puppet_class['defaults'] + + # TODO option to add .txt extension (for web serverse which + # treat .pp as application/binary) + with open(os.path.join(dir, 'source.pp.txt'), 'wb') as f: + with open(self.file(puppet_class.file), 'rb') as g: + f.write(g.read()) + + crumbs = breadcrumbs( + ('Environment', ''), + self.name, + (puppet_class.name, + 'manifests/' + '/'.join(puppet_class.name.split('::')[1:])), + 'This', + ) + + with open(os.path.join(dir, 'source.pp.html'), 'w') as f: + + with open(self.file(puppet_class.file), 'r') as g: + f.write(templates.code_page( + title='', + content=highlight(g.read(), 'puppet'), + path_base=self.output_prefix, + breadcrumbs=crumbs)) + + # TODO reimplement this? 
+ # with open(os.path.join(dir, 'source.json'), 'w') as f: + # json.dump(puppet_class, f, indent=2) + + # with open(os.path.join(dir, 'source.pp.html'), 'w') as f: + # f.write(format_class(puppet_class)) + + crumbs = breadcrumbs( + ('Environment', ''), + self.name, + (puppet_class.name, + 'manifests/' + '/'.join(puppet_class.name.split('::')[1:])), + ) + + title, body = format_class(puppet_class) + with open(os.path.join(dir, 'index.html'), 'w') as f: + f.write(templates.code_page( + title=self.name, + content=body, + path_base=self.output_prefix, + breadcrumbs=crumbs)) + + # puppet_class['file'] + # puppet_class['line'] + + def _generate_type_aliases(self, destination: str) -> None: + for type_alias in self.strings_output.data_type_aliases: + logger.debug('Formamting %s', type_alias.name) + + localpath, _ = os.path.splitext(type_alias.file) + dir = os.path.join(destination, localpath) + pathlib.Path(dir).mkdir(parents=True, exist_ok=True) + + with open(os.path.join(dir, 'source.pp.txt'), 'w') as f: + f.write(type_alias.alias_of) + + # TODO reimplement this? + # with open(os.path.join(dir, 'source.json'), 'w') as f: + # json.dump(type_alias, f, indent=2) + + title, body = format_type_alias(type_alias, type_alias.name) + with open(os.path.join(dir, 'index.html'), 'w') as f: + f.write(templates.code_page( + title=title, + content=body, + path_base=self.output_prefix)) + + def _documentation_files(self, destination: str) -> None: + GENERATED_MESSAGE = '<!-- DO NOT EDIT: This document was generated by Puppet Strings -->\n' + """ + REFERENCE.md files generated by Puppet Strings include this string on + their third line. We use this to ignore auto-generated files, since we + replace that output. + """ + + files: dict[str, str] = {} + # TODO is this set? 
+ for file in self.doc_files: + logger.debug('Formamting %s', file) + + basename = os.path.basename(file) + if basename == 'REFERENCE.md': + with open(file) as f: + f.readline() + f.readline() + line3 = f.readline() + if line3 == GENERATED_MESSAGE: + continue + files[basename] = file + + doc_files: dict[str, str] = {} + for filename, filepath in files.items(): + logger.debug('Formamting %s', filename) + + name, _ = os.path.splitext(filename) + with open(filepath) as f: + raw_content = f.read() + + pathlib.Path(os.path.join(destination, name)).mkdir(exist_ok=True) + out_path = os.path.join(destination, name, 'index.html') + + if filename.endswith('.md'): + content = markdown(raw_content) + else: + content = '<pre>' + html.escape(raw_content) + '</pre>' + + crumbs = breadcrumbs(('Environment', ''), + self.name, + name) + + with open(out_path, 'w') as f: + f.write(templates.content( + content=content, + path_base=self.output_prefix, + breadcrumbs=crumbs)) + + doc_files[name] = os.path.join(self.name, name, 'index.html') + + def output(self, destination: str) -> None: + """ + Generate output for module. + + :param dest: + Where the content should end up. This should NOT include + the module name. + + example: '/var/www/muppet/{environment}/' + """ + destination = os.path.join(destination, self.name) + pathlib.Path(destination).mkdir(exist_ok=True) + self.index_page(destination) + + self._generate_classes(destination) + self._generate_type_aliases(destination) + # data_type_aliases + # defined_types + # resource_types + + +class PuppetEnvironment: + """ + Representation of a complete puppet environment. + + This module holds all information about a parser environment, + along with procedures for generating documentation output from it. 
+ + See `The official docs <https://www.puppet.com/docs/puppet/7/environments_creating.html>`_ + + Environments are usually found in + ``/etc/puppetlabs/code/environments/{name}``, where each module + may contain any of the following files: + + - modules/ + - manifests/ + - hiera.yaml + - environment.conf + + Along with any other files. + + :param modules: + List of all modules in the environment. This will be a list of + :class:`PuppetModule` objects. + + :param name: + Name of the environment. 'production' is a common choice. + + :param source_path: + Absolute path to the environments source code. + + :param cache: + A cache object. + + :param output_prefix: + Prefixes which will be added to all links in output. + """ + + def __init__(self, source_path: str, output_prefix: str, cache: AbstractCache): + """ + Construct a new instance. + + :param source_path: + Absolute source directory where the environment exists, + + .. code-block:: python + + >>> f'/etc/puppetlabs/code/environments/{self.name}' + + :param output_prefix: + Prefixes which will be added to all links in output. + + :param cache: + Cache instance. + """ + # TODO an alternative constructor could take environment name, + # and then expands with the default path. + self.source_path = source_path + self.name = os.path.basename(source_path) + self.output_prefix = output_prefix + + self.modules: list[PuppetModule] = [] + dir = os.path.join(self.source_path, 'modules') + + for entry in sorted(list(os.scandir(dir)), + key=lambda d: d.name): + module_path = entry.path + # entry_ = entry.name + # module_path = os.path.join(dir, entry_) + # print(f"entry = {entry_}, module_path = {module_path}, dir={dir}") + self.modules.append( + PuppetModule(path=module_path, + output_prefix=output_prefix, + puppet_strings=puppet_strings_cached(module_path, + cache))) + + def output(self, destination: str) -> None: + """ + Generate all output files. 
+ + :param destination: + /var/www/muppet/ + + self.name will be appended to it + """ + destination = os.path.join(destination, self.name) + pathlib.Path(destination).mkdir(exist_ok=True) + + with open(os.path.join(destination, 'index.html'), 'w') as f: + # TODO breadcrumbs + f.write(templates.index(modules=self.modules, + path_base=self.output_prefix)) + + for module in self.modules: + module.output(destination) def class_index(class_list: list[PuppetClass]) -> IndexCategory: """Prepage class index list.""" - groups = group_by(isprivate, class_list) + publics, privates = partition(isprivate, class_list) lst: list[IndexSubcategory] = [] - if publics := groups.get(False): + if publics: lst.append(IndexSubcategory( title='Public Classes', - list=[index_item(i) for i in publics], - )) + list=[index_item(i) for i in publics])) - if privates := groups.get(True): + if privates: lst.append(IndexSubcategory( title='Private Classes', - list=[index_item(i) for i in privates], - )) + list=[index_item(i) for i in privates])) return IndexCategory( title='Classes', @@ -334,34 +819,8 @@ def type_aliases_index(alias_list: list[DataTypeAlias]) -> IndexCategory: ) -@dataclass -class ResourceIndex: - """Placeholder.""" - - title: str - children: list[HtmlSerializable] - - def to_html(self) -> str: - """Return something.""" - out: str = '' - out += f'<h2>{self.title}</h2>' - out += '<dl>' - for child in self.children: - out += child.to_html() - out += '</dl>' - return out - - def to_html_list(self) -> str: - """Return something.""" - out: str = '' - out += f'<li>{self.title}<ul>' - for child in self.children: - out += child.to_html_list() - out += '</ul></li>' - return out - - -def resource_type_index(resource_types: list[ResourceType]) -> list[HtmlSerializable]: +def resource_type_index(resource_types: list[ResourceType], + module_name: str) -> list[HtmlSerializable]: """Generate index for all known resource types.""" lst: list[HtmlSerializable] = [] @@ -378,255 +837,878 @@ def 
resource_type_index(resource_types: list[ResourceType]) -> list[HtmlSerializ items.append(ResourceTypeOutput( title=provider.name, link=provider.file, + module_name=module_name, summary=documentation)) lst.append(ResourceTypeOutput(title=resource_type.name, + module_name=module_name, children=items)) return lst -# def resource_types_index(resource_list: list) -> IndexCategory: -# """ -# Prepare resource type index list. -# -# These are the resource types introduced through ruby. Each can -# have multiple implementations. -# """ -# return {} - - -def setup_module_index(*, - base: str, - module: ModuleEntry, - data: PuppetStrings, - path_base: str, - doc_files: dict[str, str], - ) -> None: - """Create the index file for a specific module.""" - content: list[ResourceIndex | IndexCategory] = [] - - content.append(class_index(data.puppet_classes)) - - # data['data_types'] - content.append(IndexCategory( - title='Data types not yet implmented', - list=[], - )) - - content.append(type_aliases_index(data.data_type_aliases)) - - content.append(defined_types_index(data.defined_types)) - - content.append(ResourceIndex( - title='Resource Types', - children=resource_type_index(data.resource_types))) - - # data['providers'] - content.append(IndexCategory( - title='Providers not yet implmented', - list=[], - )) - - # data['puppet_functions'] - content.append(IndexCategory( - title='Puppet Functions not yet implmented', - list=[], - )) - - # templates/ - # files/ - # examples or tests/ - # (spec)/ - # lib/puppet_x/ - # lib/facter/ - # facts.d/ - # data/ - # hiera.yaml - - # data['puppet_tasks'] - content.append(IndexCategory( - title='Puppet Tasks not yet implmented', - list=[], - )) - - # data['puppet_plans'] - content.append(IndexCategory( - title='Puppet Plans not yet implmented', - list=[], - )) - - crumbs = breadcrumbs( - ('Environment', ''), - module.name, - ) - - with open(os.path.join(base, 'index.html'), 'w') as f: - f.write(templates.module_index( - 
module_name=module.name, - module_author='TODO', # module.metadata['author'], - breadcrumbs=crumbs, - content=content, - path_base=path_base, - doc_files=list(doc_files.items()))) - - -GENERATED_MESSAGE = '<!-- DO NOT EDIT: This document was generated by Puppet Strings -->\n' -""" -REFERENCE.md files generated by Puppet Strings include this string on -their third line. We use this to ignore auto-generated files, since we -replace that output. -""" +def index_item(obj: PuppetClass | DefinedType) -> IndexItem: + """ + Format a puppet type declaration into an index entry. + :param obj: + A dictionary at least containing the keys 'name' and 'file', + and optionally containing 'docstring'. If docstring is present + then a summary tag is searched for, and added to the resulting + object. + """ + name = obj.name + + out: IndexItem = IndexItem( + file=os.path.splitext(obj.file)[0], + name=name, + ) -def setup_module(base: str, module: ModuleEntry, *, path_base: str) -> None: + for tag in obj.docstring.tags: + if tag.tag_name == 'summary': + out.summary = markdown(tag.text) + break + + return out + + +def format_docstring(name: str, docstring: DocString) -> tuple[str, str]: """ - Create all output files for a puppet module. + Format docstrings as they appear in some puppet types. - Will generate a directory under base for the module. + Those types being: - :param base: - Path to output directory. - :param modules: - Modules to document. 
- :param path_base: - Web path where this module will be deployed - """ - path = os.path.join(base, module.name) - pathlib.Path(path).mkdir(exist_ok=True) - if not module.strings_output: - logger.warning("No strings output for %s", module.name) - return - - data = module.strings_output - - for puppet_class in data.puppet_classes + data.defined_types: - logger.info('Formamting %s', puppet_class.name) - # localpath = puppet_class['name'].split('::') - localpath, _ = os.path.splitext(puppet_class.file) - dir = os.path.join(path, localpath) - pathlib.Path(dir).mkdir(parents=True, exist_ok=True) - # puppet_class['docstring'] - # puppet_class['defaults'] - - # TODO option to add .txt extension (for web serverse which - # treat .pp as application/binary) - with open(os.path.join(dir, 'source.pp.txt'), 'wb') as f: - with open(module.file(puppet_class.file), 'rb') as g: - f.write(g.read()) + * puppet_classes, + * puppet_type_aliases, and + * defined_types + """ + global param_doc + + # The api tag is ignored, since it instead is shown from context + + out = '' + + param_doc = {tag.name: tag.text or '' + for tag in docstring.tags + if isinstance(tag, DocStringParamTag)} + + grouped_tags = GroupedTags.from_taglist(docstring.tags) + + # -------------------------------------------------- + + out += '<a href="#code">Jump to Code</a><br/>' + + if tags := grouped_tags.summary_: + out += '<em class="summary">' + for tag in tags: + out += html.escape(tag.text) + out += '</em>' + + out += '<div class="description">' + # TODO "TODO" highlighting + out += markdown(docstring.text) + out += '</div>' + + # TODO proper handling of multiple @see tags + if sees := grouped_tags.see_: + out += '<b>See</b> ' + for see in sees: + link: str + m = re.match(r'((?P<url>https?://.*)|(?P<man>.*\([0-9]\))|(?P<other>.*))', see.name) + assert m, "Regex always matched" + if m['url']: + link = f'<a href="{see.name}">{see.name}</a>' + out += link + elif m['man']: + page = see.name[:-3] + section = 
see.name[-2] + # TODO man providers + link = f"https://manned.org/man/{page}.{section}" + out += link + else: + if '::' in m['other']: + # TODO + pass + else: + # TODO + # link = see + pass + out += m['other'] + out += ' ' + see.text + + if authors := grouped_tags.author_: + out += '<div class="author">' + out += "<em>Written by </em>" + if len(authors) == 1: + out += parse_author(authors[0].text) + else: + out += '<ul>' + for author in authors: + out += f'<li>{parse_author(author.text)}</li>' + out += '</ul>' + out += '</div>' + + out += '<hr/>' + + t: DocStringTag + + for t in grouped_tags .example_: + out += '<div class="code-example">' + + if name := t.name: + # TODO markup for title + out += f'<div class="code-example-header">{html.escape(name)}</div>\n' + # TODO highlight? + # Problem is that we don't know what language the example + # is in. Pygemntize however seems to do a reasonable job + # treating anything as puppet code + text = html.escape(t.text) + out += f'<pre><code class="puppet">{text}</code></pre>\n' + out += '</div>' + + out += '<hr/>' + + out += '<dl>' + for t in grouped_tags.param_: + name = html.escape(t.name) + out += f'<dt><span id="{name}" class="variable">{name}</span>' + match t.types: + case [x]: + # TODO highlight type? + out += f': <code>{html.escape(x)}</code>' + case [_, *_]: + raise ValueError("How did you get multiple types onto a parameter?") + + # TODO Fetch default values from puppet strings output + # Then in javascript query Hiera to get the true "default" + # values for a given machine (somewhere have a setting for + # selecting machine). 
+ out += '</dt>' + + if text := t.text: + text = re.sub(r'(NOTE|TODO)', + r'<mark>\1</mark>', + markdown(text)) + + if options := grouped_tags.option_.get(t.name): + text += '<dl>' + for option in options: + text += '<dt>' + text += html.escape(option.opt_name) + match option.opt_types: + case [x]: + text += f' [<code>{html.escape(x)}</code>]' + case [_, *_]: + raise ValueError("How did you get multiple types onto an option?") + text += '</dt>' + text += '<dd>' + if option.opt_text: + text += re.sub(r'(NOTE|TODO)', + r'<mark>\1</mark>', + markdown(option.opt_text)) + text += '</dd>' + text += '</dl>' + + out += f"<dd>{text}</dd>" + else: + out += '<dd><em>Undocumented</em></dd>' + out += '</dl>' + + # TODO remaining tags + # "overload" + # raise + # return + # since + # _other + + return (name, out) + + +def format_class(d_type: DefinedType | PuppetClass) -> tuple[str, str]: + """Format Puppet class.""" + out = '' + logger.info("Formatting class %s", d_type.name) + # print(name, file=sys.stderr) + name, body = format_docstring(d_type.name, d_type.docstring) + out += body + + # ------ Old --------------------------------------- + # t = parse_puppet(d_type.source) + # data = parse(t, 0, ['root']) + # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) + # out += render(renderer, data) + # ------ New --------------------------------------- + out += '<hr/>' + out += '<a id="code"></a>' + + in_parameters: list[str] = [] + for tag in d_type.docstring.tags: + if tag.tag_name == 'param': + in_parameters.append(cast(DocStringParamTag, tag).name) + + try: + result = parse_puppet(d_type.source, d_type.name, in_parameters) + out += '<pre class="highlight-muppet"><code class="puppet">' + out += result + out += '</code></pre>' + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': d_type.name, 'err': e}) + out += f'<div class="error">{html.escape(str(e))}</div>' + out += '<pre><code class="puppet">' + if e.pos: + out += 
d_type.source[:e.pos] + out += '<span class="error">' + out += d_type.source[e.pos] + out += '</span>' + out += d_type.source[e.pos+1:] + else: + out += d_type.source + out += '</code></pre>' + return name, out - crumbs = breadcrumbs( - ('Environment', ''), - module.name, - (puppet_class.name, - 'manifests/' + '/'.join(puppet_class.name.split('::')[1:])), - 'This', - ) - with open(os.path.join(dir, 'source.pp.html'), 'w') as f: +@dataclass +class GroupedTags: + """ + All tags from a class (or similar) docstring. - with open(module.file(puppet_class.file), 'r') as g: - f.write(templates.code_page( - title='', - content=highlight(g.read(), 'puppet'), - path_base=path_base, - breadcrumbs=crumbs)) + Most fields are simply lists of tags. The reason for trailing + underscores on each entry is since some tag names collide with + python keywords (e.g. ``raise``). + """ - # TODO reimplement this? - # with open(os.path.join(dir, 'source.json'), 'w') as f: - # json.dump(puppet_class, f, indent=2) + param_: list[DocStringParamTag] = field(default_factory=list) # noqa: E221 + example_: list[DocStringExampleTag] = field(default_factory=list) # noqa: E221 + overload_: list[DocStringOverloadTag] = field(default_factory=list) # noqa: E221 + option_: dict[str, list[DocStringOptionTag]] = field(default_factory=dict) # noqa: E221 + """ + Options document Hash parameters valid values. - # with open(os.path.join(dir, 'source.pp.html'), 'w') as f: - # f.write(format_class(puppet_class)) + Each key is the corresponding parameter, and the value is the list + of registered options for that hash. 
+ """ - crumbs = breadcrumbs( - ('Environment', ''), - module.name, - (puppet_class.name, - 'manifests/' + '/'.join(puppet_class.name.split('::')[1:])), - ) + author_: list[DocStringAuthorTag] = field(default_factory=list) # noqa: E221 + api_: list[DocStringApiTag] = field(default_factory=list) # noqa: E221 + raise_: list[DocStringRaiseTag] = field(default_factory=list) # noqa: E221 + return_: list[DocStringReturnTag] = field(default_factory=list) # noqa: E221 + since_: list[DocStringSinceTag] = field(default_factory=list) # noqa: E221 + summary_: list[DocStringSummaryTag] = field(default_factory=list) # noqa: E221 + see_: list[DocStringSeeTag] = field(default_factory=list) # noqa: E221 + other_: list[DocStringTag] = field(default_factory=list) # noqa: E221 + """All tags of unknown type.""" + + @classmethod + def from_taglist(cls, tags: list[DocStringTag]) -> 'GroupedTags': + """Group a list of tags.""" + grouped_tags = cls() + for tag in tags: + if tag.tag_name == 'option': + tag = cast(DocStringOptionTag, tag) + grouped_tags.option_.setdefault(tag.parent, []).append(tag) + elif tag.tag_name in {'param', 'example', 'overload', 'author', 'api', + 'raise', 'return', 'since', 'summary', 'see'}: + getattr(grouped_tags, tag.tag_name + '_').append(tag) + else: + grouped_tags.other_.append(tag) + return grouped_tags + + +def build_param_dict(docstring: DocString) -> dict[str, str]: + """ + Extract all parameter documentation from a docstring dict. + + :param docstring: + The object present under 'docstring' in the information about + a single object (class, resource, ...) in the output of + `puppet strings`. + + :returns: + A dictionary where the keys are the variables which have + documentation, and the value is the (formatted) documentation + for that key. Undocumented keys (even those with the tag, but + no text) are ommitted from the resulting dictionary. 
+ """ + obj = {} + for t in docstring.tags: + if isinstance(t, DocStringParamTag): + obj[t.name] = re.sub(r'(NOTE|TODO)', + r'<mark>\1</mark>', + markdown(t.text)) + return obj + + +def format_type() -> str: + """Format Puppet type.""" + return 'TODO format_type not implemented' + + +def format_type_alias(d_type: DataTypeAlias, file: str) -> tuple[str, str]: + """Format Puppet type alias.""" + out = '' + name = d_type.name + logger.info("Formatting type alias %s", name) + # print(name, file=sys.stderr) + title, body = format_docstring(name, d_type.docstring) + out += body + out += '\n' + out += '<pre class="highlight-muppet"><code class="puppet">' + try: + out += parse_puppet(d_type.alias_of, file, []) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': d_type.alias_of, 'err': e}) + out += '</code></pre>\n' + return title, out + + +def format_defined_type(d_type: DefinedType, file: str) -> tuple[str, str]: + """Format Puppet defined type.""" + # renderer = HTMLRenderer(build_param_dict(d_type.docstring)) + out = '' + name = d_type.name + logger.info("Formatting defined type %s", name) + # print(name, file=sys.stderr) + title, body = format_docstring(name, d_type.docstring) + out += body + + out += '<pre class="highlight-muppet"><code class="puppet">' + try: + out += parse_puppet(d_type.source, file, []) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': d_type.source, 'err': e}) + out += '</code></pre>\n' + return title, out + + +def format_resource_type(r_type: ResourceType) -> str: + """Format Puppet resource type.""" + name = r_type.name + logger.info("Formatting resource type %s", name) + out = '' + out += f'<h2>{name}</h2>\n' + out += str(r_type.docstring) + + out += '<h3>Properties</h3>\n' + if props := r_type.properties: + out += '<ul>\n' + for property in props: + out += f'<li>{property.name}</li>\n' + # description, values, default + out += '</ul>\n' + else: + out += '<em>No 
providers</em>' + + out += '<h3>Parameters</h3>\n' + out += '<ul>\n' + for parameter in r_type.parameters: + out += f'<li>{parameter.name}</li>\n' + # description + # Optional[isnamevar] + out += '</ul>\n' + + out += '<h3>Providers</h3>\n' + if providers := r_type.providers: + for provider in providers: + out += f'<h4>{provider.name}</h4>\n' + # TODO + else: + print('<em>No providers</em>') - title, body = format_class(puppet_class) - with open(os.path.join(dir, 'index.html'), 'w') as f: - f.write(templates.code_page( - title=title, - content=body, - path_base=path_base, - breadcrumbs=crumbs)) + return out + + +def format_puppet_function(function: Function, file: str) -> str: + """Format Puppet function.""" + out = '' + name = function.name + logger.info("Formatting puppet function %s", name) + out += f'<h2>{name}</h2>\n' + t = function.type + # docstring = function.docstring + for signature in function.signatures: + signature.signature + signature.docstring + if t in ['ruby3x', 'ruby4x']: + # TODO syntax highlighting + s = '<pre class="highlight-muppet"><code class="ruby">' + s += function.source + s += '</code></pre>\n' + out += s + elif t == 'puppet': + out += '<pre class="highlight-muppet"><code class="puppet">' + try: + out += parse_puppet(function.source, file, []) + except ParseError as e: + logger.error("Parsing %(name)s failed: %(err)s", + {'name': function.source, 'err': e}) + + out += '</code></pre>\n' + else: + # TODO do something + pass - # puppet_class['file'] - # puppet_class['line'] + return out - for type_alias in data.data_type_aliases: - logger.debug('Formamting %s', type_alias.name) - localpath, _ = os.path.splitext(type_alias.file) - dir = os.path.join(path, localpath) - pathlib.Path(dir).mkdir(parents=True, exist_ok=True) +def format_puppet_task() -> str: + """Format Puppet task.""" + return 'TODO format_puppet_task not implemented' - with open(os.path.join(dir, 'source.pp.txt'), 'w') as f: - f.write(type_alias.alias_of) - # TODO reimplement 
this? - # with open(os.path.join(dir, 'source.json'), 'w') as f: - # json.dump(type_alias, f, indent=2) +def format_puppet_plan() -> str: + """Format Puppet plan.""" + return 'TODO format_puppet_plan not implemented' - title, body = format_type_alias(type_alias, type_alias.name) - with open(os.path.join(dir, 'index.html'), 'w') as f: - f.write(templates.code_page( - title=title, - content=body, - path_base=path_base)) - - # data['data_type_aliases'] - # data['defined_types'] - # data['resource_types'] - - files: dict[str, str] = {} - for file in module.doc_files: - logger.debug('Formamting %s', file) - - basename = os.path.basename(file) - if basename == 'REFERENCE.md': - with open(file) as f: - f.readline() - f.readline() - line3 = f.readline() - if line3 == GENERATED_MESSAGE: - continue - files[basename] = file - - doc_files: dict[str, str] = {} - for filename, filepath in files.items(): - logger.debug('Formamting %s', filename) - - name, _ = os.path.splitext(filename) - with open(filepath) as f: - raw_content = f.read() - - pathlib.Path(os.path.join(path, name)).mkdir(exist_ok=True) - out_path = os.path.join(path, name, 'index.html') - - if filename.endswith('.md'): - content = markdown(raw_content) - else: - content = '<pre>' + html.escape(raw_content) + '</pre>' - crumbs = breadcrumbs(('Environment', ''), - module.name, - name) +def inner_text(obj: MatchObject | list[MatchObject]) -> str: + """ + Extract the text content from a set of MatchObjects. - with open(out_path, 'w') as f: - f.write(templates.content( - content=content, - path_base=path_base, - breadcrumbs=crumbs)) + This is really similar to HTML's inner_text. + + Empty whitespace tags are expanded into nothing, non-empty + whitespace tags becomes a single space (note that this discards + commets). + + This only works properly if no function was mapped over the parser + return values in tree, see :func:`muppet.parser_combinator.fmap`. 
- doc_files[name] = os.path.join(module.name, name, 'index.html') + :param obj: + Match Objects to search. + """ + match obj: + case str(s): + return s + case MatchCompound(type='ws', matched=[]): + return '' + case MatchCompound(type='ws'): + return ' ' + case MatchCompound(matched=xs): + return ''.join(inner_text(x) for x in xs) + case [*xs]: + return ''.join(inner_text(x) for x in xs) + case _: + raise ValueError('How did we get here') + + +def name_to_url(name: str) -> tuple[str | None, str]: + """ + Resolve a class or resource name into an url. + + :param name: + The name of a class or resource, surch as "example::resource". + :return: + A tuple consisting of + + - One of + - An internal link to the definition of that type + - A link to the official puppet documentation + - ``None``, if `name` is "class" + - A string indicating extra HTML classes for this url. + This is mostly so external references can be marked properly. + """ + if name in built_in_types: + return (f'https://www.puppet.com/docs/puppet/7/types/{name}.html', 'puppet-doc') + elif name == 'class': + return (None, '') + else: + # TODO special cases for puppet's built in types. 
+ # https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html + module, *items = name.lstrip(':').split('::') + # TODO get prefix from the command line/config file + return ('/code/muppet-strings/output/' + + '/'.join([module, 'manifests', *(items if items else ['init'])]), + '') + + +puppet_doc_base = 'https://www.puppet.com/docs/puppet/7' +lang_facts_builtin_variables = (f'{puppet_doc_base}/lang_facts_builtin_variables' + '#lang_facts_builtin_variables') +server_variables = f'{lang_facts_builtin_variables}-server-variables' +compiler_variables = f'{lang_facts_builtin_variables}-compiler-variables' +trusted_facts = f'{lang_facts_builtin_variables}-trusted-facts' +server_facts = f'{lang_facts_builtin_variables}-server-facts' + +built_in_variables = { + 'facts': 'https://google.com', + # clientcert, clientversion, puppetversion, clientnoop, + # agent_specified_environment: + # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-agent-facts + 'trusted': trusted_facts, + 'server_facts': server_facts, + 'environment': server_variables, + 'servername': server_variables, + 'serverip': server_variables, + 'serverversion': server_variables, + 'module_name': compiler_variables, + 'caller_module_name': compiler_variables, + + # Also note the special variable $title and $name + # https://www.puppet.com/docs/puppet/7/lang_defined_types#lang_defined_types-title-and-name +} + + +def parse_author(author: str) -> str: + """ + Format author tags' content. + + :param author: + The contents of the author tag. If the string is on the + regular "author" format of ``"Firstname Lastname + <first.last@example.com>"`` then the email will be formatted + and hyperlinked. Otherwise the string is returned verbatim. + :return: + An HTML safe string, possibly including tags. 
+ """ + m = re.match(r'(?P<author>.*) (<(?P<email>.*)>)|(?P<any>.*)', author) + assert m, "The above regex can't fail" + if m['author'] and m['email']: + author = html.escape(m['author']) + email = html.escape(m['email']) + return f'{author} <a class="email" href="mailto:{email}"><{email}></a>;' + else: + return html.escape(m['any']) + + +# https://www.puppet.com/docs/puppet/7/cheatsheet_core_types.html +# https://www.puppet.com/docs/puppet/7/types/file.html +# ... +built_in_types = { + 'package', + 'file', + 'service', + 'notify', + 'exec', + 'user', + 'group', +} + +# https://www.puppet.com/docs/puppet/7/function.html#{} +built_in_functions = { + 'abs', + 'alert', + 'all', + 'annotate', + 'any', + 'assert_type', + 'binary_file', + 'break', + 'call', + 'camelcase', + 'capitalize', + 'ceiling', + 'chomp', + 'chop', + 'compare', + 'contain', + 'convert_to', + 'create_resources', + 'crit', + 'debug', + 'defined', + 'dig', + 'digest', + 'downcase', + 'each', + 'emerg', + 'empty', + 'epp', + 'err', + 'eyaml_lookup_key', + 'fail', + 'file', + 'filter', + 'find_file', + 'find_template', + 'flatten', + 'floor', + 'fqdn_rand', + 'generate', + 'get', + 'getvar', + 'group_by', + 'hiera', + 'hiera_array', + 'hiera_hash', + 'hiera_include', + 'hocon_data', + 'import', + 'include', + 'index', + 'info', + 'inline_epp', + 'inline_template', + 'join', + 'json_data', + 'keys', + 'length', + 'lest', + 'lookup', + 'lstrip', + 'map', + 'match', + 'max', + 'md5', + 'min', + 'module_directory', + 'new', + 'next', + 'notice', + 'partition', + 'realize', + 'reduce', + 'regsubst', + 'require', + 'return', + 'reverse_each', + 'round', + 'rstrip', + 'scanf', + 'sha1', + 'sha256', + 'shellquote', + 'size', + 'slice', + 'sort', + 'split', + 'sprintf', + 'step', + 'strftime', + 'strip', + 'tag', + 'tagged', + 'template', + 'then', + 'tree_each', + 'type', + 'unique', + 'unwrap', + 'upcase', + 'values', + 'versioncmp', + 'warning', + 'with', + 'yaml_data', +} + + +def find_declarations(objs: 
list[MatchObject]) -> list[str]: + """ + Find all local variable declarations. + + Searches the code for all local variable declarations, returing a + list of variable names. + + Note that the same variable might appear multiple times, for example: + + .. code-block:: puppet + :caption: The same variable being declared twice + + if $something { + $x = 10 + } else { + $x = 20 + } + """ + declarations = [] + for obj in objs: + match obj: + case MatchCompound(type='declaration', matched=xs): + for x in xs: + match x: + case MatchCompound(type='var', matched=ys): + declarations.append(inner_text(ys)) + return declarations + + +class Reserializer: + """ + Context for reserializing parsed data back into code. + + :param local_vars: + Variables declared within this file. Used when resolving + hyperlinks. + """ + + def __init__(self, local_vars: list[str]): + self.local_vars: list[str] = local_vars - setup_module_index(base=path, - module=module, - data=data, - path_base=path_base, - doc_files=doc_files, - ) + def reserialize(self, + obj: MatchObject | Sequence[MatchObject]) -> str: + """ + Reconstruct puppet code after parsing it. + + After building the parser, and parsing the puppet code into a tree + of MatchObjects; this procedure returns it into puppet code. + Difference being that we now have metadata, meaning that syntax + highlighting and variable hyperlinks can be inserted. + + :param obj: + Should be assumed to be a list of MatchObject's, or something similar. + + MatchCompound objects are serialized as + + .. code-block:: html + + <span class="{type}">{body}</span> + + strings as themselves, and lists have reserialize mapped over them. + + """ + out: list[str] = [] + # logger.info("obj = %a", obj) + + # TODO hyperlink functions. + # The problem is that a function can either be implemented in + # Puppet, or in Ruby. And Ruby functions' names aren't bound + # by the directory layout. 
+ match obj: + case str(s): + out.append(html.escape(s)) + + case MatchCompound(type='resource-name', matched=xs): + name = inner_text(xs) + url, cls = name_to_url(name) + if url: + out.append(f'<a href="{url}" class="resource-name {cls}">{name}</a>') + else: + # TODO this is class, but the class name should + # also be hyperlinked + out.append(f'<span class="resource-name {cls}">{name}</span>') + + case MatchCompound(type='invoke', matched=xs): + function = None + for x in xs: + match x: + case MatchCompound(type='qn', matched=ys): + if function is None: + function = inner_text(ys) + if function in built_in_functions: + # class="qn" + url = f"https://www.puppet.com/docs/puppet/7/function.html#{function}" # noqa: E501 + tag = f'<a href="{url}" class="puppet-doc">{self.reserialize(ys)}</a>' # noqa: E501 + out.append(tag) + else: + # TODO function to url + out.append(f'<span class="qn">{self.reserialize(ys)}</span>') + else: + if function == 'include': + url, cls = name_to_url(inner_text(ys)) + # class="qn" + tag = f'<a href="{url}" class="{cls}">{self.reserialize(ys)}</a>' # noqa: E501 + out.append(tag) + else: + out.append(self.reserialize(ys)) + case _: + out.append(self.reserialize(x)) + + case MatchCompound(type='declaration', matched=xs): + for x in xs: + match x: + case MatchCompound(type='var', matched=ys): + inner = ''.join(self.reserialize(y) for y in ys) + out.append(f'<span id="{inner_text(ys)}">{inner}</span>') + case _: + out.append(self.reserialize(x)) + + case MatchCompound(type='var', matched=xs): + out.append(self.var_to_url(inner_text(xs))) + + case MatchCompound(type=type, matched=xs): + body = ''.join(self.reserialize(x) for x in xs) + out.append(f'<span class="{type}">{body}</span>') + + case [*xs]: + out.extend(self.reserialize(x) for x in xs) + + case rest: + logger.error("Unknown type: %a", rest) + + return ''.join(out) + + def var_to_url(self, var: str) -> str: + """ + Format variable, adding hyperlink to its definition. 
+ + TODO these can refer to both defined types (`manifests/*.pp`), + as well as resource types (`lib/puppet/provider/*/*.rb` / + `lib/puppet/type/*.rb`) + + Same goes for functions (`functions/*.pp`), + (`lib/puppet/functions.rb`). + + :param var: + Name of the variable. + + :return: + An HTML anchor element. + """ + match var.split('::'): + case [name]: + # Either a local or global variable + # https://www.puppet.com/docs/puppet/7/lang_facts_and_builtin_vars.html + + href = None + cls = '' + if name in self.local_vars: + href = f'#{html.escape(var)}' + elif name in built_in_variables: + href = html.escape(built_in_variables[name]) + cls = 'puppet-doc' + + if href: + return f'<a class="var {cls}" href="{href}">{var}</a>' + else: + # `name` refers to a global fact. + return f'<span class="var">{var}</span>' + + case ['', name]: + # A global variable + if name in built_in_variables: + href = html.escape(built_in_variables[name]) + img = '<img src="/code/muppet-strings/output/static/favicon.ico" />' + return f'<a class="var" href="{href}">{var}{img}</a>' + else: + return f'<span class="var">{var}</span>' + + # Note the "special module" 'settings', + # https://www.puppet.com/docs/puppet/7/lang_facts_builtin_variables#lang_facts_builtin_variables-server-variables + case ['', module, *items, name]: + s = '/code/muppet-strings/output/' \ + + '/'.join([module, 'manifests', *(items if items else ['init'])]) + s += f'#{name}' + return f'<a class="var" href="{s}">{var}</a>' + case [module, *items, name]: + s = '/code/muppet-strings/output/' \ + + '/'.join([module, 'manifests', *(items if items else ['init'])]) + s += f'#{name}' + return f'<a class="var" href="{s}">{var}</a>' + case _: + raise ValueError() + + +def parse_puppet(source: str, file: str, in_parameters: list[str]) -> str: + """ + Parse and syntax highlight the given puppet source. + + :returns: An HTML string + """ + # Run the upstream puppet parser, + # then massage the tree into a usable form. 
+ ast = build_ast(puppet_parser(source)) + + # From the ast, build a parser combinator parser. + # This parser will attach sufficient metadata to allow syntax + # highlighting and hyperlinking + parser = ParserFormatter().serialize(ast) + + # Run the generated parser, giving us a list of match objects. + match_objects = ParserCombinator(source, file).get(parser) + + # Reserialize the matched data back into puppet code, realizing + # the syntax highlighting and hyperlinks. + return Reserializer(find_declarations(match_objects) + (in_parameters)) \ + .reserialize(match_objects) diff --git a/muppet/puppet/format/parser.py b/muppet/puppet/format/parser.py index aaffae3..8366612 100644 --- a/muppet/puppet/format/parser.py +++ b/muppet/puppet/format/parser.py @@ -210,7 +210,7 @@ class ParserFormatter(Serializer[ParseDirective]): type = self.s(item.type) value = optional(ws & '=' & ws & self.s(item.v)) - return name(f'decl-${item.k}', ws & type & ws & '$' & item.k & value) + return name(f'decl-${item.k}', ws & type & ws & '$' & tag('declaration', item.k) & value) def instanciation_parameter(self, param: PuppetInstanciationParameter) -> ParseDirective: """ @@ -331,15 +331,15 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_access(self, it: PuppetAccess) -> ParseDirective: - return tag('access', ws & self.s(it.how) & ws & self.known_array('[]', it.args)) + return ws & tag('access', self.s(it.how) & ws & self.known_array('[]', it.args)) @override def _puppet_array(self, it: PuppetArray) -> ParseDirective: - return tag('array', ws & self.known_array('[]', it.items)) + return ws & tag('array', self.known_array('[]', it.items)) @override def _puppet_binary_operator(self, it: PuppetBinaryOperator) -> ParseDirective: - return ws & self.s(it.lhs) & ws & it.op & ws & self.s(it.rhs) + return ws & self.s(it.lhs) & ws & tag('op', it.op) & ws & self.s(it.rhs) @override def _puppet_block(self, it: PuppetBlock) -> ParseDirective: @@ -347,15 +347,17 @@ class 
ParserFormatter(Serializer[ParseDirective]): @override def _puppet_call(self, it: PuppetCall) -> ParseDirective: - return ws & self.s(it.func) & \ - optional(ws & self.known_array('()', it.args)) & \ - optional(ws & self.s(it.block)) + return ws & tag('call', + self.s(it.func) & + optional(ws & self.known_array('()', it.args)) & + optional(ws & self.s(it.block))) @override def _puppet_call_method(self, it: PuppetCallMethod) -> ParseDirective: - return ws & self.s(it.func) & \ - optional(ws & self.known_array('()', it.args)) & \ - optional(ws & self.s(it.block)) + return ws & tag('call-method', + self.s(it.func) & + optional(ws & self.known_array('()', it.args)) & + optional(ws & self.s(it.block))) @override def _puppet_case(self, it: PuppetCase) -> ParseDirective: @@ -373,7 +375,7 @@ class ParserFormatter(Serializer[ParseDirective]): def _puppet_class(self, it: PuppetClass) -> ParseDirective: parser = (ws & tag('keyword', 'class') & ws & tag('name', it.name) & optional(ws & self.declaration_parameters('()', it.params))) - parser &= optional(ws & 'inherits' & ws & tag('inherits', it.parent)) + parser &= optional(ws & tag('keyword', 'inherits') & ws & tag('inherits', it.parent)) parser &= ws & '{' & ws & self.s(it.body) & ws & '}' return parser @@ -391,7 +393,7 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_concat(self, it: PuppetConcat) -> ParseDirective: - parser = ws & '"' + parser: ParseDirective = s('"') for fragment in it.fragments: match fragment: case PuppetString(st): @@ -400,15 +402,15 @@ class ParserFormatter(Serializer[ParseDirective]): case _: parser &= interpolated_form(self.s(fragment)) parser &= s('"') & ws - return parser + return ws & tag('string', parser) @override def _puppet_declaration(self, it: PuppetDeclaration) -> ParseDirective: - # TODO tag with declaration - return ws & self.s(it.k) & ws & '=' & ws & self.s(it.v) + return ws & tag('declaration', self.s(it.k)) & ws & '=' & ws & self.s(it.v) @override def 
_puppet_define(self, it: PuppetDefine) -> ParseDirective: + # TODO tag name with something return (ws & tag('keyword', 'define') & ws & it.name & optional(ws & self.declaration_parameters('()', it.params)) & ws & '{' & ws & self.s(it.body) & ws & '}') @@ -451,14 +453,14 @@ class ParserFormatter(Serializer[ParseDirective]): This will however not give any false positives, since our parser is built from the source. """ - parser = ws & optional(s('{')) + parser = optional(s('{')) for entry in it.entries: parser &= (ws & self.s(entry.k) & - ws & '=>' & + ws & tag('op', '=>') & ws & self.s(entry.v) & optional(ws & ',')) parser &= ws & optional(s('}')) - return parser + return ws & tag('hash', parser) @override def _puppet_if_chain(self, it: PuppetIfChain) -> ParseDirective: @@ -497,7 +499,7 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_keyword(self, it: PuppetKeyword) -> ParseDirective: - return tag('keyword', ws & it.name) + return ws & tag('keyword', it.name) @override def _puppet_lambda(self, it: PuppetLambda) -> ParseDirective: @@ -507,7 +509,7 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_literal(self, it: PuppetLiteral) -> ParseDirective: - return tag('literal', ws & it.literal) + return ws & tag('literal', it.literal) @override def _puppet_heredoc(self, it: PuppetHeredoc) -> ParseDirective: @@ -561,7 +563,7 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_node(self, it: PuppetNode) -> ParseDirective: - parser = ws & 'node' & ws + parser = ws & tag('keyword', 'node') & ws for match in it.matches: parser &= ws & match & ws & "," parser &= ws & "{" & ws & self.s(it.body) & "}" @@ -573,18 +575,18 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_number(self, it: PuppetNumber) -> ParseDirective: - parser: ParseDirective = ws + parser: ParseDirective match (it.x, it.radix): case int(x), 8: - parser &= s('0') & oct(x)[2:] + parser = s('0') & oct(x)[2:] case 
int(x), 16: - parser &= s('0') & 'x' & hex(x)[2:] + parser = s('0') & 'x' & hex(x)[2:] case x, None: - parser &= str(x) + parser = s(str(x)) case _: raise ValueError(f"Unexpected radix: {it.radix}") - return parser + return ws & tag('number', parser) @override def _puppet_parenthesis(self, it: PuppetParenthesis) -> ParseDirective: @@ -592,15 +594,15 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_qn(self, it: PuppetQn) -> ParseDirective: - return tag('qn', ws & it.name) + return ws & tag('qn', it.name) @override def _puppet_qr(self, it: PuppetQr) -> ParseDirective: - return tag('qr', ws & it.name) + return ws & tag('qr', it.name) @override def _puppet_regex(self, it: PuppetRegex) -> ParseDirective: - return tag('rx', ws & '/' & it.s.replace('/', r'\/') & '/') + return ws & tag('rx', s('/') & it.s.replace('/', r'\/') & '/') @override def _puppet_resource(self, it: PuppetResource) -> ParseDirective: @@ -611,17 +613,19 @@ class ParserFormatter(Serializer[ParseDirective]): case 'exported': parser &= '@@' - parser &= ws & self.s(it.type) & ws & '{' + parser &= ws & tag('resource-name', self.s(it.type)) & ws & '{' + # TODO tag things here for key, params in it.bodies: parser &= ws & self.s(key) & ws & ':' for param in params: parser &= self.instanciation_parameter(param) parser &= ws & optional(s(';')) parser &= ws & '}' - return parser + return tag('resource', parser) @override def _puppet_resource_defaults(self, it: PuppetResourceDefaults) -> ParseDirective: + # TODO tag things here parser = ws & self.s(it.type) & ws & '{' & ws for param in it.ops: parser &= self.instanciation_parameter(param) @@ -630,6 +634,7 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_resource_override(self, it: PuppetResourceOverride) -> ParseDirective: + # TODO tag things here parser = ws & self.s(it.resource) & ws & '{' & ws for param in it.ops: parser &= self.instanciation_parameter(param) @@ -638,6 +643,7 @@ class 
ParserFormatter(Serializer[ParseDirective]): @override def _puppet_selector(self, it: PuppetSelector) -> ParseDirective: + # TODO tag things here parser = ws & self.s(it.resource) & ws & '?' & ws & '{' for key, body in it.cases: parser &= ws & self.s(key) & ws & '=>' & ws & self.s(body) & ws & optional(s(',')) @@ -669,21 +675,20 @@ class ParserFormatter(Serializer[ParseDirective]): @override def _puppet_unary_operator(self, it: PuppetUnaryOperator) -> ParseDirective: - return ws & it.op & ws & self.s(it.x) + return ws & tag('op', it.op) & ws & self.s(it.x) @override def _puppet_unless(self, it: PuppetUnless) -> ParseDirective: - parser = (ws & 'unless' & ws & self.s(it.condition) & ws & '{' & + parser = (ws & tag('keyword', 'unless') & ws & self.s(it.condition) & ws & '{' & ws & self.s(it.consequent) & ws & '}') - parser &= optional(ws & 'else' & ws & '{' & ws & self.s(it.alternative) & + parser &= optional(ws & tag('keyword', 'else') & + ws & '{' & + ws & self.s(it.alternative) & ws & '}') return parser @override def _puppet_var(self, it: PuppetVar) -> ParseDirective: - # TODO highlight entire decalaration - # TODO hyperlink? - # The leading '$' is optional, since it's optional for # variables in string interpolations, e.g. "${x}". 
return name(f'${it.name}', ws & optional(s('$')) & tag('var', it.name)) diff --git a/muppet/puppet/strings/__init__.py b/muppet/puppet/strings/__init__.py index a8a9987..6970662 100644 --- a/muppet/puppet/strings/__init__.py +++ b/muppet/puppet/strings/__init__.py @@ -35,6 +35,10 @@ from dataclasses import dataclass, field import logging from .internal import Deserializable import re +import json +import os.path +import hashlib +from muppet.cache import AbstractCache logger = logging.getLogger(__name__) @@ -639,17 +643,42 @@ class PuppetStrings(Deserializable): # -------------------------------------------------- +class HasDocstring(Protocol): + """Something which has a docstring attribute.""" + + docstring: DocString + + +def isprivate(entry: HasDocstring) -> bool: + """ + Is the given puppet declaration marked private. + + Assumes input is a dictionary as returned by puppet strings, one + of the entries in (for example) 'puppet_classes'. + + Currently only checks for an "@api private" tag. + """ + for tag in entry.docstring.tags: + if tag.tag_name == 'api' and \ + tag.text == 'private': + return True + return False + + def puppet_strings(path: str) -> bytes: """ Run ``puppet strings`` on puppet module at path. Returns a bytes object rather than a :class:`PuppetStrings` - object, to efficeiently writing the output to a cache. + object, to help with the implementation of caching. + + See :func:`puppet_strings_cached` for a caching version. .. 
code-block:: python :caption: Example Invocation - >>> PuppetStrings.from_json(puppet_strings("/etc/puppetlabs/code/modules/stdlib")) + >>> path = "/etc/puppetlabs/code/modules/stdlib" + >>> PuppetStrings.from_json(puppet_strings(path)) """ # All this extra weird stuff with tempfiles and pipes since puppet # strings output errors on stdout, and only if the --out flag @@ -693,23 +722,30 @@ def puppet_strings(path: str) -> bytes: return tmpfile.read() -class HasDocstring(Protocol): - """Something which has a docstring attribute.""" - - docstring: DocString - - -def isprivate(entry: HasDocstring) -> bool: +def puppet_strings_cached(path: str, cache: AbstractCache) -> PuppetStrings: """ - Is the given puppet declaration marked private. + Run puppet strings, but check cache first. - Assumes input is a dictionary as returned by puppet strings, one - of the entries in (for example) 'puppet_classes'. + :param path: + Path to the source of this module. - Currently only checks for an "@api private" tag. + :param cache: + Cache instance used to look up and store the raw ``puppet strings`` output. """ - for tag in entry.docstring.tags: - if tag.tag_name == 'api' and \ - tag.text == 'private': - return True - return False + print("path =", path) + try: + with open(os.path.join(path, 'metadata.json'), 'rb') as f: + # self.metadata = json.loads(data) + data = f.read() + checksum = hashlib.sha1(data).hexdigest() + key = f'puppet-strings{checksum}' + + if parsed := cache.get(key): + result = parsed + else: + result = puppet_strings(path) + cache.put(key, result) + except FileNotFoundError: + result = puppet_strings(path) + + return PuppetStrings.from_json(json.loads(result)) diff --git a/muppet/templates.py b/muppet/templates.py deleted file mode 100644 index 80254a7..0000000 --- a/muppet/templates.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Function wrappers around jinja templates. - -This allows for type checking. 
-""" - -from typing import ( - Any, - Optional, -) -from jinja2 import ( - Environment, - FileSystemLoader, -) -from .breadcrumbs import Breadcrumbs -from .gather import ModuleEntry - -jinja = Environment( - loader=FileSystemLoader('templates'), - autoescape=False, -) - - -def code_page(*, - title: str, - content: str, - path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None - ) -> str: # pragma: no cover - """Template for a page containing puppet code.""" - template = jinja.get_template('code_page.html') - return template.render( - title=title, - content=content, - path_base=path_base, - breadcrumbs=breadcrumbs) - - -def content(*, - content: str, - path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None - ) -> str: # pragma: no cover - """Template for a page with arbitrary content.""" - template = jinja.get_template('content.html') - return template.render( - content=content, - path_base=path_base, - breadcrumbs=breadcrumbs) - - -def index(*, - modules: list[ModuleEntry], - path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None - ) -> str: # pragma: no cover - """Root index file.""" - template = jinja.get_template('index.html') - return template.render( - path_base=path_base, - modules=modules, - breadcrumbs=breadcrumbs) - - -def module_index( - *, - # content: list[], # something with to_html_list and to_html - content: list[Any], # TODO something with to_html_list and to_html - module_author: str, - module_name: str, - doc_files: list[tuple[str, str]], - path_base: str, - breadcrumbs: Optional[Breadcrumbs] = None, - ) -> str: # pragma: no cover - """Index for a single module.""" - template = jinja.get_template('module_index.html') - return template.render( - content=content, - module_author=module_author, - module_name=module_name, - doc_files=doc_files, - path_base=path_base, - breadcrumbs=breadcrumbs) diff --git a/muppet/util.py b/muppet/util.py index df48f32..252fd31 100644 --- a/muppet/util.py +++ b/muppet/util.py @@ -32,6 +32,24 @@ def 
group_by(proc: Callable[[T], U], seq: Sequence[T]) -> dict[U, list[T]]: return d +def partition(proc: Callable[[T], bool], seq: Sequence[T]) -> tuple[list[T], list[T]]: + """ + Partition items into the items not matching, and matching. + + :param proc: + Predicate function, grouping elements. + + :param seq: + Sequence of items to partition. + + :return: + Two lists, with the left one being all items not matching the + predicate, and the right one being all elements that matched. + """ + groups = group_by(proc, seq) + return groups.get(False, []), groups.get(True, []) + + def concatenate(lstlst: Iterable[Iterable[T]]) -> list[T]: """Concatenate a list of lists into a flat(er) list.""" out: list[T] = [] diff --git a/static-src/style.scss b/static-src/style.scss index ec677d7..70d6ee9 100644 --- a/static-src/style.scss +++ b/static-src/style.scss @@ -77,10 +77,29 @@ code.json { display: inline; } -.example { - background: lightgray; - padding: 1em; +.code-example { + /* Lighter Light gray */ + background: #edecea; border-radius: $border-radius; + overflow: hidden; + margin: 1em; + + pre { + padding: 1em; + padding-top: 1pt; + padding-bottom: 1pt; + } + + + .code-example-header { + background: beige; + /* Dark beige */ + border-bottom: 2px solid #b2b2a0; + padding: 1em; + padding-bottom: 0; + padding-top: 0; + font-family: sans; + } } .comment { @@ -196,6 +215,20 @@ span.error { color: white; } +code .puppet-doc { + color: orange; +} + +.email { + font-family: mono; + font-size: 80%; +} + + +dt .variable { + font-weight: bold; +} + /* -------------------------------------------------- */ @import "colorscheme_default"; diff --git a/templates/base.html b/templates/base.html index 9c8fa46..d116b16 100644 --- a/templates/base.html +++ b/templates/base.html @@ -13,7 +13,7 @@ Parameters: An optional list of breadcrumb items. 
#} <!doctype html> -<html> +<html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> @@ -30,6 +30,7 @@ Parameters: } </style> </noscript> + <title>{% block title %}Muppet{% endblock %}</title> </head> <body> <header> diff --git a/templates/code_page.html b/templates/code_page.html index 2e7f99c..5b2d364 100644 --- a/templates/code_page.html +++ b/templates/code_page.html @@ -19,7 +19,6 @@ Parameters: <li><a href="index.html">Rendered</a></li> <li><a href="source.pp.html">Source</a></li> <li><a href="source.pp.txt">Raw Source</a></li> - <li><a href="source.json">JSON blob</a></li> </ul> {{ content }} {% endblock %} diff --git a/templates/module_index.html b/templates/module_index.html index 3448000..7f76a36 100644 --- a/templates/module_index.html +++ b/templates/module_index.html @@ -8,15 +8,22 @@ Parameters: #} {% extends "base.html" %} {% block left_sidebar %} - {# environment list #} + {% if left_sidebar %} + {{ left_sidebar }} + {% endif %} {% endblock %} {% block right_sidebar %} {# Table of contents, including all types #} + {# <ul class="toc"> {% for entry in content %} {{ entry.to_html_list() }} {% endfor %} </ul> + #} + {% if right_sidebar %} + {{ right_sidebar }} + {% endif %} {% endblock %} {% block content %} <h1>{{ module_author }} / {{ module_name.title() }}</h1> diff --git a/templates/snippets/ResourceType-index-entry.html b/templates/snippets/ResourceType-index-entry.html index b08d658..45501f4 100644 --- a/templates/snippets/ResourceType-index-entry.html +++ b/templates/snippets/ResourceType-index-entry.html @@ -1,6 +1,11 @@ {# +:param item: An instance of ResourceTypeOutput +:param prefix: Prefix for HTTP output path, + (e.g. 
'/code/muppet-strings/output') +:param module_name: + #} -<dt><a href="#">{{ item.base() }}</a></dt> +<dt><a href="{{ prefix }}/{{ module_name }}/lib/puppet/types/{{ item.base() }}.rb">{{ item.base() }}</a></dt> <dd> {% if item.summary %} <!-- TODO docstring.text --> -- GitLab