From 4d8814c7f4548f43a0c70deae4ef256965fba49d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Tue, 19 Sep 2023 11:15:12 +0200
Subject: [PATCH] Major documentation for doc DocStringTag.

Extend DocStringTag to include "all" known types, along with document
how each of them behaves. This also forced a few changes to its
behaviour, which is reflected in the updated tests. The biggest change
is that we now barely test for valid "tag_name" values, since a valid
tag should always have a valid (albeit unknown) tag name.
---
 doc/doc-gather.rst                |   1 +
 doc/index.rst                     |   5 +
 muppet/gather.py                  |   1 +
 muppet/parser_combinator.py       |   1 +
 muppet/puppet/strings/__init__.py | 357 ++++++++++++++++++++++++++----
 tests/test_deserializable.py      |  45 ++--
 6 files changed, 352 insertions(+), 58 deletions(-)

diff --git a/doc/doc-gather.rst b/doc/doc-gather.rst
index 2e23c65..179c81b 100644
--- a/doc/doc-gather.rst
+++ b/doc/doc-gather.rst
@@ -27,4 +27,5 @@ which creates an AST in python objects.
 These modes can be tested with
 
 .. code-block:: sh
+
     python -m muppet.puppet {parser,ast,serialize}
diff --git a/doc/index.rst b/doc/index.rst
index fb3d599..0a779d0 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -8,6 +8,11 @@ Welcome to Muppet Strings's documentation!
 
 Muppet Strings is a tool for documenting entire Puppet environments.
 
+This tool makes heavy use of ``puppet strings``. `Puppet Strings
+Manual <https://www.puppet.com/docs/puppet/7/puppet_strings_style>`_
+is an expected pre-requisite to understand why this program works as
+it does.
+
 .. toctree::
    :maxdepth: 3
    :caption: Contents:
diff --git a/muppet/gather.py b/muppet/gather.py
index 3152562..f0a43d9 100644
--- a/muppet/gather.py
+++ b/muppet/gather.py
@@ -72,6 +72,7 @@ def get_puppet_strings(cache: Cache, path: str) -> Optional[PuppetStrings]:
             else:
                 result = puppet_strings(path)
                 cache.put(key, result)
+            logger.info('Deserializing %s', path)
             return PuppetStrings.from_json(json.loads(result))
     except FileNotFoundError:
         # TODO actually run puppet strings again.
diff --git a/muppet/parser_combinator.py b/muppet/parser_combinator.py
index f0e75f9..6f132a4 100644
--- a/muppet/parser_combinator.py
+++ b/muppet/parser_combinator.py
@@ -231,6 +231,7 @@ class ParseError(Exception):
         Stack trace of previous parsers. Recommended usage is
 
         .. code-block:: python
+
             try:
                 ...
             except ParseError as e:
diff --git a/muppet/puppet/strings/__init__.py b/muppet/puppet/strings/__init__.py
index 9826658..a8a9987 100644
--- a/muppet/puppet/strings/__init__.py
+++ b/muppet/puppet/strings/__init__.py
@@ -35,7 +35,6 @@ from dataclasses import dataclass, field
 import logging
 from .internal import Deserializable
 import re
-# from muppet.util import ILoggerAdapter
 
 
 logger = logging.getLogger(__name__)
@@ -46,39 +45,123 @@ class DocStringTag(Deserializable):
     """
     A generic DocString tag.
 
-    note, api, summary
+    These are the tagged blocks/lines in puppet comments, on the form:
+
+    .. code-block:: puppet
+
+        # @sumamry
+        #   The summary of the module
+
+    Everything after ``@summary`` on the same line is in this document
+    refered to as the "tag line", while the indented body below is
+    refered as the "tag body"
+
+    Note that many of these tags can be "generated" from the source.
+    For example, the following block will have a return tag in the
+    output of Puppet STrings, due to it's in code return type
+    declaration. This holds for other types as well.
+
+    .. code-block:: puppet
+
+        function f() >> Integer {
+            1
+        }
+
+
+    Finnaly, the tags `@!puppet.type.param` and
+    `!puppet.type.property` exists for dynamically generated
+    parameters and properties, respectively. These are currently not
+    handled separately by us.
+
+    https://www.puppet.com/docs/puppet/7/puppet_strings.html#strings-tags
 
     :param tag_name:
-        Type of the tag, like 'param', 'api', ...
+        Type of the tag, like 'param', 'api', e.t.c.
 
     :param text:
         Freeform text content of the tag.
-
-    TODO types
     """
 
-    tag_name:   str
-    text:       str = ''
-
-    types:      Optional[list[str]] = None
+    tag_name: str
+    text:     str = ''
 
     @staticmethod
     def _key() -> str:
         raise NotImplementedError()
 
+    @classmethod
+    def get_type(cls, obj: dict[str, Any]) -> type['DocStringTag']:
+        """
+        Find the correct tag subclass for the given dict.
+
+        Find the subclass of `DocStringTag` where ``_key()`` returns
+        the same name as the key "tag_name" in the given dictionary.
+
+        Defaults to the bare `DocStringTag`.
+
+        :param obj:
+            Object should be a dictionary representing a tag, as
+            returned by ``puppet strings``.
+
+            .. code-block:: json
+               :caption:
+                 An example JSON object.
+
+                {
+                  "tag_name": "example",
+                  "text": "include ::test",
+                  "name": "block name"
+                }
+        """
+        assert 'tag_name' in obj, "Tag dict MUST contain tag_name"
+        for c in cls.__subclasses__():
+            if obj['tag_name'] == c._key():
+                return c
+        else:
+            return DocStringTag
+
 
 @dataclass(kw_only=True)
 class DocStringParamTag(DocStringTag):
-    """Tag with tag_name set to 'param'."""
-
-    types: list[str]
     """
-    If tag_name is 'param', then this will be a singleton list of the
-    type (as a string). Untyped fields will be typed as 'Any'.
+    Documentation for a parameter to a class, definition, or function.
+
+    The following puppet code
+
+    .. code-block:: puppet
+
+        # @param path
+        #   Where the file should end up
+        class CLS (String $x = '/dev/null') {
+            # ...
+        }
+
+    Would yield the following the object:
+
+    .. code-block:: python
+
+        DocStringParamTag(tag_name='param',
+                          text='Where the file should end up'
+                          types=['String'])
+
+    TODO link to how default values are handled.
+
+    :param types:
+        A singleton list containing the type, exactly as it appeared
+        in the Puppet source (this means that compound types with
+        internal newlines will still have their newlines).
+
+        Note that the type declaration can also appear in the
+        documentation tag instead of the source code.
+
+        TODO won't this cause our parser to sometimes fail?
+
+    :param name:
+        Parameter name.
     """
 
     name: str
-    """Parameter name."""
+    types: Optional[list[str]] = None
 
     @staticmethod
     def _key() -> str:
@@ -87,15 +170,31 @@ class DocStringParamTag(DocStringTag):
 
 @dataclass(kw_only=True)
 class DocStringExampleTag(DocStringTag):
-    """Tag with tag_name set to 'example'."""
-
-    name: str
     """
-    Most likely name of language of ``text``.
+    An example block.
+
+    .. code-block:: puppet
+
+        # @example Name of the block
+        #   !!Contents of the block
+        #   Can (and probably should)
+        #   be multiline!!
+
+    :param text:
+        The contents of the example. Will in the example above be the
+        string delimited by '!!'. Whitespace equivalent to the
+        indentation of the first line will be removed from each line,
+        trailing whitespace is always removed.
 
-    Will be the empty string if not set.
+    :param name:
+        The name of the block, "Name of the block" in the example
+        above.
+
+        Will be the empty string if not set.
     """
 
+    name: str
+
     @staticmethod
     def _key() -> str:
         return 'example'
@@ -104,10 +203,30 @@ class DocStringExampleTag(DocStringTag):
 @dataclass(kw_only=True)
 class DocStringOverloadTag(DocStringTag):
     """
-    Tag with tag_name set to 'overload'.
+    The overload "tag".
+
+    This isn't a real tag, but rather something ``puppet strings``
+    generates for overrides in Puppet 4.x functions written in Ruby
+    (and possibly others).
+
+    One such example is puppetlabs-stdlib's
+    lib/puppet/functions/validate_legacy.rb (as of v8.1.0)
+    https://github.com/puppetlabs/puppetlabs-stdlib/blob/v8.1.0/lib/puppet/functions/validate_legacy.rb
+
+    :param signature:
+        A normalized version of the function signature.
+
+        .. code-block:: python
+
+            "validate_legacy(Any $scope, String $type_string, Any $value, Any *$args)"
+
+    :param docstring:
+        The actual documentation for the instance. Usually rather
+        long.
+
+    :param defaults:
+        TODO
 
-    These three will be set for puppet 4.x functions with overloads
-    when tag_name is 'overload'.
     """
 
     name: str
@@ -122,24 +241,185 @@ class DocStringOverloadTag(DocStringTag):
 
 @dataclass(kw_only=True)
 class DocStringOptionTag(DocStringTag):
-    """Tag with tag_name set to 'option'."""
+    """
+    An option for a hash parameter.
+
+    Parameters can be hashes, which should have their options
+    documented.
+
+    .. code-block:: puppet
+      :caption:
+        Example borrowed from the `puppet documentation
+        <https://www.puppet.com/docs/puppet/7/puppet_strings.html#strings-tags>`_
+
+        # @param [Hash] opts
+        #   List of options
+        # @option opts [String] :option1
+        #   option 1 in the hash
+        # @option opts [Array] :option2
+        #   option 2 in the hash
+
+    :param opt_name:
+        Name of the option. This is what will be given a string key.
+    :param opt_text:
+        The description of the option, given in the tag body.
+    :param opt_types:
+        A singleton list, if present, declaring valid value types. See
+        @param for further details.
+    :param parent:
+        The hash parameter which this option belongs to. Would be
+        ``opts`` in the above example.
+    :param name:
+        Seems to always be equal to parent.
+    """
 
     opt_name:   str
     opt_text:   str
-    opt_types:  str
     parent:     str
     name:       str
+    opt_types:  Optional[list[str]] = None
 
     @staticmethod
     def _key() -> str:
         return 'option'
 
 
+@dataclass(kw_only=True)
+class DocStringAuthorTag(DocStringTag):
+    """
+    Declaration of the author of an object.
+
+    .. code-block:: puppet
+
+        # @author Hugo Hörnquist
+
+    :param text:
+        The name of the author, "Hugo Hörnquist" in this example.
+    """
+
+    @staticmethod
+    def _key() -> str:
+        return 'author'
+
+
+@dataclass(kw_only=True)
+class DocStringApiTag(DocStringTag):
+    """
+    Declaration of API visibility.
+
+    .. code-block:: puppet
+
+        # @api private
+
+    :param text:
+        Only the value "private" seems to be in use.
+    """
+
+    @staticmethod
+    def _key() -> str:
+        return 'api'
+
+
+@dataclass(kw_only=True)
+class DocStringRaiseTag(DocStringTag):
+    """
+    Information about what a function may raise.
+
+    .. code-block:: puppet
+
+        # @raise PuppetError this error is raised if x
+
+    :param text:
+        The entire contents contentns of the tag.
+        The first word doesn't get any special treatment.
+    """
+
+    @staticmethod
+    def _key() -> str:
+        return 'raise'
+
+
+@dataclass(kw_only=True)
+class DocStringReturnTag(DocStringTag):
+    """
+    A return type annotation.
+
+    Notes what a function will return, can be given multiple times if
+    if multiple return values are possible. It's recommended that they
+    then start with "if ..."
+
+    .. code-block:: puppet
+
+        # @return [String] if something is true
+
+    :param text:
+        Freeform text descriping the return value. Can be the empty string.
+
+    :param types:
+        A singleton list, if present, containing the noted return type.
+    """
+
+    types: Optional[list[str]] = None
+
+    @staticmethod
+    def _key() -> str:
+        return 'return'
+
+
+@dataclass(kw_only=True)
+class DocStringSinceTag(DocStringTag):
+    """
+    Declaration of when a procedure was added.
+
+    Barely in use. Blame git blame.
+
+    :param text:
+        Free text describing version when the object was added.
+    """
+
+    @staticmethod
+    def _key() -> str:
+        return 'since'
+
+
+@dataclass(kw_only=True)
+class DocStringSummaryTag(DocStringTag):
+    """
+    Short summary of the object.
+
+    :param text:
+        Free-text summary of the object, should be limited to 140
+        characters.
+    """
+
+    @staticmethod
+    def _key() -> str:
+        return 'summary'
+
+
 @dataclass(kw_only=True)
 class DocStringSeeTag(DocStringTag):
-    """Tag with tag_name set to 'see'."""
+    """
+    A see other tag.
 
-    name:       str
+    .. code-block:: puppet
+
+        # @see https://example.com For more information
+
+    .. code-block:: python
+
+        DocStringSeeTag(name="https://example.com",
+                        text="For more information")
+
+    :param name:
+        The first word of the tag line. Should be an URL or code object.
+
+    :param text:
+        Remaining elements on tag line, or the tag body. A freeform
+        description of the reference for human consumption.
+    """
+
+    name: str
 
     @staticmethod
     def _key() -> str:
@@ -148,7 +428,7 @@ class DocStringSeeTag(DocStringTag):
 
 @dataclass(kw_only=True)
 class DocString(Deserializable):
-    """Documentation entry for something."""
+    """Documentation entry for any type of object."""
 
     text: str
     tags: list[DocStringTag] = field(default_factory=list)
@@ -158,24 +438,21 @@ class DocString(Deserializable):
         """
         Parse list of tag dictionaries.
 
-        The type of a tag dictionary depends on the value of
-        ``tag_name``.
+        The dynamic type choice is done here instead of in
+        `DocStringTag`, due to how `Deserialize` is constructed. In
+        short, it's much easier to modify the deserialization
+        behaviour of children rather than changing oneself. #deep
+
+        See `DocStringTag.get_type` for further information.
         """
         result: list[DocStringTag] = []
         for object in items:
-            # cls: type[DocStringTag]
-            cls: type
-            for c in DocStringTag.__subclasses__():
-                if object['tag_name'] == c._key():
-                    cls = c
-                    break
-            else:
-                cls = DocStringTag
+            cls = DocStringTag.get_type(object)
             try:
                 result.append(cls(**object))
             except TypeError as e:
-                logger.error("Bad tag set for tag object (class=%s) %s",
-                             cls.__name__, object,
+                logger.error("Bad tag set for tag object (class=%s) %s (%s)",
+                             cls.__name__, object, e,
                              exc_info=e)
                 raise e
         return result
diff --git a/tests/test_deserializable.py b/tests/test_deserializable.py
index e64914a..2f15f27 100644
--- a/tests/test_deserializable.py
+++ b/tests/test_deserializable.py
@@ -1,34 +1,43 @@
 import pytest
 from muppet.puppet.strings import (
     DocStringTag,
+    DocStringParamTag,
     DocString,
     DataTypeAlias,
 )
 
 
 def test_deserialize_from_json():
-    tagname = 'Tagname'
-    text = 'Contents of tagname'
-    types = ['what even is this?']
-
-    assert DocStringTag(tag_name=tagname, text=text, types=types) \
-        == DocStringTag.from_json({
-            'tag_name': tagname,
-            'text': text,
-            'types': types
-        })
+    """
+    Test deserializing of tag objects.
 
+    Of note here is that DocStringTag have many subclasses, and should
+    dynamically find the correct one based on 'tag_name'.
 
-def test_deserialize_from_json_unknown_key():
-    # TODO ensure that a worning is logged
-    DocStringTag.from_json({
-        'tag_name': 'Tagname',
+    Calling ``.to_json()`` directly on a `DocStringTag` (or on of its
+    subclasses) is ratnher uninteresting, since the dynamic choice
+    isn't available there.
+    """
+    data = {
+        'tag_name': 'param',
         'text': 'Contents of tagname',
-        'XXX': 'what even is this?'
-    })
+        'types': ['String'],
+        'name': 'x',
+    }
+
+    assert [DocStringParamTag(**data)] \
+        == DocString.from_json({
+            'text': '',
+            'tags': [data],
+            }).tags
 
 
 def test_deserialize_nested():
+    """
+    Test a nested parse.
+
+    This test is slightly redundant.
+    """
     text = 'A'
     tn = 'B'
     ttext = 'C'
@@ -47,8 +56,8 @@ def test_deserialize_nested():
             })
 
 
-def test_deserialize_from_json_type_error():
-    with pytest.raises(TypeError):
+def test_deserialize_from_json_error():
+    with pytest.raises(AssertionError):
         DocString.from_json({
             'text': 'text',
             'tags': [
-- 
GitLab