Skip to content
Snippets Groups Projects
Commit d25e1fbc authored by Hugo Hörnquist's avatar Hugo Hörnquist
Browse files

Add JSON parser as parser combinator example.

parent 1e7d6487
No related branches found
No related tags found
No related merge requests found
"""Unit tests for JSON parser written in my parser combinator."""
import json
from json2 import (
json_value,
json_string,
_json_keyword,
json_number,
)
from muppet.parser_combinator import ParserCombinator
def test_string():
    """A quoted JSON string parses to its unquoted contents."""
    parser = ParserCombinator('"Hello"')
    assert parser.get(json_string) == "Hello"
def test_number_int():
    """A bare integer literal parses to a one-element list with that int."""
    parsed = ParserCombinator("1").get(json_number)
    assert parsed == [1]
def test_number_decimal():
    """A literal with a fractional part parses to the matching float."""
    parsed = ParserCombinator("1.1").get(json_number)
    assert parsed == [1.1]
def test_number_exp():
    """A literal with a negative exponent parses to the matching float."""
    parsed = ParserCombinator("1e-10").get(json_number)
    assert parsed == [1e-10]
def test_number_full():
    """Sign, fraction and exponent combined parse to the matching float."""
    parsed = ParserCombinator("-1.1e10").get(json_number)
    assert parsed == [-1.1e10]
def test_keyword():
    """Each JSON keyword parses to the corresponding Python constant."""
    expectations = (
        ("true", True),
        ("false", False),
        ("null", None),
    )
    for literal, constant in expectations:
        assert ParserCombinator(literal).get(_json_keyword) == [constant]
# Sample Python values, keyed by a descriptive label. Each value is
# something ``json.dumps`` can serialise.
tests = {
    "integer": 10,
    "negative": -10,
    "decimal": 1.2,
    "float": 1e20,
    "string": "Hello, World",
    "object": {"a": 10, "b": 20},
    "array": [1, 2, 3, 4],
    "nested": [{}, {"a": 10}],
    "keyword": True,
}
def test_good():
    """
    Serialise every sample value and run the result through the parser.

    The assertion only requires that the parse result can be indexed and
    has a non-empty ``repr``; the parsed value is not compared against
    the original sample.
    """
    for sample in tests.values():
        encoded = json.dumps(sample)
        parsed = ParserCombinator(encoded).get(json_value)
        assert repr(parsed[0])
def test_pre_serialized():
    """Parse hand-written JSON string literals, such as a unicode escape."""
    samples = {
        'escaped': r'"Hello \u0041 World"',
    }
    for label, text in samples.items():
        # Printed so the sample being parsed is visible in the test output.
        print(label)
        parsed = ParserCombinator(text).get(json_string)
        assert parsed[0]
"""
A basic JSON parser.
This parser shouldn't be used, but is instead here to demonstrate how
to use the parser combinator library.
Beyond the obvious, take particular note of:
- handlers and their type transformations
- lambdas for lazy evaluation.
"""
from muppet.parser_combinator import (
MatchCompound,
MatchObject,
ParseDirective,
complement,
const,
count,
digit,
discard,
hexdig,
many,
many1,
name,
optional,
or_,
s,
space,
tag,
)
from typing import Optional, TypeVar
import math
T = TypeVar('T')


def force(t: Optional[T]) -> T:
    """
    Discard the None part of an optional value.

    Only use this when you /know/ that the value exists. Falsy values
    other than ``None`` (``0``, ``""``, empty containers) are valid
    and are returned unchanged.

    :param t:
        The possibly-``None`` value.
    :returns:
        ``t`` itself, now known not to be ``None``.
    :raises AssertionError:
        If the value was ``None`` after all.
    """
    # Compare against None explicitly: a truthiness test would reject
    # legitimate falsy values, and a bare ``assert`` is stripped when
    # Python runs with -O.
    if t is None:
        raise AssertionError("force() called on None")
    return t
def handle_int(xs: list[MatchObject]) -> list[int]:
    """
    Turn the leading match into an integer.

    Intended as a handler on digit parsers, for example::

        many(digit) @ handle_int

    Only ``xs[0]`` is converted, so this relies on adjacent matches
    having been joined into a single item.

    :param xs:
        The matched items; only the first one is read.
    :returns:
        A single-element list holding the converted integer.
    """
    first = xs[0]
    return [int(first)]
def _handle_exp(parts: list[MatchObject]) -> list[int]:
    """
    Compute the signed integer value of a float's exponent part.

    :param parts:
        Matched components; must contain an item tagged ``'dig'`` and
        may contain one tagged ``'sign'``.
    :returns:
        A single-element list with the exponent, negated when the sign
        component starts with ``'-'``.
    """
    digits = force(__find('dig', parts))
    magnitude = digits.matched[0]

    sign = __find('sign', parts)
    negative = bool(sign) and sign.matched[0][0] == '-'

    return [magnitude * -1 if negative else magnitude]
def _handle_number(parts: list[MatchObject]) -> list[float]:
    """
    Construct a float from its tagged components.

    A float is structured as ``±{base}.{dec}e{exp}``. The components are
    looked up among ``parts`` by tag; any that are absent are skipped.

    :param parts:
        Matched components, searched for the tags ``'base'`` (integer
        part), ``'fractional'`` (digits after the decimal point),
        ``'exp'`` (exponent) and ``'minus'`` (present when negative).
    :returns:
        A single-element list holding the constructed number.

    NOTE(review): the fractional digits arrive already converted to an
    integer, so leading zeros (as in ``1.05``) appear to be lost before
    this function runs -- confirm against the grammar.
    """
    total: float = 0
    if base := __find('base', parts):
        total += base.matched[0]
    if frac := __find('fractional', parts):
        d = frac.matched[0]
        # Scale by the number of digits. Counting the digits textually
        # also covers d == 0 (e.g. "1.0"), where math.log10 would raise
        # ValueError.
        total += d / 10**len(str(d))
    if exp := __find('exp', parts):
        total *= 10**exp.matched[0]
    if __find('minus', parts):
        total *= -1
    return [total]
# Optional whitespace; whatever it matches is discarded.
ws = discard(name('ws', many(space)))

# One of the non-zero digits '1' through '9'.
digit_19 = or_(*"123456789")
# ``\uXXXX`` escape: the matched hex digits are read as a base-16 code
# point and turned into the corresponding character.
# NOTE(review): JSON requires exactly four hex digits; confirm that
# ``count(hexdig, 3, 5)`` accepts exactly four.
_hex_esc = (discard(r'\u') & count(hexdig, 3, 5)) @ (lambda x: chr(int(x[0], 16)))

# Backslash escapes allowed inside a JSON string, each mapped to the
# character it denotes. ``\f`` (form feed) is part of the JSON grammar
# and is included alongside the other single-character escapes.
_json_esc = (_hex_esc
             | s(r'\"') @ const('"')
             | s(r'\/') @ const("/")
             | s(r'\b') @ const("\b")
             | s(r'\f') @ const("\f")
             | s(r'\n') @ const("\n")
             | s(r'\r') @ const("\r")
             | s(r'\t') @ const("\t")
             | s(r'\\') @ const("\\"))

# A single string character: an escape sequence, or whatever
# ``complement(r'\"')`` accepts (presumably any character other than a
# backslash or a double quote -- confirm against the library).
_json_char = name('_json_char', _json_esc | complement(r'\"'))

# A complete string: the surrounding quotes are discarded and the first
# remaining item is returned as the result.
json_string = name('json_string',
                   discard('"')
                   & many(_json_char)
                   & discard('"')) \
    @ (lambda x: x[0])
# Fractional part: a discarded "." followed by digits, converted with
# handle_int and tagged 'fractional' so _handle_number can look it up.
_fraction = discard(".") & tag('fractional', many1(digit) @ handle_int)
# Exponent part: a discarded "e"/"E", an optional 'sign' and the 'dig'
# digits. _handle_exp folds these into one signed integer, which is then
# tagged 'exp'.
_exponent = tag('exp',
(discard(s("e") | "E")
& optional(tag('sign', s("-") | "+"))
& tag('dig', many1(digit) @ handle_int)) @ _handle_exp)
# A full number: optional 'minus', a mandatory 'base' (either "0" or a
# non-zero digit followed by more digits), then the optional fraction
# and exponent. _handle_number assembles the tagged pieces.
json_number = (optional(tag('minus', "-"))
& tag('base', (s("0") | digit_19 & many(digit)) @ handle_int)
& optional(_fraction)
& optional(_exponent)) @ _handle_number
# The three JSON keywords. Each handler ignores the matched text and
# returns the matching Python constant wrapped in a one-element list.
_json_keyword = s("true") @ (lambda _: [True]) \
| s("false") @ (lambda _: [False]) \
| s("null") @ (lambda _: [None])
# One "key": value pair of an object, tagged 'kv' with 'key' and 'value'
# children. json_value is defined further down, so it is referenced
# through a lambda that defers the name lookup.
_json_kv = tag('kv',
ws & tag('key', json_string) &
ws & discard(":")
& tag('value', lambda: json_value))
# An object: braces around either comma-separated key/value pairs or
# only whitespace. The braces and commas are discarded.
json_object = tag(
'object', discard("{") & (_json_kv & many(discard(",") & _json_kv) | ws) & discard("}"))
# An array: brackets around either comma-separated values or only
# whitespace. The contents sit inside a lambda because json_value is not
# defined yet at this point.
json_array = tag('array', discard("[")
& (lambda: (json_value & many(discard(",") & json_value)) | ws)
& discard("]"))
# Any JSON value, with optional surrounding whitespace. The alternatives
# are listed in the order string, number, keyword, object, array.
json_value: ParseDirective \
= (ws & (json_string |
json_number |
_json_keyword |
json_object |
json_array)
& ws)
def __find(key: str, objs: list[MatchObject]) -> Optional[MatchObject]:
"""Locate the first matching object of type key."""
for item in objs:
match item:
case MatchCompound(type=s) if s == key:
return item
return None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment