noirbizarre/json4humans

View on GitHub
json4humans/json5.py

Summary

Maintainability
A
0 mins
Test Coverage
A
100%
"""
This module implements the [JSON module protocol][json4humans.protocol.JSONModule]
for [JSON5](https://json5.org/).

The JSON5 Data Interchange Format ([JSON5](https://json5.org/)) is a superset of [JSON](https://json.org/)
that aims to alleviate some of the limitations of JSON by expanding its syntax to include some productions
from [ECMAScript 5.1](https://www.ecma-international.org/ecma-262/5.1/).

See: [specifications](https://spec.json5.org)

"""
from __future__ import annotations

import re
from typing import Any, cast

from lark import Token
from lark.visitors import merge_transformers, v_args

from . import json, protocol, wsc
from .jsonc import JSONCEncoder
from .style import StylePreservingTransformer, with_style
from .types import (  # noqa: F401
    WSC,
    AnyNumber,
    Array,
    Float,
    HexInteger,
    Ident,
    Identifier,
    Integer,
    JSONType,
    Key,
    Literal,
    Member,
    Number,
    Object,
    Quote,
    String,
    Value,
)


def _unescape(raw: str) -> str:
    """
    Decode the escape sequences found in the raw body of a JSON5 string.

    Non-ASCII characters present in `raw` are preserved as-is.
    """
    # `\/` is not an escape the `unicode_escape` codec knows about: resolve it upfront.
    without_escaped_slashes = raw.replace("\\/", "/")
    # `unicode_escape` reads its input bytes as Latin-1, so encoding as UTF-8 first would turn
    # every non-ASCII character into mojibake ("é" -> "Ã©"). Latin-1 keeps U+0080..U+00FF as
    # single bytes and `backslashreplace` rewrites anything above as `\uXXXX`/`\UXXXXXXXX`,
    # which the codec decodes back to the original character. ASCII input is unaffected.
    return without_escaped_slashes.encode("latin-1", "backslashreplace").decode("unicode_escape", "surrogatepass")


class JSON5Transformer(StylePreservingTransformer):
    """
    A [Transformer][lark.visitors.Transformer] for JSON5
    """

    @v_args(inline=True)
    def string(self, string: String):
        """
        Unwrap the `string` rule: its single child is returned untouched.
        """
        return string

    @v_args(inline=True)
    def SINGLE_QUOTE_CHARS(self, token: Token) -> str:
        """
        Decode the escape sequences of a single-quoted string body.
        """
        return _unescape(token.value)

    @v_args(inline=True)
    def DOUBLE_QUOTE_CHARS(self, token: Token) -> tuple[str, list[int]]:
        """
        Decode the escape sequences of a double-quoted string body.

        Returns the decoded text together with the offsets, in the raw token, of its newlines.
        """
        # The pattern is an escaped newline, so it matches the newline character itself:
        # each offset is the index of the newline, one past the backslash continuing the line.
        linebreaks = [m.start() for m in re.finditer("\\\n", token.value)]
        return _unescape(token.value), linebreaks

    @v_args(inline=True)
    def double_quote_string(self, string: tuple[str, list[int]] | None = None) -> String:
        """
        Build a double-quoted [String][json4humans.types.String] keeping track of its line breaks.

        A missing (or empty) child results in an empty string without any line break.
        """
        value, linebreaks = string or ("", [])
        return String(value, quote=Quote.DOUBLE, linebreaks=linebreaks)

    @v_args(inline=True)
    def single_quote_string(self, string: str | None = None) -> String:
        """
        Build a single-quoted [String][json4humans.types.String].

        A missing (or empty) child results in an empty string.
        """
        return String(string or "", quote=Quote.SINGLE)

    @v_args(inline=True)
    def identifier(self, string) -> Identifier:
        """
        Wrap the matched text into an [Identifier][json4humans.types.Identifier].
        """
        return Identifier(string)

    @v_args(inline=True)
    def number(self, number: Number):
        """
        Unwrap the `number` rule: its single child is returned untouched.
        """
        return number

    def SIGNED_HEXNUMBER(self, token: Token):
        """
        Parse an hexadecimal literal, remembering whether it carries an explicit sign.
        """
        return HexInteger(int(token.value, base=16), prefixed=token.value.startswith(("+", "-")))

    def SIGNED_NUMBER(self, token: Token):
        """
        Parse a decimal literal into a `Float` or an `Integer`.

        A literal is a `Float` as soon as it has a decimal point or an exponent.
        """
        text = token.value
        prefixed = text.startswith(("+", "-"))
        has_point = "." in text
        # The JSON5 exponent indicator may be written `e` or `E`: match it case-insensitively
        # so `1E5` is not handed to `Integer`.
        # NOTE(review): only matters if the grammar's SIGNED_NUMBER accepts an uppercase `E` - confirm.
        if has_point or "e" in text.lower():
            # Length of everything following the decimal point in the literal, `None` without point
            significand = len(text.split(".")[1]) if has_point else None
            return Float(
                text,
                prefixed=prefixed,
                leading_point=text.startswith("."),
                significand=significand,
            )
        return Integer(text, prefixed=prefixed)

    def object_with_trailing(self, children: list) -> Any:
        """
        Build an `Object` from the tuple children and record its tail and trailing comma.
        """
        # Members are the children already turned into tuples (see `pair` below)
        o = Object(cast(Member, c) for c in children if isinstance(c, tuple))
        # NOTE(review): relies on the rule always yielding at least 3 children, the tail being
        # the second to last one - confirm against the grammar.
        o.json_container_tail = children[-2]
        # A `Token` right before the tail is taken as the trailing comma
        o.json_container_trailing_coma = isinstance(children[-3], Token)
        return o

    # The children of a `pair` rule are simply packed into a tuple
    pair = tuple


# The transformer used for JSON5: a `JSON5Transformer` instance merged with the `wsc` and `json`
# modules' transformers, each one given to `merge_transformers` under its own keyword.
transformer = merge_transformers(JSON5Transformer(), wsc=wsc.transformer, json=json.transformer)


# Characters always escaped on output, mapped to the escape sequence written in their place.
# Replacements are raw strings: each one is a single backslash followed by the escape code
# (except the backslash itself, which is doubled).
ESCAPES = {
    "\\": r"\\",
    "\n": r"\n",
    "\r": r"\r",
    "\b": r"\b",
    "\f": r"\f",
    "\t": r"\t",
    "\v": r"\v",
    "\0": r"\0",
    # Line/paragraph separators are written as `\uXXXX` escapes.
    # A raw string must hold a single backslash here: with two, the output would be an escaped
    # backslash followed by the literal text `u2028`, which does not read back as the separator.
    "\u2028": r"\u2028",
    "\u2029": r"\u2029",
}


def escape_string(string: str, **escapes: str | int | None) -> str:
    """
    Escape `string` so it can be written between quotes.

    Extra `escapes` (character to replacement) are merged with `ESCAPES`,
    the latter taking precedence on conflicting characters.

    When `string` is a [String][json4humans.types.String], a line continuation
    (a backslash followed by a newline) is inserted back for each of its recorded `linebreaks`.
    """
    table = str.maketrans({**escapes, **ESCAPES})
    escaped = string.translate(table)
    if not isinstance(string, String):
        return escaped
    for position in string.linebreaks:
        # The continuation goes one character before the recorded offset
        cut = position - 1
        escaped = "".join((escaped[:cut], "\\\n", escaped[cut:]))
    return escaped


class JSON5Encoder(JSONCEncoder):
    def encode(self, obj: Any) -> str:
        match obj:
            case Number():
                return self.encode_number(obj)
        return super().encode(obj)

    @with_style
    def encode_number(self, obj: AnyNumber) -> str:
        return f"+{obj}" if obj > 0 and obj.prefixed else str(obj)

    @with_style
    def encode_string(self, obj: str) -> str:
        match obj:
            case String():
                return f"{obj.quote.value}{escape_string(obj)}{obj.quote.value}"
            case Identifier():
                return str(obj)
        return f'"{escape_string(obj)}"'


# Hand this module's transformer and encoder class over to `protocol.implement` under the "json5" name.
# NOTE(review): presumably this is what provides the JSON module protocol functions for this module
# (see the module docstring) - confirm in `protocol.implement`.
protocol.implement("json5", transformer, JSON5Encoder)