# tsdat/tstring.py
#
# Summary
#
# Maintainability: A (2 hrs)
# Test Coverage
from __future__ import annotations

import re
from typing import Callable, Mapping, Match

# Names exported by a star-import of this module: only the `Template` class.
__all__ = ("Template",)

_SQUARE_BRACKET_REGEX = r"\[(.*?)\]"
_CURLY_BRACKET_REGEX = r"\{(.*?)\}"


def _substitute(
    template: str,
    mapping: Mapping[str, str | Callable[[], str] | None] | None = None,
    allow_missing: bool = False,
    **kwds: str | Callable[[], str] | None,
) -> str:
    """Substitutes variables in a template string.

    The template string is expected to be formatted in the same way as python f-strings,
    with variables that should be substituted wrapped in curly braces `{}`.
    Additionally, square brackets may be used around curly brackets and other text to
    mark that substitution as optional -- i.e. if the variable cannot be found then the
    text wrapped in the square brackets will be removed.

    Examples:

        `mapping = dict(a="x", b="y", c="z")`

        `substitute("{a}.{b}{c}w", mapping) == "x.yzw"  # True`

        `substitute("{a}.{b}[.{c}]", mapping) == "x.y.z"  # True`

        `substitute("{a}.{b}[.{d}]", mapping) == "x.y"  # True`

        `substitute("{a}.{b}.{d}", mapping, True) == "x.y.{d}"  # True`

        `substitute("{a}.{b}.{d}", mapping, False)  # raises ValueError

    Args:
        template (str): The template string. Variables to substitute should be wrapped
            by curly braces `{}`.
        mapping (Mapping[str, str | Callable[[], str] | None] | None): A key-value pair
            of variable name to the value to replace it with. If the value is a
            string it is dropped-in directly. If it is a no-argument callable the
            return value of the callable is used. If it is None, then it is treated
            as missing and the action taken depends on the `allow_missing` parameter.
        allow_missing (bool, optional): Allow variables outside of square brackets to be
            missing, in which case they are left as-is, including the curly brackets.
            This is intended to allow users to perform some variable substitutions
            before all variables in the mapping are known. Defaults to False.
        **kwds (str | Callable[[], str] | None): Optional extras to be merged into the
            mapping dict. If a keyword passed here has the same name as a key in the
            mapping dict, the value here would be used instead.

    Raises:
        ValueError: If the substitutions cannot be made due to missing variables.

    Returns:
        str: The template string with the appropriate substitutions made.
    """
    if mapping is None:
        mapping = {}
    mapping = {**mapping, **kwds}

    def _sub_curly(match: Match[str]) -> str:
        # group(1) returns string without {}, group(0) returns with {}
        # result is we only do replacements that we can actually do.
        res = mapping.get(match.group(1))
        if callable(res):
            res = res()
        if allow_missing and res is None:
            res = match.group(0)
        elif res is None:
            raise ValueError(f"Substitution cannot be made for key '{match.group(1)}'")
        return res

    def _sub_square(match: Match[str]) -> str:
        # make curly substitutions inside of square brackets or remove the whole thing
        # if substitutions cannot be made.
        try:
            resolved = re.sub(_CURLY_BRACKET_REGEX, _sub_curly, match.group(1))
            return resolved if resolved != match.group(1) else ""
        except ValueError:
            return ""

    squared = re.sub(_SQUARE_BRACKET_REGEX, _sub_square, template)
    resolved = re.sub(_CURLY_BRACKET_REGEX, _sub_curly, squared)

    return resolved


def _generate_regex(template: str) -> str:
    """Generates a regex pattern which can be used to extract the values substituted
    into a template string.

    Args:
        template (str): The template string to generate a regex pattern for.

    Returns:
        str: The regex pattern with named groups according to the template.
    """
    regex_pattern = "^"

    i = 0
    while i < len(template):
        char = template[i]
        if char == "{":
            var_start = i + 1
            var_end = template.index("}", var_start)
            var_name = template[var_start:var_end]
            regex_pattern += f"(?P<{var_name}>[_a-zA-Z0-9]+)"
            i = var_end + 1
        elif char == "[":
            regex_pattern += "(?:"
            i += 1
        elif char == "]":
            regex_pattern += ")?"
            i += 1
        else:
            regex_pattern += re.escape(char)
            i += 1

    regex_pattern += "$"
    return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]
#             regex_pattern += f"(?P<{var_name}>[^.*]+)?"
#             i = var_end + 1
#         elif char == "[":
#             regex_pattern += "(?:"
#             i += 1
#         elif char == "]":
#             regex_pattern += ")?"
#             i += 1
#         else:
#             regex_pattern += re.escape(char)
#             i += 1
#     regex_pattern += "$"
#     return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     is_optional = False
#     optional_part = ""

#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]

#             if is_optional:
#                 # regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>[^.]*))?"
#                 regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>.*?))?"
#                 optional_part = ""
#             else:
#                 regex_pattern += f"(?P<{var_name}>[^.]*)"

#             i = var_end + 1
#         elif char == "[":
#             is_optional = True
#             optional_part = ""
#             i += 1
#         elif char == "]":
#             is_optional = False
#             regex_pattern += re.escape(optional_part) + ")?"
#             optional_part = ""
#             i += 1
#         else:
#             if is_optional:
#                 optional_part += char
#             else:
#                 regex_pattern += re.escape(char)
#             i += 1

#     regex_pattern += "$"
#     return regex_pattern


# def _generate_regex(template: str) -> str:
#     regex_pattern = "^"
#     is_optional = False
#     optional_part = ""

#     i = 0
#     while i < len(template):
#         char = template[i]
#         if char == "{":
#             var_start = i + 1
#             var_end = template.index("}", var_start)
#             var_name = template[var_start:var_end]

#             if is_optional:
#                 regex_pattern += f"(?:{re.escape(optional_part)}(?P<{var_name}>.*?))?"
#                 optional_part = ""
#             else:
#                 regex_pattern += f"(?P<{var_name}>.*?)"

#             i = var_end + 1
#         elif char == "[":
#             is_optional = True
#             optional_part = ""
#             i += 1
#         elif char == "]":
#             is_optional = False
#             if optional_part:
#                 regex_pattern += f"(?:{re.escape(optional_part)})?"
#             optional_part = ""
#             i += 1
#         else:
#             if is_optional:
#                 optional_part += char
#             else:
#                 regex_pattern += re.escape(char)
#             i += 1

#     regex_pattern += "$"
#     return regex_pattern


class Template:
    """Python f-string implementation with lazy and optional variable substitutions.

    The template string is expected to be formatted in the same way as python f-strings,
    with variables that should be substituted wrapped in curly braces `{}`.
    Additionally, square brackets may be used around curly brackets and other text to
    mark that substitution as optional -- i.e. if the variable cannot be found then the
    text wrapped in the square brackets will be removed.


    Examples:

        `mapping = dict(a="x", b="y", c="z")`

        `Template("{a}.{b}{c}w").substitute(mapping) # -> "x.yzw"`

        `Template("{a}.{b}[.{c}]").substitute(mapping) # -> "x.y.z"`

        `Template("{a}.{b}.{d}").substitute(mapping)  # raises ValueError`

        `Template("{a}.{b}[.{d}]").substitute(mapping) # -> "x.y"`

        `Template("{a}.{b}.{d}").substitute(mapping, True) # -> "x.y.{d}"`

    Args:
        template (str): The template string. Variables to substitute should be wrapped
            by curly braces `{}`.
        regex (str | None, optional): A regex pattern used to extract the substitutions
            used to create a formatted string with this template. Generated
            automatically if not provided.

    Raises:
        ValueError: If the curly or square brackets in the template are not balanced.
    """

    def __init__(self, template: str, regex: str | None = None) -> None:
        """Validates the template's brackets and stores the template and its regex."""
        if not self._is_balanced(template):
            raise ValueError(f"Unbalanced brackets in template string: '{template}'")
        self.template = template
        # A missing (or empty) regex falls back to one derived from the template.
        self.regex = regex or _generate_regex(template)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.template!r})"

    def __str__(self) -> str:
        return self.template

    @classmethod
    def _is_balanced(cls, template: str) -> bool:
        """Checks that every `{` / `[` is closed by the matching bracket, in order."""
        # Stack of the closing brackets we still expect to see, innermost last.
        stack: list[str] = []
        for char in template:
            if char in "{[":
                stack.append("}" if char == "{" else "]")
            elif char in "}]":
                if not stack or char != stack.pop():
                    return False
        return len(stack) == 0

    def substitute(
        self,
        mapping: Mapping[str, str | Callable[[], str] | None] | None = None,
        allow_missing: bool = False,
        **kwds: str | Callable[[], str] | None,
    ) -> str:
        """Substitutes variables in a template string.

        Args:
            mapping (Mapping[str, str | Callable[[], str] | None] | None): A key-value pair
                of variable name to the value to replace it with. If the value is a
                string it is dropped-in directly. If it is a no-argument callable the
                return value of the callable is used. If it is None, then it is treated
                as missing and the action taken depends on the `allow_missing` parameter.
            allow_missing (bool, optional): Allow variables outside of square brackets to be
                missing, in which case they are left as-is, including the curly brackets.
                This is intended to allow users to perform some variable substitutions
                before all variables in the mapping are known. Defaults to False.
            **kwds (str | Callable[[], str] | None): Optional extras to be merged into the
                mapping dict. If a keyword passed here has the same name as a key in the
                mapping dict, the value here would be used instead.

        Raises:
            ValueError: If the substitutions cannot be made due to missing variables.

        Returns:
            str: The template string with the appropriate substitutions made.
        """
        # Merge the keyword overrides here instead of forwarding them as **kwds, so a
        # variable that happens to be named `template` cannot collide with the
        # `template` parameter of `_substitute`.
        merged = {**({} if mapping is None else mapping), **kwds}
        return _substitute(self.template, merged, allow_missing)

    def extract_substitutions(
        self, formatted_str: str
    ) -> dict[str, str | None] | None:
        """Extracts the substitutions used to create the provided formatted string.

        Note that this is not guaranteed to return accurate results if the template
        is constructed such that separators between variables are ambiguous.

        Args:
            formatted_str (str): The formatted string

        Returns:
            dict[str, str | None] | None: A dictionary mapping each named group of the
                regex to its value in the formatted string. Groups that did not take
                part in the match (e.g. inside an optional section that is absent)
                map to None. Returns None if the string does not match at all.
        """
        match = re.match(self.regex, formatted_str)
        return match.groupdict() if match else None