tZ3ma/strutils

View on GitHub
src/strutils/core.py

Summary

Maintainability
A
1 hr
Test Coverage
# src/strutils/core.py
"""
(compound) STRing UTILitieS - A collection of (compound string utilities).

Used to grow on the fly. Aims to provide abstract, general purpose
functionalities.


.. autosummary::
   :nosignatures:

   sos_on
   permute_splits
   variate_compounds
"""
import collections
import itertools as it

import ittools

patterns = [
    "{}{: .0}{: .0}",  # first
    "{: .0}{: .0}{}",  # second
    "{}{}{}",  # first.second
    "{: .1}{}{}",  # f.second
    "{}{}{: .1}",  # first.s
]


def sos_on(compound_string, using=str.capitalize, split_at="_", stitch_with="_"):
    """Split Operate Stitch.

    Split the :paramref:`~sos_on.compound_string` at
    :paramref:`~sos_on.split_at` and operate on it with
    :paramref:`~sos_on.using` on each component to stitch it back
    together into one string using `:paramref:~sos_on.stitch_with`.

    Parameters
    ----------
    compound_string: str
        Compound strings of which the components are to be split, operated and
        stitched.

    using: :class:`~collections.abc.Callable`
        Callable mapped to each component of the string.

    split_at: str, default='_'
        Each component of the inbound compound string  will be identified by
        this.

    stitch_with: str, default='_'
        The outbound compound string will be stitched together using this.

    Yields
    ------
    :class:`~collections.abc.Generator`
        Generator object yielding the splitted, operated and stitched compound
        strings.

    Examples
    --------
    >>> import pprint
    >>> capitilzed = sos_on('variable_cost', using=str.capitalize)
    >>> print(type(capitilzed))
    <class 'generator'>
    >>> pprint.pprint(list(capitilzed))
    ['variable_cost', 'variable_Cost', 'Variable_cost', 'Variable_Cost']
    """
    splitted = compound_string.split(split_at)

    operated = map(using, splitted)
    recombined = it.product(*zip(splitted, operated))

    for combination in recombined:
        stitched = stitch_with.join(combination)
        yield stitched


def permute_splits(strings, stitch_with="_", split_at="_"):
    """Split compound strings and permute its components.

    Split each compund string in
    :paramref:`~permute_splits.strings` at :paramref:`~permute_splits.split_at`
    into its components. Create all possible permutations and stitch them
    back together into compound strings using
    `:paramref:~permute_splits.stitch_with`.

    Parameters
    ----------
    strings: :class:`~collections.abc.Iterable`, str
        String or iterable of compound strings of which the components are to
        be permutated. Must be of depth <= 1. i.e.:
        ``"My_String" or ["My-String1", "My-String2"]``

    split_at: str, default='_'
        Each component of the inbound compound string  will be identified by
        this.

    stitch_with: str, default='_'
        The outbound compound string will be stitched together using this.

    Yields
    ------
    :class:`~collections.abc.Generator`
        Generator object yielding the permmuted compound strings.

    Examples
    --------
    >>> import pprint
    >>> capitilzed = sos_on('variable_cost', using=str.capitalize)
    >>> permuted = permute_splits(strings=capitilzed)
    >>> print(type(permuted))
    <class 'generator'>
    >>> pprint.pprint(list(permuted))
    ['variable_cost',
     'cost_variable',
     'variable_Cost',
     'Cost_variable',
     'Variable_cost',
     'cost_Variable',
     'Variable_Cost',
     'Cost_Variable']
    """
    # 'enforce' container of depth one (DOES NOT FLATTEN)
    if not isinstance(strings, collections.abc.Generator):
        strings = ittools.nestify(strings, target_depth=1)

    for string in strings:
        splitted = string.split(split_at)
        if len(splitted) > 1:
            permutated = it.permutations(splitted)  # , len(splitted))
            for permute in permutated:
                yield stitch_with.join(permute)

        else:
            # non compound strings
            yield splitted[0]


def variate_compounds(
    compound_string,
    using=str.capitalize,
    permutate="True",
    split_at="_",
    stitch_with="_",
):
    """Split, variate and stitch compound strings.

    Convenience wrapper for :func:`sos_on` and :func:`permute_splits`.

    Designed to quickly generate variations of a compound string.

    Parameters
    ----------
    compound_string: str
        Compound string of which the components are to be split, operated and
        stitched.

    using: :class:`~collections.abc.Callable`
        Callable mapped to each compound of the string. Used for creating
        variations. See :func:`sos_on` for details on the mutations.

    permutate: bool, default=True
        If true, all possible permations of the variated compound strings
        will be generated. See :func:`permute_splits` for details on
        the permutation process.

    split_at: str, default='_'
        Each component of the inbound compound string  will be identified by
        this.

    stitch_with: str, default='_'
        The outbound compound string will be stitched together using this.

    Returns
    -------
    variated: :class:`~collections.abc.Generator`
        Generator object yielding the variated compound strings.

    Examples
    --------
    Note how getting a list of generator can be a little inconvenient:

    >>> import pprint
    >>> variated = variate_compounds('variable_cost')
    >>> pprint.pprint(list(variated))
    ['variable_cost',
     'cost_variable',
     'variable_Cost',
     'Cost_variable',
     'Variable_cost',
     'cost_Variable',
     'Variable_Cost',
     'Cost_Variable']

    Use nested for loops and :func:`itertools.chain` to create complex mappings
    of variated compound strings:

    >>> import pprint
    >>> import collections
    >>> import itertools
    >>> import pandas
    >>> stitchers = ['_', ' ']
    >>> compounds = ['variable_cost', 'flow_costs']
    >>> variations = collections.defaultdict(list)
    >>> for string in compounds:
    ...     for stitcher in stitchers:
    ...         variations[string] = list(itertools.chain(
    ...             variations[string],
    ...             variate_compounds(string, stitch_with=stitcher)))
    >>> print(pandas.DataFrame(variations))
        variable_cost  flow_costs
    0   variable_cost  flow_costs
    1   cost_variable  costs_flow
    2   variable_Cost  flow_Costs
    3   Cost_variable  Costs_flow
    4   Variable_cost  Flow_costs
    5   cost_Variable  costs_Flow
    6   Variable_Cost  Flow_Costs
    7   Cost_Variable  Costs_Flow
    8   variable cost  flow costs
    9   cost variable  costs flow
    10  variable Cost  flow Costs
    11  Cost variable  Costs flow
    12  Variable cost  Flow costs
    13  cost Variable  costs Flow
    14  Variable Cost  Flow Costs
    15  Cost Variable  Costs Flow
    """
    if permutate:
        sorred = sos_on(
            compound_string, using=using, split_at=split_at, stitch_with=split_at
        )

        variated = permute_splits(
            strings=sorred, split_at=split_at, stitch_with=stitch_with
        )

        return variated

    # no permutation requested
    sorred = sos_on(
        compound_string, using=using, split_at=split_at, stitch_with=stitch_with
    )

    return sorred