# KarrLab/wc_utils (view on GitHub)
# wc_utils/util/string.py
#
# Summary
#   Maintainability: D (2 days)
#   Test coverage: B (89%)
""" String utilities.

:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Author: Jonathan Karr <jonrkarr@gmail.com>
:Date: 2017-03-20
:Copyright: 2017-2018, Karr Lab
:License: MIT
"""

import collections.abc
import re


def indent_forest(forest, indentation=2, keep_trailing_blank_lines=False, return_list=False):
    """ Render a forest of nested iterators as lines indented by depth

    Each object in `forest` becomes one or more lines which are prefixed by
    `depth * indentation` spaces, where `depth` is the nesting depth of the object in `forest`.

    Strings are treated as leaves rather than as iterators. A string which contains newlines is
    split into one line per newline-separated piece, and, unless `keep_trailing_blank_lines` is
    set, the blank lines at its end are dropped.

    Args:
        forest (:obj:`iterators` of `iterators`): a forest as an iterator of nested iterators
        indentation (:obj:`int`, optional): number of spaces to indent at each level
        keep_trailing_blank_lines (:obj:`Boolean`, optional): if set, keep trailing blank lines in
            strings in `forest`
        return_list (:obj:`Boolean`, optional): if set, return the indented lines as a list
            rather than joined into a single string

    Returns:
        :obj:`str` or :obj:`list` of :obj:`str`: the indented lines joined by newlines, or the
            list of indented lines if `return_list` is set
    """
    indented_lines = __indent_forest(forest, indentation, depth=0,
                                     keep_trailing_blank_lines=keep_trailing_blank_lines)
    return indented_lines if return_list else '\n'.join(indented_lines)


def __indent_forest(forest, indentation, depth, keep_trailing_blank_lines):
    """ Private, recursive helper which builds the list of indented lines for a forest

    Args:
        forest (:obj:`list` of :obj:`list`): a forest as an iterator of nested iterators
        indentation (:obj:`int`): number of spaces to indent at each level
        depth (:obj:`int`): current recursion depth; entries of a nested iterator are rendered
            one level deeper than the entries of the iterator that contains it
        keep_trailing_blank_lines (:obj:`Boolean`): if set, keep trailing blank lines in strings in
            `forest`

    Returns:
        :obj:`list` of :obj:`str`: list of strings, appropriately indented
    """
    pad = ' ' * depth * indentation

    # a leaf (a string or a non-iterable object) becomes a single line
    if not _iterable_not_string(forest):
        return [pad + str(forest)]

    rows = []
    for item in forest:
        if _iterable_not_string(item):
            # nested iterator: render its entries one level deeper
            rows.extend(__indent_forest(item, indentation, depth + 1, keep_trailing_blank_lines))
            continue

        pieces = str(item).split('\n')
        # only multi-line text is trimmed; text without a newline is always emitted as one line
        if len(pieces) > 1 and not keep_trailing_blank_lines:
            delete_trailing_blanks(pieces)
        rows.extend(pad + piece for piece in pieces)
    return rows


def _iterable_not_string(o):
    """ Determine whether an object is iterable, but is not a string

    Args:
        o (:obj:`object`): object to test

    Returns:
        :obj:`bool`: :obj:`True` if `o` is an instance of `collections.abc.Iterable` and is not
            an instance of :obj:`str`
    """
    # todo: consider generalizing this to other string-like types; only `str` is excluded, so
    # other iterables such as `bytes` are reported as iterable
    if isinstance(o, str):
        return False
    return isinstance(o, collections.abc.Iterable)


def delete_trailing_blanks(l_of_strings):
    """ Remove, in place, all of the blank lines at the end of a list of strings

    A line is blank if it is empty after applying `str.rstrip()`. Blank lines which are followed
    by a non-blank line are kept.

    Args:
        l_of_strings (:obj:`list` of :obj:`str`): a list of strings; modified in place
    """
    # walk backwards from the end to find how many leading entries must be kept
    n_keep = len(l_of_strings)
    while n_keep > 0 and not l_of_strings[n_keep - 1].rstrip():
        n_keep -= 1

    # leave the list untouched when there is nothing to remove
    if n_keep < len(l_of_strings):
        del l_of_strings[n_keep:]


def find_nth(s, sub, n, start=0, end=float('inf')):
    """ Get the index of the nth occurrence of a substring within a string

    Occurrences are counted from left to right and do not overlap: after a match, the search
    resumes immediately after the matched text.

    Args:
        s (:obj:`str`): string to search
        sub (:obj:`str`): substring to search for
        n (:obj:`int`): number of the occurrence to find the position of
        start (:obj:`int`, optional): starting position to search from; negative values are
            treated as 0
        end (:obj:`int`, optional): end position to search within; a match must lie entirely
            before this position, and values beyond the end of `s` are clamped to its length

    Returns:
        :obj:`int`: index of the nth occurrence of the substring within the string
            or -1 if there are fewer than n occurrences of the substring within
            the string

    Raises:
        :obj:`ValueError`: if `sub` is empty or `n` is less than 1
    """
    if not sub:
        raise ValueError('sub cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    sub_len = len(sub)
    # exclusive upper bound on the positions at which a complete match can begin
    stop = min(end, len(s)) - sub_len + 1
    count = 0
    # clamp to 0 so that a negative position is never used as a wrap-around slice index, which
    # would count occurrences twice and return negative indices
    i = max(start, 0)
    while i < stop:
        if s[i:i + sub_len] == sub:
            count += 1
            if count == n:
                return i
            # skip past the match so that occurrences do not overlap
            i += sub_len
        else:
            i += 1

    return -1


def rfind_nth(s, sub, n, start=0, end=float('inf')):
    """ Get the index of the nth-last occurrence of a substring within a string

    Occurrences are counted from right to left and do not overlap: after a match, the search
    resumes immediately before the matched text.

    Args:
        s (:obj:`str`): string to search
        sub (:obj:`str`): substring to search for
        n (:obj:`int`): number of the occurrence to find the position of
        start (:obj:`int`, optional): lowest position at which a match may begin; negative
            values are treated as 0
        end (:obj:`int`, optional): end position to search within; a match must lie entirely
            before this position, and values beyond the end of `s` are clamped to its length

    Returns:
        :obj:`int`: index of the nth-last occurrence of the substring within the string
            or -1 if there are fewer than n occurrences of the substring within
            the string

    Raises:
        :obj:`ValueError`: if `sub` is empty or `n` is less than 1
    """
    if not sub:
        raise ValueError('sub cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    sub_len = len(sub)
    # clamp to 0 so that the scan never continues into negative positions, where wrap-around
    # slice indices would count occurrences twice and return negative indices
    lowest = max(start, 0)
    count = 0
    # right-most position at which a complete match can begin
    i = min(len(s), end) - sub_len
    while i >= lowest:
        if s[i:i + sub_len] == sub:
            count += 1
            if count == n:
                return i
            # skip past the match so that occurrences do not overlap
            i -= sub_len
        else:
            i -= 1

    return -1


def partition_nth(s, sep, n):
    """ Split a string into three parts around the nth occurrence of a separator

    Args:
        s (:obj:`str`): string to partition
        sep (:obj:`str`): separator to partition on
        n (:obj:`int`): number of the occurrence to partition on

    Returns:
        :obj:`tuple`:

            * :obj:`str`: substring before the nth separator, or all of `s` if there are
              fewer than n separators
            * :obj:`str`: separator, or an empty string if there are fewer than n separators
            * :obj:`str`: substring after the nth separator, or an empty string if there are
              fewer than n separators

    Raises:
        :obj:`ValueError`: if `sep` is empty or `n` is less than 1
    """
    if not sep:
        raise ValueError('sep cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    sep_start = find_nth(s, sep, n)
    if sep_start == -1:
        # fewer than n separators: everything goes into the first part
        return (s, '', '')

    sep_end = sep_start + len(sep)
    before = s[0:sep_start] if sep_start != 0 else ''
    after = s[sep_end:] if sep_end != len(s) else ''
    return (before, sep, after)


def rpartition_nth(s, sep, n):
    """ Split a string into three parts around the nth-last occurrence of a separator

    Args:
        s (:obj:`str`): string to partition
        sep (:obj:`str`): separator to partition on
        n (:obj:`int`): number of the occurrence, counted from the end, to partition on

    Returns:
        :obj:`tuple`:

            * :obj:`str`: substring before the nth-last separator, or an empty string if
              there are fewer than n separators
            * :obj:`str`: separator, or an empty string if there are fewer than n separators
            * :obj:`str`: substring after the nth-last separator, or all of `s` if there are
              fewer than n separators

    Raises:
        :obj:`ValueError`: if `sep` is empty or `n` is less than 1
    """
    if not sep:
        raise ValueError('sep cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    sep_start = rfind_nth(s, sep, n)
    if sep_start == -1:
        # fewer than n separators: everything goes into the last part
        return ('', '', s)

    sep_end = sep_start + len(sep)
    before = s[0:sep_start] if sep_start != 0 else ''
    after = s[sep_end:] if sep_end != len(s) else ''
    return (before, sep, after)


def camel_case_to_snake_case(camel_case):
    """ Convert a string from camel case (e.g. SnakeCase) to snake case (e.g. snake_case)

    Args:
        camel_case (:obj:`str`): string in camel case

    Returns:
        :obj:`str`: string in snake case
    """
    # first pass: put an underscore before each capitalized word (an upper case letter followed
    # by lower case letters) that is preceded by another character
    with_word_breaks = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', camel_case)

    # second pass: put an underscore at each remaining lower-case-or-digit to upper-case boundary
    with_all_breaks = re.sub('([a-z0-9])([A-Z])', r'\1_\2', with_word_breaks)

    return with_all_breaks.lower()