# slim/utils/repath.py
#
# Summary
#   Maintainability: C (1 day)
#   Test Coverage:
#
# Original code: https://github.com/nickcoutsos/python-repath

import re
from urllib import parse as urllib

# Concrete class of compiled regular expressions, taken from a throwaway
# compiled pattern; used for isinstance() checks on path arguments.
REGEXP_TYPE = re.compile('').__class__


# Match escaped characters that would otherwise appear in future matches.
# This allows the user to escape special characters that won't transform.
#
# Match Express-style parameters and un-named parameters with a prefix
# and optional suffixes. Matches appear as:
#
#  Path         | prefix | name   | capture | group | suffix | asterisk
# --------------+--------+--------+---------+-------+--------+----------
#  /:test(\d+)? | "/"    | "test" | "\d+"   | None  | "?"    | None
#  /route(\d+)  | None   | None   | None    | "\d+" | None   | None
#  /*           | "/"    | None   | None    | None  | None   | "*"

# Tokenizer for express-style path strings, compiled in verbose mode (re.X)
# so the whitespace and newlines inside the literal are not part of the
# pattern. Each match is one of:
#   * ``escaped``  - a backslash followed by any single character;
#   * a parameter  - an optional ``prefix`` ("/" or "."), then either
#                    ``:name`` with an optional parenthesised ``capture`` or a
#                    bare parenthesised ``group``, then an optional ``suffix``
#                    ("+", "*" or "?");
#   * ``asterisk`` - a lone "*", optionally preceded by a ``prefix``.
# ``capture`` and ``group`` accept escaped characters and anything that is not
# a parenthesis, so nested parentheses are not matched.
PATH_REGEXP = re.compile(r'''
    (?P<escaped>\\.)
    |
    (?P<prefix>[/.])?
    (?:
        (?:
            \:
            (?P<name>\w+)
            (?:
                \(
                (?P<capture>
                    (?:
                        \\.
                        |
                        [^()]
                    )+
                )
                \)
            )?
            |
            \(
            (?P<group>
                (?:
                    \\.
                    |
                    [^()]
                )
            +)
            \)
        )
        (?P<suffix>[+*?])?
        |
        (?P<asterisk>\*)
    )
''', re.X)

# Regex fragments filled in with str.format(); the placeholders are
# ``prefix``, ``name`` and ``capture``.
PATTERNS = {
    # Zero or more further "prefix + capture" occurrences of a repeating token.
    'REPEAT': '(?:{prefix}{capture})*',
    # A token that may be missing altogether, prefix included.
    'OPTIONAL': '(?:{prefix}({name}{capture}))?',
    # A token that must be present.
    'REQUIRED': '{prefix}({name}{capture})',
}


def escape_string(string):
    """
    Backslash-escape the regex special characters found in a literal path
    fragment so the fragment matches itself.

    :param string: literal text to escape
    :return: the text with each of ``. + * ? = ^ ! : $ { } ( ) [ ] |``
        preceded by a backslash
    """
    special_chars = r'([.+*?=^!:${}()[\]|])'
    return re.sub(
        special_chars,
        lambda found: '\\' + found.group(1),
        string,
    )


def escape_group(group):
    """
    Backslash-escape the characters ``= ! : $ ( )`` inside a capture pattern.

    :param group: regex source of a capture group
    :return: the pattern with each of those characters preceded by a backslash
    """
    group_chars = r'([=!:$()])'
    return re.sub(
        group_chars,
        lambda found: '\\' + found.group(1),
        group,
    )


def parse(string):
    """
    Split an express-style path string into its raw tokens.

    Literal stretches of the path become plain strings; every parameter
    becomes a dict with the keys ``name``, ``prefix``, ``delimiter``,
    ``optional``, ``repeat`` and ``pattern``.

    :param string: express-style path string
    :return: list of path tokens used by :func:`tokens_to_pattern`
    """
    result = []
    literal = ''
    cursor = 0
    unnamed_count = 0

    for found in PATH_REGEXP.finditer(string):
        groups = found.groupdict()

        # Text between the previous match and this one is literal path.
        literal += string[cursor:found.start(0)]
        cursor = found.end(0)

        escaped = groups['escaped']
        if escaped:
            # Keep only the character after the backslash.
            literal += escaped[1]
            continue

        # A parameter starts here, so flush the literal gathered so far.
        if literal:
            result.append(literal)
            literal = ''

        prefix = groups['prefix']
        delimiter = prefix or '/'

        if groups['capture']:
            raw_pattern = groups['capture']
        elif groups['group']:
            raw_pattern = groups['group']
        elif groups['asterisk']:
            raw_pattern = '.*'
        else:
            # Default: anything up to the next delimiter.
            raw_pattern = '[^%s]+?' % delimiter

        # Un-named parameters are numbered in order of appearance.
        name = groups['name']
        if not name:
            name = unnamed_count
            unnamed_count += 1

        suffix = groups['suffix']
        result.append({
            'name': str(name),
            'prefix': prefix or '',
            'delimiter': delimiter,
            'optional': suffix in ('?', '*'),
            'repeat': suffix in ('+', '*'),
            'pattern': escape_group(raw_pattern),
        })

    # Whatever follows the last match is literal as well.
    literal += string[cursor:]
    if literal:
        result.append(literal)

    return result


def tokens_to_template(tokens):
    """
    Generate a function for templating tokens into a path string.

    :param tokens: list of tokens as produced by :func:`parse`; plain strings
        are emitted verbatim, dict tokens are filled in from supplied values
    :return: a function that takes a dict of values (or ``None``) and returns
        the rendered path string
    """
    def template_function(obj):
        """
        Render the path from the values in *obj*.

        :param obj: dict mapping token names to values; ``None`` is treated
            as an empty dict
        :return: the rendered path string
        :raises KeyError: a required token has no value
        :raises TypeError: a list was given for a token that does not repeat
        :raises ValueError: a required repeating token got an empty list, or
            a value does not match the token's pattern
        """
        path = ''
        obj = obj or {}

        for token in tokens:
            if isinstance(token, str):
                path += token
                continue

            # Wrap the pattern in a non-capturing group so that the anchors
            # apply to the whole pattern; without it an alternation such as
            # "a|b" would become "^a|b$" and accept "abc" or "xb".
            regexp = re.compile('^(?:%s)$' % token['pattern'])

            value = obj.get(token['name'])
            if value is None:
                if token["optional"]:
                    continue
                else:
                    raise KeyError(
                        'Expected "{name}" to be defined'.format(**token)
                    )

            if isinstance(value, list):
                if not token['repeat']:
                    raise TypeError(
                        'Expected "{name}" to not repeat'.format(**token)
                    )

                if len(value) == 0:
                    if token['optional']:
                        continue
                    else:
                        raise ValueError(
                            'Expected "{name}" to not be empty'.format(**token)
                        )

                for i, val in enumerate(value):
                    val = str(val)
                    if not regexp.search(val):
                        raise ValueError(
                            'Expected all "{name}" to match "{pattern}"'.format(**token)
                        )

                    # The first item is introduced by the token's prefix, the
                    # following ones by its delimiter.
                    path += token['prefix'] if i == 0 else token['delimiter']
                    path += urllib.quote(val, '')

                continue

            value = str(value)
            if not regexp.search(value):
                raise ValueError(
                    'Expected "{name}" to match "{pattern}"'.format(**token)
                )

            path += token['prefix'] + urllib.quote(value.encode('utf8'), '-_.!~*\'()')

        return path
    return template_function


def tokens_to_pattern(tokens, end=True, strict=False):
    """
    Generate a pattern for the given list of tokens.

    :param tokens: list of tokens as produced by :func:`parse`; may be empty
    :param end: when true the pattern is anchored with ``$``; otherwise it
        only requires that a ``/`` or the end of the string follows
    :param strict: when false a trailing slash in the path is dropped and an
        optional trailing slash is accepted in matched strings
    :return: A regular expression pattern string starting with ``^``
    """
    route = ''
    # An empty token list (e.g. from parsing '') has no last token; treat it
    # as a path without a trailing slash instead of raising IndexError.
    last = tokens[-1] if tokens else ''
    trailing_slash = isinstance(last, str) and last.endswith('/')

    for token in tokens:
        if isinstance(token, str):
            route += escape_string(token)
            continue

        parts = {
            'prefix': escape_string(token['prefix']),
            'capture': token['pattern'],
            'name': ''
        }

        # Only names containing an ASCII letter become named groups; the
        # purely numeric names of un-named parameters stay positional.
        if token['name'] and re.search('[a-zA-Z]', token['name']):
            parts['name'] = '?P<%s>' % re.escape(token['name'])

        if token['repeat']:
            parts['capture'] += PATTERNS['REPEAT'].format(**parts)

        segment_necessity = 'OPTIONAL' if token['optional'] else 'REQUIRED'
        segment_template = PATTERNS[segment_necessity]
        route += segment_template.format(**parts)

    if not strict:
        # Drop the literal trailing slash and accept an optional one instead.
        route = route[:-1] if trailing_slash else route
        route += '(?:/(?=$))?'

    if end:
        route += '$'
    else:
        # Without end-anchoring the match must still stop at a segment
        # boundary, unless a strict trailing slash already provides one.
        route += '' if strict and trailing_slash else '(?=/|$)'

    return '^%s' % route


def compile(path, flags=0, **options):
    """
    Build a compiled regular expression from the given path.

    :param path: express-style path string
    :param flags: (optional) regex flags as defined in :mod:`re`
    :param options: (optional) dictionary of options accepted by :func:`pattern`
    :return: A :mod:`re` compiled regular expression object.
    """
    regex_source = pattern(path, **options)
    return re.compile(regex_source, flags)


def match(path, string, flags=0, **options):
    """
    Match a string against a path.

    :param path: express-style path string
    :param string: a string to match against the generated regular expression
    :param flags: (optional) regex flags as defined in :mod:`re`
    :param options: (optional) dictionary of options accepted by :func:`pattern`
    :return: A :mod:`re` match object, or ``None`` when the string does not
        match
    """
    # Forward the caller's flags; they used to be replaced by a literal 0,
    # so e.g. re.IGNORECASE had no effect.
    return compile(path, flags=flags, **options).match(string)


def pattern(path, **options):
    """
    Generate a pattern from any kind of path value.

    Dispatches on the type of *path*: a compiled regex yields its own pattern
    source, a list yields an alternation of the patterns of its items, and
    anything else is parsed as an express-style path string.

    :param path: express-style path string, or list of paths, or compiled regex
    :param end: Make *path* match to the end of strings (default ``True``)
    :param strict: Enforce trailing slash in matched strings (default ``False``)
    :return: A regular expression pattern string
    """
    # A compiled regex is used as-is; the options do not apply to it.
    if isinstance(path, REGEXP_TYPE):
        return path.pattern

    if not isinstance(path, list):
        return tokens_to_pattern(parse(path), **options)

    # Each list item may itself be a string, a list or a compiled regex.
    alternatives = '|'.join([pattern(item, **options) for item in path])
    return '(?:%s)' % alternatives


def template(path):
    """
    Turn a path string into a template function for that path.

    :param path: express-style path string
    :return: A template function for generating paths from given field values
    """
    path_tokens = parse(path)
    return tokens_to_template(path_tokens)