18F/federalist-garden-build

View on GitHub
src/repo_config/repo_config.py

Summary

Maintainability
A
0 mins
Test Coverage
A
97%
import fnmatch


class RepoConfig:
    '''
    Encapsulate the logic for handling the `federalist.json` configuration

    The file should look something like:
    {
        "fullClone": true,
        "headers": [
            {
                "/*": {
                    "cache-control": "no-cache"
                }
            }
        ],
        "excludePaths": [
            "**/Dockerfile",
            "/another_excluded_file.yml"
        ],
        "includePaths": [
            "/included_file",
            "/.well-known/security.txt"
        ],
        "cache": true
    }

    Each entry of "headers" is a single-key object mapping a path pattern to
    the headers for that pattern; entries are evaluated in order.

    Currently, only the following keys are utilized:
        - headers
        - excludePaths
        - includePaths
        - fullClone
        - cache
    '''

    def __init__(self, config=None, defaults=None):
        '''
        Store the site `config` and the fallback `defaults`

        Both are dict-like objects keyed as described in the class docstring.
        Omitting an argument (or passing None) yields a new empty dict.
        '''

        # Create the empty dicts per instance: a mutable default argument
        # would be shared by every RepoConfig constructed without arguments
        self.config = {} if config is None else config
        self.defaults = {} if defaults is None else defaults

    def get_headers_for_path(self, path_to_match):
        '''
        Determine the headers that apply to particular filepath

        Starts from the default headers and overlays the headers of the first
        configured rule whose pattern matches `path_to_match`. Overlaid keys
        are stripped and lower-cased, overlaid values are stripped; the
        default headers are used exactly as provided.
        '''

        # A shallow copy should be sufficient
        resolved_headers = self.defaults.get('headers', {}).copy()

        first_matching_cfg = find_first_matching_cfg(
            self.config.get('headers', []),
            path_to_match)

        # An empty dict means no rule matched, leaving only the defaults
        if first_matching_cfg:
            headers = first_value(first_matching_cfg)

            for key, value in headers.items():
                resolved_headers[key.strip().lower()] = value.strip()

        return resolved_headers

    def is_path_excluded(self, path_to_match):
        '''
        Determine if `path_to_match` is excluded

        A path is excluded when it contains a dot-prefixed segment or matches
        an exclude pattern, unless it also matches an include pattern.
        '''
        return ((contains_dotpath(path_to_match) or self.is_exclude_path_match(path_to_match))
                and not self.is_include_path_match(path_to_match))

    def is_path_included(self, path_to_match):
        '''
        Determine if `path_to_match` is included, ie it is not excluded
        '''
        return not self.is_path_excluded(path_to_match)

    def is_exclude_path_match(self, path_to_match):
        '''
        Determine if `path_to_match` matches any of the exclude patterns
        '''
        return is_path_match(self.exclude_paths(), path_to_match)

    def is_include_path_match(self, path_to_match):
        '''
        Determine if `path_to_match` matches any of the include patterns
        '''
        return is_path_match(self.include_paths(), path_to_match)

    def full_clone(self):
        '''
        Return True only when `fullClone` is exactly True in the config

        The defaults are not consulted for this key.
        '''
        return self.config.get('fullClone', False) is True

    def exclude_paths(self):
        '''
        Return the configured exclude patterns followed by the default ones
        '''
        return self.config.get('excludePaths', []) + self.defaults.get('excludePaths', [])

    def include_paths(self):
        '''
        Return the configured include patterns followed by the default ones
        '''
        return self.config.get('includePaths', []) + self.defaults.get('includePaths', [])

    def should_cache(self):
        '''
        Return True when `cache` is absent from the config or exactly True

        The defaults are not consulted for this key.
        '''
        return self.config.get('cache', True) is True


def contains_dotpath(filename):
    '''
    Report whether any '/'-separated segment of `filename` starts with '.'
    '''
    parts = filename.split('/')
    return any(part.startswith('.') for part in parts)


def is_path_match(patterns, path_to_match):
    '''
    Report whether `path_to_match`, with a leading slash ensured, matches
    at least one of the fnmatch-style `patterns`
    '''
    return any(
        fnmatch.fnmatch(prepend_slash(path_to_match), candidate)
        for candidate in patterns)


def find_first_matching_cfg(configuration_section, path_to_match):
    '''
    Find and return the FIRST configuration rule where the `path_to_match` matches
    the configured pattern.

    Order is important, so the configuration must be specified and handled as a
    list.

    Each rule is a dict whose first key is the path pattern. Empty rules carry
    no pattern and are skipped.

    If no path matches, an empty dict is returned.
    '''

    matching_rules = (
        configuration_rule
        for configuration_rule in configuration_section
        # Skip empty rules: `first_key` would raise StopIteration on them,
        # which a generator turns into a RuntimeError
        if configuration_rule
        and match_path(first_key(configuration_rule), path_to_match))

    return next(matching_rules, {})


def _split_segments(path):
    '''
    Split `path` on '/' after dropping a single leading slash, which would
    otherwise produce a leading empty segment
    '''
    trimmed = path[1:] if path.startswith('/') else path
    return trimmed.split('/')


def match_path(pattern, path_to_match):
    '''
    Determine if the `path_to_match` matches the path `pattern`

    >>> match_path('/*', '/index.html')
    True

    >>> match_path('/index.html', '/foo.js')
    False

    Patterns can contain the '*' and ':foo' wildcards.

    The '*' wildcard will match anything including '/'
    Ex.

    >>> match_path('/*', '/foo/bar/baz/index.html')
    True

    When combined with an extension, ie '*.html', the wildcard will match
    everything up to the LAST extension in the path to match, which must
    be matched exactly. A path whose last segment has no extension never
    matches.
    Ex.

    >>> match_path('/*.html', '/foo/bar/baz/index.foo.html')
    True

    >>> match_path('/*.foo', '/foo/bar/baz/index.foo.html')
    False

    >>> match_path('/*.html', '/foo/html')
    False

    The ':foo' wildcard will match anything EXCEPT '/',
    ie it is a single segment wildcard. It can contain any letters after ':'
    The segment it stands for must be present in the path to match.
    Ex.

    >>> match_path('/:foo/bar', '/abc/bar')
    True

    >>> match_path('/:baz', '/abc/foo')
    False

    >>> match_path('/foo/:bar', '/foo')
    False
    '''

    pattern_parts = _split_segments(pattern)
    path_parts = _split_segments(path_to_match)

    for idx, pattern_part in enumerate(pattern_parts):
        # A bare '*' swallows the rest of the path, however deep
        if pattern_part == '*':
            return True

        # Every remaining kind of pattern part, including ':foo', needs a
        # path segment at this position
        if idx >= len(path_parts):
            return False

        if pattern_part.startswith(':'):
            continue

        if pattern_part.startswith('*.'):
            pattern_part_ext = pattern_part.split('.')[-1]
            # An empty `dot` means the last segment has no extension at all,
            # so it must not be compared as if it were one
            _, dot, last_path_ext = path_parts[-1].rpartition('.')
            return bool(dot) and last_path_ext == pattern_part_ext

        if path_parts[idx] != pattern_part:
            return False

    # Every pattern part matched; reject paths with extra trailing segments
    return len(path_parts) <= len(pattern_parts)


def first_key(dikt):
    '''
    Return the first key of `dikt` in iteration order

    Raises StopIteration when `dikt` is empty.
    '''
    return next(iter(dikt))


def first_value(dikt):
    '''
    Return the value stored under the first key of `dikt` in iteration order

    Raises StopIteration when `dikt` is empty.
    '''
    return next(iter(dikt.values()))


def strip_prefix(prefix, path):
    '''
    Return `path` with one leading occurrence of `prefix` removed, or `path`
    unchanged when it does not start with `prefix`
    '''
    # Copied from models.py::remove_prefix
    if not path.startswith(prefix):
        return path

    return path[len(prefix):]


def prepend_slash(path):
    '''
    Return `path` guaranteed to begin with '/', adding one only if missing
    '''
    if path.startswith('/'):
        return path

    return '/' + path