Nikolay-Lysenko/readingbricks

View on GitHub
git_hooks/pre_commit_hook.py

Summary

Maintainability
A
35 mins
Test Coverage
#!/usr/bin/env python
# Be sure that a copy of this file is placed in the `../.git/hooks` directory
# under the name `pre-commit` (without extension).


"""
Automate pre-commit validation.

As of now, this script runs the following tasks:
1) Check code style with 'lint' section of `tox.ini`;
2) Validate note headers and tags;
3) Validate internal links.

Not all tasks can be run from here, because it is desired to have no dependencies
other than built-in Python packages in Git hooks.

Author: Nikolay Lysenko
"""


import os
import re
import subprocess
import sys
from typing import Any


# NB: If a note name contains parentheses, everything after the first closing parenthesis
# is lost, because the captured group stops at the first ')'.
# Link text is matched lazily (`.+?`): a greedy `.+` would swallow all links of a line
# except for the last one, so the earlier links would never be validated.
# The captured URL must not start with 'h', which is how 'http...' links are skipped.
INTERNAL_URL_PATTERN = r"\[.+?\]\(([^h][^\)]*)\)"
INTERNAL_URL_REGEXP = re.compile(INTERNAL_URL_PATTERN)


def convert_to_absolute_path(relative_path: str) -> str:
    """
    Resolve a path that is given relative to the directory of this script.

    :param relative_path: path relative to the directory where this file is located
    :return: normalized absolute path
    """
    here = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(here, relative_path))


def lint() -> None:
    """
    Run static code analysis through the `lint` environment of `tox`.

    :return: None
    :raises ValueError: if the command finishes with a non-zero return code
    """
    # It is assumed that script is located at `.git/hooks`, so repo root is two levels up.
    repo_root = convert_to_absolute_path('../../')
    completed_process = subprocess.run('tox -e lint', cwd=repo_root, shell=True)
    if completed_process.returncode != 0:
        raise ValueError('Lint target failed.')


def validate_cell_header(header: str, previous_headers: list[str]) -> None:
    """
    Ensure that a cell header is a second-level heading that has not been seen before.

    :param header: header to be checked
    :param previous_headers: headers that have been accepted so far
    :return: None
    :raises ValueError: if header does not start with '## ' or is already in `previous_headers`
    """
    is_h2 = header.startswith('## ')
    if not is_h2:
        raise ValueError(f"Cell header must be h2 (i.e., it must start with ##), found: {header}")
    is_duplicate = header in previous_headers
    if is_duplicate:
        raise ValueError(f"Each header must appear only once, '{header}' is duplicated")


def update_list_of_headers(headers: list[str], cell: dict[str, Any]) -> list[str]:
    """
    Add cell header to list of headers if it is valid.

    The header is the first line of the cell source with trailing line breaks removed.

    :param headers: headers collected so far; this list is extended in place
    :param cell: cell with its lines stored under the 'source' key
    :return: the same list object with the new header appended
    :raises ValueError: if cell has no lines or its header violates project specifications
    """
    first_line = next(iter(cell['source']), None)
    if first_line is None:
        # Without this check, an empty cell results in an obscure `IndexError`.
        raise ValueError("Cell must not be empty, because its first line must be a header")
    header = first_line.rstrip('\n')
    validate_cell_header(header, headers)
    headers.append(header)
    return headers


def validate_tag(tag: str) -> None:
    """
    Check that a tag can be a name of SQLite table.

    :param tag: tag to be checked
    :return: None
    :raises ValueError: if tag is empty, contains '-', or starts with a digit
    """
    # Order of checks matters: the first violated rule defines the error message.
    if not tag:
        problem = "Empty tags are not allowed"
    elif '-' in tag:
        problem = "Symbol '-' is prohibited in a tag name"
    elif tag[0].isdigit():
        problem = "Tags must not start with digit"
    else:
        return
    raise ValueError(problem)


def update_list_of_tags(tags: list[str], cell: dict[str, Any]) -> list[str]:
    """
    Extend list of tag occurrences with tags of a current cell.

    :param tags: tag occurrences collected so far; this list is extended in place
    :param cell: cell with its tags stored at `cell['metadata']['tags']`
    :return: the same list object with tags of the cell added
    :raises ValueError: if any tag of the cell is invalid
    """
    metadata = cell['metadata']
    cell_tags = metadata['tags']
    # All tags are validated first, so `tags` stays untouched if any of them is invalid.
    for cell_tag in cell_tags:
        validate_tag(cell_tag)
    tags.extend(cell_tags)
    return tags


def update_list_of_internal_urls(internal_urls: list[str], cell: dict[str, Any]) -> list[str]:
    """
    Collect URLs that are found in lines of a cell by `INTERNAL_URL_REGEXP`.

    :param internal_urls: URLs collected so far; this list is extended in place
    :param cell: cell with its lines stored under the 'source' key
    :return: the same list object with URLs from the cell added
    """
    internal_urls.extend(
        url for line in cell['source'] for url in INTERNAL_URL_REGEXP.findall(line)
    )
    return internal_urls


def validate_internal_urls(internal_urls: list[str], headers: list[str], tags: list[str]) -> None:
    """
    Validate URLs pointing to other pages of the app.

    As follows from the checks below, accepted layouts are:
    * `__root_url__/<any segment>/notes/<note title>`;
    * `__root_url__/<any segment>/tags/<tag>`;
    * `__home_url__/notes/<note title>`;
    * `__home_url__/tags/<tag>`.

    :param internal_urls: URLs to be validated
    :param headers: cell headers in their raw form (i.e., with leading '#' symbols)
    :param tags: existing tags
    :return: None
    :raises ValueError: if a URL has unknown prefix or points to non-existent page
    """
    # See comment about `INTERNAL_URL_PATTERN` - this is why `rstrip(')')` is applied.
    known_titles = {x.lstrip('# ').rstrip(')') for x in headers}
    known_tags = set(tags)
    # Position of page kind ('notes' or 'tags') within a split URL depends on URL prefix.
    kind_positions = {'__root_url__': 2, '__home_url__': 1}
    for internal_url in internal_urls:
        split_url = internal_url.split('/')
        kind_position = kind_positions.get(split_url[0])
        if kind_position is None:
            raise ValueError(
                "URLs must start with 'http', '__root_url__', or '__home_url__', "
                f"but '{internal_url}' found"
            )
        # Slicing never fails, so truncated URLs are reported below as pointing to
        # non-existent page instead of causing `IndexError`.
        kind_and_name = split_url[kind_position:kind_position + 2]
        if len(kind_and_name) == 2:
            kind, name = kind_and_name
            page_exists = (
                (kind == 'notes' and name in known_titles)
                or (kind == 'tags' and name in known_tags)
            )
        else:
            page_exists = False
        if not page_exists:
            raise ValueError(f"URL '{internal_url}' points to non-existent page")


def main():
    """
    Validate commit.

    Linter is run first, and then cells from every directory inside the notes directory
    are checked: their headers, tags, and internal URLs.
    """
    lint()

    notes_root = convert_to_absolute_path('../../notes/')

    # Directory with `utils` module is added to the search path right before the import.
    sys.path.append(convert_to_absolute_path('../../readingbricks/'))
    import utils

    for entry_name in os.listdir(notes_root):
        entry_path = os.path.join(notes_root, entry_name)
        if os.path.isdir(entry_path):
            # Collected values are reset for each directory, so validation is per directory.
            headers, tags, internal_urls = [], [], []
            for cell in utils.extract_cells(entry_path):
                headers = update_list_of_headers(headers, cell)
                tags = update_list_of_tags(tags, cell)
                internal_urls = update_list_of_internal_urls(internal_urls, cell)
            validate_internal_urls(internal_urls, headers, tags)


# Run validation only if the file is executed as a script (e.g., by Git as a hook).
if __name__ == '__main__':
    main()