benvial/gyptis

View on GitHub
docs/postprocess.py

Summary

Maintainability
A
3 hrs
Test Coverage
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Benjamin Vial
# This file is part of gyptis
# Version: 1.0.2
# License: MIT
# See the documentation at gyptis.gitlab.io
"""
%prog MODE FILES...

Post-processes HTML and Latex files output by Sphinx.
MODE is either 'html' or 'tex'.

"""
from __future__ import absolute_import, division, print_function

import io
import optparse
import re

from bs4 import BeautifulSoup

import gyptis as package


def main():
    """
    Command-line entry point: post-process each file given on the command line.

    The first positional argument is the mode (``html`` or ``tex``); every
    following argument is a file that is read as UTF-8, transformed and
    written back in place. In ``html`` mode the download links are relocated
    afterwards by :func:`postpro_download_links`.

    Exits through ``OptionParser.error`` when the mode is missing or unknown.
    """
    p = optparse.OptionParser(__doc__)
    _, args = p.parse_args()

    if not args:
        p.error("no mode given")

    mode, filenames = args[0], args[1:]

    if mode not in ("html", "tex"):
        p.error(f"unknown mode {mode}")

    for fn in filenames:
        # ``with`` guarantees the handle is released even if reading fails.
        with io.open(fn, "r", encoding="utf-8") as f:
            lines = f.readlines()

        if mode == "html":
            lines = process_html(fn, lines)
        else:
            lines = process_tex(lines)

        # Same guarantee for the write side, which was previously closed
        # only on the success path.
        with io.open(fn, "w", encoding="utf-8") as f:
            f.write("".join(lines))

        if mode == "html":
            postpro_download_links(fn)


def postpro_download_links(fn):
    """
    Insert the ``sphx-glr-footer`` div of an HTML file into its ``d-none`` divs.

    The file *fn* is parsed with BeautifulSoup, the last ``div`` carrying the
    ``sphx-glr-footer`` class (if any) is inserted as first child of every
    ``div`` carrying the ``d-none`` class, and the serialized document is
    written back to *fn*. The file is rewritten even when no footer is found.

    The file is read and written as UTF-8, matching the encoding used by
    ``main`` when it writes the same file.
    """

    def divs_with_class(name):
        # ``class`` may be missing on a div, hence the ``or []``.
        return [
            div for div in soup.find_all("div") if name in (div.get("class") or [])
        ]

    with open(fn, "r", encoding="utf-8") as file:
        soup = BeautifulSoup(file, "html.parser")

    footers = divs_with_class("sphx-glr-footer")
    if footers:
        sgfoot = footers[-1]
        # NOTE(review): the very same element is inserted into each target;
        # BeautifulSoup presumably moves it rather than copying it, so it
        # would end up only in the last matching div -- confirm this is
        # the intended result.
        for target in divs_with_class("d-none"):
            target.insert(0, sgfoot)

    with open(fn, "w", encoding="utf-8") as file:
        file.write(str(soup))


_TITLE_PLACEHOLDER = "__WEBPAGE_TITLE_PLACEHOLDER__"
_NEW_BINDER_BADGE = (
    "https://img.shields.io/badge/run-online-45957d.svg?style=for-the-badge&logo="
)
_ICON_PYTHON = '<i class="icondld icon-python"></i>'
_ICON_JUPYTER = '<i class="icondld icon-jupyter"></i>'
_DOWNLOAD_LINK_END = "</span></code></a></p>"


def _pre_spans(text):
    """Wrap each word of *text* in a ``pre`` span, leaving the last span open."""
    return '<span class="pre">' + '</span> <span class="pre">'.join(text.split())


def _between(start, end):
    """Compile a pattern matching the shortest text between two literal markers."""
    # Both markers are escaped so that characters such as "." are literal.
    return re.compile(
        f"(?<={re.escape(start)}).*?(?={re.escape(end)})", flags=re.DOTALL
    )


# Whatever sits between the badge base name and its extension is dropped.
_BADGE_SUFFIX = _between("binder_badge_logo", ".svg")

# (pattern stripping the label's tail, label to look for, icon + new label)
_DOWNLOAD_LINKS = tuple(
    (_between(label, _DOWNLOAD_LINK_END), label, icon + new_label)
    for label, icon, new_label in (
        (
            _pre_spans("Download Python source code"),
            _ICON_PYTHON,
            _pre_spans("Download Python script"),
        ),
        (
            _pre_spans("Download Jupyter notebook"),
            _ICON_JUPYTER,
            _pre_spans("Download Jupyter notebook"),
        ),
        (
            _pre_spans("Download all examples in Python source code"),
            _ICON_PYTHON,
            _pre_spans("Download all examples in Python source code"),
        ),
        (
            _pre_spans("Download all examples in Jupyter notebooks"),
            _ICON_JUPYTER,
            _pre_spans("Download all examples in Jupyter notebooks"),
        ),
    )
)


def _process_html_line(line):
    """Apply every HTML clean-up rule to a single line and return the result."""
    # Remove escaped arguments from the html files.
    line = line.replace(r"\*args", "*args")
    line = line.replace(r"\*\*kwargs", "**kwargs")
    line = line.replace("col-md-3", "col-md-2")

    # The package description is only looked up on lines that need it.
    if _TITLE_PLACEHOLDER in line:
        line = line.replace(
            f"<title> &#8212; {_TITLE_PLACEHOLDER}</title>",
            f"<title>Gyptis: {package.__description__}</title>",
        )
        line = line.replace(_TITLE_PLACEHOLDER, "Gyptis")

    line = line.replace(". URL: ", ".")
    if line.startswith("<dd><p>") and line.endswith("</a>.</p>\n"):
        line = line.replace("</a>.</p>", "</a></p>")

    # Point the binder links at the GitLab repository, then drop the
    # "examples/" path component (the second rule relies on the first).
    line = line.replace(
        "https://mybinder.org/v2/gh/gyptis/gyptis.gitlab.io",
        "https://mybinder.org/v2/gl/gyptis%2Fgyptis.gitlab.io",
    )
    line = line.replace(
        "https://mybinder.org/v2/gl/gyptis%2Fgyptis.gitlab.io/doc?filepath=notebooks/examples/",
        "https://mybinder.org/v2/gl/gyptis%2Fgyptis.gitlab.io/doc?filepath=notebooks/",
    )

    # Normalize e.g. "binder_badge_logo1.svg" so the rules below can match.
    line = _BADGE_SUFFIX.sub("", line)
    line = line.replace(
        'binder" src="../../_images/binder_badge_logo.svg" width="150px"',
        'binder" src="../../_images/binder_badge_logo.svg" width="200px"',
    )

    line = line.replace(
        '<span class="pre">notebooks:</span>',
        '<span class="pre">notebooks</span>',
    )
    line = line.replace('<span class="pre">examples_jupyter.zip</span>', "")

    line = line.replace("../../_images/binder_badge_logo.svg", _NEW_BINDER_BADGE)

    for tail, label, decorated in _DOWNLOAD_LINKS:
        # Drop the text following the label (up to the end of the link) ...
        line = tail.sub("", line)
        # ... and prefix the label with its icon, unless the line already
        # carries that icon (keeps a second run from adding it twice).
        if decorated[: decorated.index("</i>") + 4] not in line:
            line = line.replace(label, decorated)

    return line


def process_html(fn, lines):
    """
    Clean up the lines of an HTML file.

    Each line goes through the same ordered list of rules: un-escaping of
    ``*args`` / ``**kwargs``, column class and page title substitutions,
    bibliography punctuation fixes, binder link and badge rewriting, and
    decoration of the download links with an icon.

    Parameters
    ----------
    fn : str
        Name of the file the lines come from (currently unused).
    lines : list of str
        Lines of the file, line endings included.

    Returns
    -------
    list of str
        The processed lines, in the same order.
    """
    return [_process_html_line(line) for line in lines]


def process_tex(lines):
    """
    Drop the sectioning commands whose title starts with ``gyptis.``.

    Any line beginning with one of the LaTeX heading commands below,
    immediately followed by ``gyptis.``, is discarded; every other line is
    kept untouched and in order.
    """
    unwanted_prefixes = (
        r"\section{gyptis.",
        r"\subsection{gyptis.",
        r"\subsubsection{gyptis.",
        r"\paragraph{gyptis.",
        r"\subparagraph{gyptis.",
    )
    kept = []
    for line in lines:
        if line.startswith(unwanted_prefixes):
            continue
        kept.append(line)
    return kept


# Run the post-processor only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()