pweave/formatters/base.py from piccolbo/Pweave

pweave/formatters/base.py
Summary

Maintainability

2 days
Test Coverage

Issues
import textwrap
import os
import base64
import copy
from nbconvert import filters

# Pweave output formatters
class PwebFormatter(object):
    """Base class for all not-notebook formatters"""

    def __init__(
        self,
        executed,
        *,
        kernel="python3",
        language="python",
        mimetype=None,
        source=None,
        theme=None,
        figdir="figures",
        wd="."
    ):

        self.mimetypes = []  # other supported mimetypes than text/plain
        self.executed = executed
        self.figdir = figdir
        self.wd = wd
        self.source = source
        self.theme = theme
        self.language = language

        # To be set in child classess
        self.file_ext = None
        self.header = None
        self.footer = None

        self.wrapper = textwrap.TextWrapper(
            subsequent_indent="", break_long_words=False
        )

        self.mime_extensions = {
            "application/pdf": "pdf",
            "image/png": "png",
            "image/jpg": "jpg",
        }
        self.initformat()
        self._fillformatdict()

    def initformat(self):
        pass

    def format(self):
        self.formatted = []
        for chunk in self.executed:
            # Fill in options for code chunks
            if chunk["type"] == "code":
                for key in self.formatdict.keys():
                    if not key in chunk:
                        chunk[key] = self.formatdict[key]

            # Wrap text if option is set
            if chunk["type"] == "code":
                if chunk["wrap"] is True or chunk["wrap"] == "code":
                    chunk["content"] = self._wrap(chunk["content"])

            # Preformat chunk content before default formatters
            chunk = self.preformat_chunk(chunk)

            if chunk["type"] == "doc":
                self.formatted.append(self.format_docchunk(chunk))
            elif chunk["type"] == "code":
                self.formatted.append(self.format_codechunks(chunk))
            else:
                self.formatted.append(chunk["content"])

        self.formatted = "\n".join(self.formatted)
        self.convert()  # Convert to e.g. markdown
        self.add_header()
        self.add_footer()

    def convert(self):
        pass

    def preformat_chunk(self, chunk):
        """You can use this method in subclasses to preformat chunk content"""
        return chunk

    def figures_from_chunk(self, chunk):
        """Extract base64 encoded figures from chunk"""
        figs = []
        i = 1
        for out in chunk["result"]:
            if out["output_type"] != "display_data":
                continue
            # Loop trough mimetypes in order of preference
            for mimetype in self.fig_mimetypes:
                if mimetype in out["data"]:
                    fig_name, include_name = self.get_figname(chunk, i, mimetype)
                    figs.append(include_name)
                    bfig = base64.b64decode(out["data"][mimetype])
                    f = open(fig_name, "wb")
                    f.write(bfig)
                    f.close()
                    i += 1
                    break

        # print(figs)
        return figs

    def format_termchunk(self, chunk):
        if chunk["echo"] and chunk["results"] != "hidden":
            chunk["result"] = self._termindent(chunk["result"])
            result = "%(termstart)s%(result)s%(termend)s" % chunk
        else:
            result = ""
        return result

    def format_codeblock(self, chunk):
        pass

    def format_results(self, chunk):
        pass

    def render_jupyter_output(self, out, chunk):
        if out["output_type"] == "error":
            return self.render_traceback("".join(out["traceback"]), chunk)

        if out["output_type"] == "stream":
            return self.render_text(out["text"], chunk)

        for mimetype in self.mimetypes:
            if mimetype in out["data"]:
                if mimetype == "application/javascript":
                    return "\n<script>" + out["data"][mimetype] + "</script>"
                else:
                    return "\n" + out["data"][mimetype]
        # Return nothing if data is shown as figure
        for mimetype in self.fig_mimetypes:
            if mimetype in out["data"]:
                return ""

        if "text/plain" in out["data"]:
            return self.render_text(out["data"]["text/plain"], chunk)
        else:
            return ""

    def highlight_ansi_and_escape(self, text):
        """Strip ANSI codes with nbconvert's ``strip_ansi`` filter, then apply ``escape``."""
        without_ansi = filters.strip_ansi(text)
        return self.escape(without_ansi)

    def escape(self, text):
        return text

    def render_traceback(self, text, chunk):
        chunk = copy.deepcopy(chunk)
        text = self.highlight_ansi_and_escape(text)
        return self.format_text_result(text, chunk)

    def render_text(self, text, chunk):
        chunk = copy.deepcopy(chunk)
        text = self.highlight_ansi_and_escape(text)
        return self.format_text_result(text, chunk)

        # Set lexers for code and output

    def format_text_result(self, text, chunk):
        chunk["result"] = text
        result = ""
        if "%s" in chunk["outputstart"]:
            chunk["outputstart"] = chunk["outputstart"] % self.language
        if "%s" in chunk["termstart"]:
            chunk["termstart"] = chunk["termstart"] % self.language

        # Other things than term
        if chunk["results"] == "verbatim":
            if len(chunk["result"].strip()) > 0:
                if (
                    chunk["wrap"] is True
                    or chunk["wrap"] == "results"
                    or chunk["wrap"] == "output"
                ):
                    chunk["result"] = self._wrap(chunk["result"])
                chunk["result"] = "\n%s\n" % chunk["result"].rstrip()
                chunk["result"] = self._indent(chunk["result"])
                # chunk["result"] = self.fix_linefeeds(chunk['result'])
                result += "%(outputstart)s%(result)s%(outputend)s" % chunk
        elif chunk["results"] != "verbatim":
            result += self.fix_linefeeds(text)

        return result

    def fix_linefeeds(self, text):
        """Add empty line to start and end of string if it
        they don't exist"""

        if not text.startswith("\n"):
            text = "\n" + text
        if not text.endswith("\n"):
            text = text + "\n"

        return text

    def format_codechunks(self, chunk):
        chunk["content"] = self._indent(chunk["content"])

        # Code is not executed
        if not chunk["evaluate"]:
            chunk["content"] = self.fix_linefeeds(chunk["content"])
            if "%s" in chunk["codestart"]:
                chunk["codestart"] = chunk["codestart"] % self.language
            if chunk["echo"]:
                result = "%(codestart)s%(content)s%(codeend)s" % chunk
                return result
            else:
                return ""

        # Code is executed
        # -------------------
        if "%s" in chunk["codestart"]:
            chunk["codestart"] = chunk["codestart"] % self.language

        result = ""

        if chunk["echo"]:
            chunk["content"] = self.fix_linefeeds(chunk["content"])
            result += "%(codestart)s%(content)s%(codeend)s" % chunk

        if chunk["results"] != "hidden":
            stream_result = {"output_type": "stream", "text": ""}
            other_result = ""
            for out in chunk["result"]:
                if out["output_type"] == "stream":
                    stream_result["text"] += out["text"]
                else:
                    other_result += self.render_jupyter_output(out, chunk)

            result += self.render_jupyter_output(stream_result, chunk)
            result += other_result

        # Handle figures
        chunk["figure"] = self.figures_from_chunk(
            chunk
        )  # Save embedded figures to file

        if chunk["fig"] and "figure" in chunk:
            if chunk["include"]:
                result += self.formatfigure(chunk)
        return result

    def format_docchunk(self, chunk):
        return chunk["content"]

    def add_header(self):
        """Can be used to add header to self.formatted list"""
        if self.header is not None:
            self.formatted = self.header + self.formatted

    def add_footer(self):
        """Can be used to add footer to self.formatted list"""
        if self.footer is not None:
            self.formatted += self.footer

    def getformatdict(self):
        return self.formatdict

    def getformatted(self):
        return self.formatted

    def updateformatdict(self, format_dict):
        self.formatdict.update(format_dict)

    def _wrapper(self, string, width=80):
        """Wrap a string to specified width like Python terminal"""
        if len(string) < width:
            return string
        # Wrap also comment lines
        if string.lstrip()[0] == "#":
            return (
                string[0:width]
                + "\n"
                + self._wrapper("#" + string[width : len(string)], width)
            )
        else:
            return (
                string[0:width]
                + "\n"
                + self._wrapper(string[width : len(string)], width)
            )

    def _wrap(self, content):
        splitted = content.split("\n")
        result = ""
        for line in splitted:
            result += self.wrapper.fill(line) + "\n"
        return result

    def _fillformatdict(self):
        """Fill in the blank options that are now only used for rst
            but also allow e.g. special latex style for terminal blocks etc."""
        self._fillkey("termstart", self.formatdict["codestart"])
        self._fillkey("termend", self.formatdict["codeend"])
        self._fillkey("savedformats", list([self.formatdict["figfmt"]]))

    def _fillkey(self, key, value):
        if key not in self.formatdict:
            self.formatdict[key] = value

    def _indent(self, text):
        """Indent blocks for formats where indent is significant"""
        return text
        # return(text.replace('\n', '\n' + self.formatdict['indent']))

    def _termindent(self, text):
        """Indent blocks for formats where indent is significant"""
        return text
        # return(text.replace('\n', '\n' + self.formatdict['termindent']))

    def sanitize_filename(self, fname):
        return "".join(i for i in fname if i not in "\/:*?<>|")

    def get_figname(self, chunk, i, mimetype):
        save_dir = self.getFigDirectory()
        include_dir = self.figdir

        ext = "." + self.mime_extensions[mimetype]
        base = os.path.splitext(os.path.basename(self.source))[0]

        if chunk["name"] is None:
            prefix = base + "_figure" + str(chunk["number"]) + "_" + str(i)
        else:
            prefix = base + "_" + self.sanitize_filename(chunk["name"]) + "_" + str(i)

        self.ensureDirectoryExists(self.getFigDirectory())

        save_name = os.path.join(save_dir, prefix + ext)
        include_name = os.path.join(include_dir, prefix + ext).replace("\\", "/")

        return save_name, include_name

    def getFigDirectory(self):
        return os.path.join(self.wd, self.figdir)

    def ensureDirectoryExists(self, figdir):
        if not os.path.isdir(figdir):
            os.mkdir(figdir)