volt/engines/markdown2.py

Summary

Maintainability
A
0 mins
Test Coverage
B
81%
"""Markdown engine based on markdown2."""

# Copyright (c) 2012-2023 Wibowo Arindrarto <contact@arindrarto.dev>
# SPDX-License-Identifier: BSD-3-Clause

from copy import deepcopy
from contextlib import suppress
from dataclasses import dataclass
from datetime import datetime as dt
from functools import cached_property
from itertools import chain
from pathlib import Path
from typing import cast, Any, Callable, Iterator, Optional, Self, Sequence
from urllib.parse import urljoin

import pendulum
import yaml
from jinja2 import Template
from markdown2 import Markdown
from pendulum.datetime import DateTime
from slugify import slugify
from yaml import SafeLoader

from .common import Engine
from .. import constants, error as err
from ..config import Config
from ..outputs import TemplateOutput


# Public names of this module, i.e. what ``from <module> import *`` exports.
__all__ = ["MarkdownEngine", "MarkdownSource"]


class MarkdownEngine(Engine):
    """Engine that creates HTML outputs using the markdown2 library.

    Markdown files are discovered under the configured contents directory
    (and, optionally, the draft contents directory), parsed into
    :class:`MarkdownSource` objects and turned into template outputs.

    """

    # Extras enabled on the markdown2 converter unless overridden by the
    # ``markdown`` theme options (see ``_make_converter``).
    # NOTE(review): the "fenced-code-blocks" values look like Pygments
    #   ``HtmlFormatter`` options; there the inline-style option is spelled
    #   "cssstyles", so the "csstyles" key below may be a typo -- confirm.
    default_extras = {
        "fenced-code-blocks": {
            "nowrap": False,
            "full": False,
            "title": "",
            "noclasses": False,
            "classprefix": "",
            "cssclass": "hl",
            "csstyles": "",
            "prestyles": "",
            "cssfile": "",
            "noclobber_cssfile": False,
            "linenos": False,
            "hl_lines": [],
            "linenostart": 1,
            "linenostep": 1,
            "linenospecial": 0,
            "nobackground": False,
            "lineseparator": "\n",
            "lineanchors": "",
            "anchorlinenos": False,
        },
        "markdown-in-html": True,
        "header-ids": True,
        "footnotes": True,
    }

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Load the page template and read the theme's markdown options.

        All arguments are forwarded unchanged to the parent initializer.

        """
        super().__init__(*args, **kwargs)
        # TODO: How to best expose and configure this?
        template_name = "page.html.j2"
        try:
            self.template = self.theme.load_template_file(template_name)
        except err.VoltMissingTemplateError:
            # The theme does not ship this template; fall back to the default
            # file stored next to this module.
            default_fp = Path(__file__).parent / "defaults" / template_name
            self.template = Template(default_fp.read_text())

        # Work on a shallow copy: popping "recursive" from the dict stored in
        # the theme options would silently alter the theme configuration for
        # everything else that reads it afterwards.
        markdown_opts = dict(self.theme.opts.get("markdown") or {})
        self.recursive = markdown_opts.pop("recursive", False)
        # Whatever remains is treated as user overrides for the extras.
        self.extras = markdown_opts

    @staticmethod
    def iter_md_paths(
        base_dir: Path,
        recursive: bool = False,
        ext: str = constants.MARKDOWN_EXT,
    ) -> Iterator[Path]:
        """Iterate over paths in ``base_dir`` whose names end with ``ext``.

        :param base_dir: Directory to search.
        :param recursive: Whether to also descend into subdirectories.
        :param ext: File name suffix to match.

        :returns: An iterator over the matching paths.

        """
        pattern = f"*{ext}"
        if recursive:
            return base_dir.rglob(pattern)
        return base_dir.glob(pattern)

    def prepare_outputs(
        self,
        with_draft: bool,
        *args: Any,
        **kwargs: Any,
    ) -> Sequence[TemplateOutput]:
        """Create one template output per markdown source.

        :param with_draft: Whether draft contents are included.
        :param kwargs: Only the optional ``meta`` entry is used; it is passed
            on to :meth:`read_sources`. Other arguments are ignored.

        :returns: The template outputs, rendered with the engine's template.

        """
        return [
            src.to_template_output(self.template)
            for src in self.read_sources(with_draft, meta=kwargs.get("meta", None))
        ]

    def read_sources(
        self,
        with_draft: bool,
        contents_lookup_dirname: str = "",
        meta: Optional[dict] = None,
    ) -> Sequence["MarkdownSource"]:
        """Read and parse the markdown files handled by this engine.

        :param with_draft: Whether draft contents are included.
        :param contents_lookup_dirname: Subdirectory, relative to the contents
            directory, in which to look for files.
        :param meta: Optional metadata injected into every source.

        :returns: The parsed sources, all sharing a single converter.

        """
        converter = self._make_converter()
        return [
            MarkdownSource.from_path(
                path=fp,
                config=self.config,
                converter=converter,
                meta=meta,
            )
            for fp in self._iter_source_paths(with_draft, contents_lookup_dirname)
        ]

    def _iter_source_paths(
        self,
        with_draft: bool,
        contents_lookup_dirname: str = "",
    ) -> Iterator[Path]:
        """Iterate over source paths, with or without draft contents."""
        if with_draft:
            return self._iter_source_paths_with_draft(contents_lookup_dirname)
        return self._iter_source_paths_no_draft(contents_lookup_dirname)

    def _iter_source_paths_with_draft(
        self,
        contents_lookup_dirname: str = "",
    ) -> Iterator[Path]:
        """Iterate over source paths, draft contents included."""
        base_contents_dir = self.config.contents_dir / contents_lookup_dirname
        base_drafts_dir = self.config.draft_contents_dir / contents_lookup_dirname

        if not self.recursive:
            # Contents first, then drafts; only the top level of each.
            return chain.from_iterable(
                self.iter_md_paths(base_dir, recursive=False)
                for base_dir in (base_contents_dir, base_drafts_dir)
            )

        # Only the contents tree is walked here.
        # NOTE(review): presumably the drafts directory lives inside the
        #   contents directory and is therefore covered by this walk --
        #   confirm against the config.
        return self.iter_md_paths(base_contents_dir, recursive=True)

    def _iter_source_paths_no_draft(
        self,
        contents_lookup_dirname: str = "",
    ) -> Iterator[Path]:
        """Iterate over source paths, draft contents excluded."""
        base_contents_dir = self.config.contents_dir / contents_lookup_dirname

        if not self.recursive:
            return self.iter_md_paths(base_contents_dir, recursive=False)

        # Exclude .drafts tree, i.e. start from all top-level dirs
        # except .drafts.
        non_draft_dirs = (
            dp
            for dp in base_contents_dir.iterdir()
            if dp.is_dir() and dp.name != self.config.draft_dir_name
        )
        return chain(
            chain.from_iterable(
                self.iter_md_paths(dp, recursive=True) for dp in non_draft_dirs
            ),
            # Files directly inside the base directory come last.
            self.iter_md_paths(base_contents_dir, recursive=False),
        )

    def _make_converter(self) -> Callable[[str], str]:
        """Build the markdown-to-HTML conversion function.

        The user-supplied extras are merged over ``default_extras``. When the
        resolved ``footnotes`` extra is a dict, its ``footnote_return_symbol``
        and ``footnote_title`` entries (if not ``None``) are additionally
        passed to the ``Markdown`` constructor as keyword arguments.

        """
        resolved_extras = _resolve_extras(self.extras, self.default_extras)

        kwargs: dict = {}
        if isinstance((fd := resolved_extras.get("footnotes", None)), dict):
            for k in ("footnote_return_symbol", "footnote_title"):
                if (v := fd.get(k)) is not None:
                    kwargs[k] = v

        return cast(
            Callable[[str], str], Markdown(extras=resolved_extras, **kwargs).convert
        )


@dataclass(kw_only=True, eq=False)
class MarkdownSource:
    """A markdown input parsed using the markdown2 library."""

    # FileSystem path to the file content.
    path: Path

    # Metadata of the content.
    meta: dict

    # Site configuration.
    config: Config

    # Whether the content is draft or not.
    is_draft: bool

    # Markdown text of the body, without any metadata.
    body: str

    # Markdown converter.
    converter: Callable[[str], str]

    @classmethod
    def from_path(
        cls,
        path: Path,
        config: Config,
        converter: Callable[[str], str],
        meta: Optional[dict] = None,
        fm_sep: str = constants.FRONT_MATTER_SEP,
    ) -> Self:
        """Create an instance from a file.

        :param path: Path to the source file.
        :param config: Site configuration.
        :param converter: Callable converting the markdown body to HTML.
        :param meta: Optional metadata to inject. Its entries take precedence
            over front matter entries with the same key.
        :param fm_sep: String for separating the markdown front matter.

        :raises ~volt.error.VoltResourceError: when the front matter is not a
            mapping.

        """
        raw_text = path.read_text()
        # At most two separators are consumed: everything after the second one
        # is the body, whatever non-empty text precedes it is the front matter.
        *top, raw_body = raw_text.split(fm_sep, 2)
        raw_fm = [item for item in top if item]

        fm = yaml.load(raw_fm[0].strip(), Loader=SafeLoader) if raw_fm else None
        if fm is None:
            # No front matter, or an empty front matter block, which YAML
            # parses as None.
            fm = {}
        if not isinstance(fm, dict):
            raise err.VoltResourceError(
                f"front matter in {str(path)!r} is not a mapping"
            )

        return cls(
            body=raw_body,
            path=path,
            # TODO: Validate minimal front matter metadata.
            meta={**fm, **(meta or {})},
            config=config,
            # Compare whole path components, so that a sibling directory whose
            # name merely starts with the draft directory name is not
            # mistaken for a draft location.
            is_draft=path.is_relative_to(config.draft_contents_dir),
            converter=converter,
        )

    @cached_property
    def url(self) -> str:
        """Site-relative URL of the content, always starting with ``/``.

        The last URL segment(s) come from, in order of precedence: the ``url``
        metadata entry (split on ``/``), the slugified ``title`` metadata
        entry, or the slugified file stem. They are appended to the parent
        directory parts of the source path, minus the first
        ``config.num_common_parts`` parts.

        """
        config = self.config

        if (meta_url := self.meta.get("url")) is not None:
            parts = [part for part in meta_url.split("/") if part]
        elif (meta_title := self.meta.get("title")) is not None:
            parts = [f"{self._slugify(meta_title)}.html"]
        else:
            parts = [f"{self._slugify(self.path.stem)}.html"]

        ps = [*(self.path.parent.parts[config.num_common_parts :]), *parts]
        if self.is_draft:
            with suppress(IndexError):
                # NOTE: This assumes that the `.draft` folder is located just below
                #       the contents dir.
                del ps[0]

        return f"/{'/'.join(ps)}"

    @property
    def url_abs(self) -> str:
        """URL of the content joined onto the configured site URL."""
        return urljoin(self.config.url, self.url)

    @property
    def title(self) -> Optional[str]:
        """Value of the ``title`` metadata entry, if any."""
        return self.meta.get("title")

    @cached_property
    def pub_time(self) -> Optional[DateTime]:
        """Publication time taken from the ``pub_time`` metadata entry.

        :returns: ``None`` when the entry is missing, otherwise the value as a
            pendulum ``DateTime``.

        :raises ~volt.error.VoltResourceError: when the value is neither a
            string that parses to a datetime nor a ``datetime`` instance.

        """
        value = self.meta.get("pub_time", None)
        if value is None:
            return None

        exc = err.VoltResourceError(
            f"value {value!r} in {str(self.path)!r} is not a valid datetime"
        )
        if isinstance(value, str):
            try:
                rv = pendulum.parse(value)
            except ValueError as e:
                # pendulum signals unparseable input with a ValueError
                # subclass; report it like any other invalid value.
                raise exc from e
            # Parsing may also yield non-datetime values, which are rejected.
            if isinstance(rv, DateTime):
                return rv
            raise exc
        if isinstance(value, dt):
            return pendulum.instance(value)
        raise exc

    @cached_property
    def html(self) -> str:
        """Body converted to HTML by the converter."""
        return self.converter(self.body)

    def to_template_output(self, template: Template) -> TemplateOutput:
        """Create a :class:`TemplateOutput` instance.

        The template is rendered with ``meta`` (the metadata plus the computed
        ``url``) and ``content`` (the HTML body).

        """

        return TemplateOutput(
            url=self.url,
            template=template,
            render_kwargs={
                "meta": {**self.meta, "url": self.url},
                "content": self.html,
            },
            src=self.path.relative_to(self.config.project_dir),
        )

    def _slugify(self, value: str) -> str:
        """Slugify ``value`` using the configured slug replacements."""
        return slugify(value, replacements=self.config.slug_replacements)


def _resolve_extras(extras: Optional[dict], default_extras: dict) -> dict:
    resolved = deepcopy(default_extras)
    extras = extras or {}

    for k, v in extras.items():
        if v is False:
            resolved.pop(k, None)
        else:
            resolved[k] = v

    return resolved