reformat_gherkin/formatter.py from ducminh-phan/reformat-gherkin

reformat_gherkin/formatter.py
Summary

Maintainability

1 day
Test Coverage

Issues
from itertools import chain, groupby
from typing import (
    Any,
    Callable,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Union,
    overload,
)

from attr import attrib, dataclass

from .ast_node import (
    Background,
    Comment,
    DataTable,
    DocString,
    Examples,
    Feature,
    GherkinDocument,
    Location,
    Node,
    Rule,
    Scenario,
    Step,
    TableRow,
    Tag,
    TagGroup,
)
from .options import AlignmentMode, TagLineMode
from .utils import camel_to_snake_case, extract_beginning_spaces, get_display_width

# Base indent level for each kind of node, keyed by the node's class. The level is
# multiplied by the configured indent string when a line is rendered.
# `LineGenerator.get_indent_level` looks node types up here, falls back to a
# caller-supplied default for types that are not listed, and adds one extra level
# for nodes located inside a Rule.
INDENT_LEVEL_MAP: Mapping[Any, int] = {
    Feature: 0,
    Background: 1,
    Scenario: 1,
    Rule: 1,
    Step: 2,
    Examples: 2,
    DocString: 3,
    TableRow: 3,
}


def generate_language_header(language: str) -> Comment:
    """
    Build the `# language: <language>` comment node for the given language code.

    The comment is created at `Location(1, 1)`.
    """

    header_location = Location(1, 1)  # type: ignore
    header_text = f"# language: {language}"

    return Comment(header_location, header_text)  # type: ignore


def generate_step_line(
    step: Step,
    keyword_alignment: AlignmentMode,
    indent: str,
    indent_level: int,
    *,
    keyword_padding_width: int = 0,
) -> str:
    """
    Render a single step as `<indentation><keyword> <text>`.

    The keyword is first passed through `format_step_keyword`, which pads it
    according to `keyword_alignment` and `keyword_padding_width`. With a padding
    width equal to the widest keyword, the result looks as follows.

    No alignment (`AlignmentMode.NONE`):
        Given Enter search term 'Cucumber'
        When Do search
        Then Single result is shown for 'Cucumber'

    Left alignment (`AlignmentMode.LEFT`), padding goes after the keyword:
        Given Enter search term 'Cucumber'
        When  Do search
        Then  Single result is shown for 'Cucumber'

    Any other alignment mode (right alignment), padding goes before the keyword:
        Given Enter search term 'Cucumber'
         When Do search
         Then Single result is shown for 'Cucumber'

    `indent` is repeated `indent_level` times to build the leading indentation.
    """

    leading_indent = indent * indent_level
    keyword_column = format_step_keyword(
        step.keyword,
        keyword_alignment,
        keyword_padding_width=keyword_padding_width,
    )
    step_text = step.text

    return f"{leading_indent}{keyword_column} {step_text}"


def format_step_keyword(
    keyword: str,
    keyword_alignment: AlignmentMode,
    *,
    keyword_padding_width: int = 0,
) -> str:
    """
    Pad a step keyword with spaces so that it occupies `keyword_padding_width`
    display columns.

    The keyword is returned untouched when the alignment is `AlignmentMode.NONE`
    or when the padding width is not positive. For `AlignmentMode.LEFT` the spaces
    are appended to the keyword, for every other mode they are prepended.
    """

    no_padding_wanted = (
        keyword_alignment is AlignmentMode.NONE or keyword_padding_width <= 0
    )
    if no_padding_wanted:
        return keyword

    # A keyword wider than the target yields a negative count, and multiplying a
    # string by a negative number gives the empty string, i.e. no padding.
    missing_columns = keyword_padding_width - get_display_width(keyword)
    filler = " " * missing_columns

    if keyword_alignment is not AlignmentMode.LEFT:
        return filler + keyword

    return keyword + filler


def generate_keyword_line(
    keyword: str,
    name: str,
    indent: str,
    indent_level: int,
) -> str:
    """
    Render a `<keyword>: <name>` line, indented by `indent_level` copies of `indent`.

    Trailing whitespace is removed, so an empty name produces `<keyword>:` without
    a dangling space.
    """

    leading_indent = indent * indent_level
    line = f"{leading_indent}{keyword}: {name}"

    return line.rstrip()


def generate_description_lines(
    description: str,
    indent: str,
    indent_level: int,
) -> List[str]:
    """
    Render a multi-line description as indented lines.

    Every non-empty line of `description` is prefixed with `indent_level` copies of
    `indent`. Empty lines are kept empty instead of being turned into
    whitespace-only lines, which matches how `generate_doc_string_lines` treats
    them and avoids emitting trailing whitespace.

    Returns an empty list for an empty description; otherwise the rendered lines
    followed by one empty line that separates the description from what follows.
    """

    prefix = indent * indent_level

    lines = [f"{prefix}{line}" if line else "" for line in description.splitlines()]

    # Add an empty line after the description, if it exists
    if lines:
        lines.append("")

    return lines


def generate_table_lines(
    rows: List[TableRow],
    indent: str,
    indent_level: int,
) -> List[str]:
    """
    Render table rows as `| a | b |` lines whose columns line up.

    The number of columns is taken from the first row. Each column is as wide as
    its widest cell (measured with `get_display_width`), and cell contents are
    left-aligned and padded with spaces up to that width. Every line is indented
    by `indent_level` copies of `indent`. An empty `rows` gives an empty list.
    """

    if not rows:
        return []

    column_indices = range(len(rows[0]))

    # One width per column: the display width of the widest cell in that column.
    widest = [
        max(get_display_width(row[index].value) for row in rows)
        for index in column_indices
    ]

    prefix = indent * indent_level
    rendered = []

    for row in rows:
        cells = []

        for index in column_indices:
            text = row[index].value
            # Pad on the right so that the content stays left-aligned.
            fill = " " * (widest[index] - get_display_width(text))
            cells.append(f" {text}{fill} |")

        row_line = "|" + "".join(cells)
        rendered.append(f"{prefix}{row_line}")

    return rendered


def extract_rows(node: Union[DataTable, Examples]) -> List[TableRow]:
    """
    Collect the table rows of a DataTable or an Examples node into a new list.

    For a DataTable these are its `rows`. For an Examples node they are the
    `table_header` (if present) followed by the rows of `table_body` (if present).
    Any other kind of node gives an empty list.
    """

    if isinstance(node, DataTable):
        return list(node.rows)

    if not isinstance(node, Examples):
        return []

    collected = []

    header = node.table_header
    body = node.table_body

    if header is not None:
        collected.append(header)

    if body is not None:
        collected.extend(body)

    return collected


def generate_doc_string_lines(
    docstring: DocString,
    indent: str,
    indent_level: int,
) -> List[str]:
    """
    Render a doc string as its content lines enclosed in two `\"\"\"` delimiter lines.

    Non-empty lines are indented by `indent_level` copies of `indent`; empty
    content lines stay empty so that no whitespace-only lines are produced.
    """

    prefix = indent * indent_level
    delimiter = '"""'

    enclosed = [delimiter, *docstring.content.splitlines(), delimiter]

    rendered = []
    for text in enclosed:
        rendered.append(f"{prefix}{text}" if text else "")

    return rendered


# Maps a node to the extra information needed to render it. As filled in by
# `LineGenerator`: a Tag maps to the node that owns it, a TagGroup to its `context`
# node, a TableRow to its already formatted line, and a Comment to the next
# non-comment node (or None when no such node follows).
ContextMap = Dict[Union[Comment, Tag, TagGroup, TableRow], Any]
# The output of the visitor methods: an iterator over rendered lines.
Lines = Iterator[str]


@dataclass
class LineGenerator:
    """
    Generate the reformatted lines of a Gherkin document from its AST.

    On construction, the nodes of `ast` are collected and sorted by location, and
    everything needed to render them (contexts, nodes inside rules, nodes followed
    by a blank line, the widest step keyword) is computed once. `generate` then
    yields the output line by line by dispatching each node to a `visit_*` method.
    """

    # The document to render.
    ast: GherkinDocument
    # How step keywords are padded, see `format_step_keyword`.
    step_keyword_alignment: AlignmentMode
    # With `TagLineMode.SINGLELINE`, the tags of a node are rendered on one line.
    tag_line_mode: TagLineMode
    # The string that is repeated once per indent level.
    indent: str

    # The nodes to render, in output order.
    __nodes: List[Node] = attrib(init=False)
    # Extra per-node information needed for rendering, see `__construct_contexts`.
    __contexts: ContextMap = attrib(init=False)
    # Nodes after which an empty line is emitted.
    __nodes_with_newline: Set[Node] = attrib(init=False)
    # Nodes nested in a Rule; they are indented one level deeper.
    __nodes_within_rules: Set[Node] = attrib(init=False)
    # Display width of the widest step keyword, 0 when keywords are not aligned.
    __max_step_keyword_width: int = attrib(init=False)

    def __attrs_post_init__(self):
        # Use `__attrs_post_init__` instead of `property` to avoid re-computing attributes

        self.__nodes = list(self.ast)

        if self.tag_line_mode is TagLineMode.SINGLELINE:
            self.__group_tags()

        # Render the nodes in the order in which they appear in the document.
        self.__nodes.sort(key=lambda node: node.location)

        # `__construct_contexts` calls `get_indent_level`, which reads
        # `__nodes_within_rules`, so the latter has to be computed first.
        self.__nodes_within_rules = self.__find_nodes_within_rules()
        self.__contexts = self.__construct_contexts()
        self.__nodes_with_newline = self.__find_nodes_with_newline()
        self.__max_step_keyword_width = self.__find_max_step_keyword_width()
        # Must come last: it updates the node list, the newline set and the contexts.
        self.__add_language_header()

    def __group_tags(self):
        """
        Group the tags of a node, so that we can render them on a single line.

        Every node with a non-empty `tags` attribute gets one TagGroup. The
        individual Tag nodes are then replaced by these groups in the node list.
        """

        tag_groups: List[TagGroup] = []
        node: Node
        for node in self.ast:
            tags = getattr(node, "tags", None)

            if tags:
                tag_group = TagGroup(
                    members=tags,
                    context=node,
                    # The tag group should be placed
                    # at the position of the last tag
                    location=tags[-1].location,
                )
                tag_groups.append(tag_group)

        # After grouping the tags, we need to include the tag groups into
        # the list of nodes and remove the tags from the list.
        self.__nodes = [
            node for node in self.__nodes if not isinstance(node, Tag)
        ] + tag_groups

    def __construct_contexts(self) -> ContextMap:
        """
        Construct the information about the context a certain line might need to know to
        properly format these lines.

        The resulting map holds: for a node with a `context` attribute, that
        context; for a tag, the node owning it; for a table row, its formatted
        line; for a comment, the next non-comment node (or None).
        """

        contexts: ContextMap = {}
        nodes = self.__nodes

        for node in nodes:
            if hasattr(node, "context"):
                contexts[node] = node.context  # type: ignore

            # We want tags to have the same indentation level with their parents
            for tag in getattr(node, "tags", []):
                contexts[tag] = node

            if isinstance(node, (DataTable, Examples)):
                # We need to know all rows in a table, so that the columns can be padded
                # to have the same widths across all rows. The context of a row is its
                # reformatted line.
                rows = extract_rows(node)
                indent_level = 0
                if rows:
                    # All rows of a table share the indent level of the first row.
                    indent_level = self.get_indent_level(rows[0])

                lines = generate_table_lines(
                    rows,
                    self.indent,
                    indent_level,
                )

                for row, line in zip(rows, lines):
                    contexts[row] = line

        contexts.update(self.__construct_contexts_for_comments(nodes))

        return contexts

    @staticmethod
    def __construct_contexts_for_comments(nodes: List[Node]) -> ContextMap:
        """
        Map every comment in `nodes` to the next non-comment node, or to None if
        only comments follow it.
        """

        # The context of each comment line is the next non-comment line.
        #
        # The steps of the algorithm:
        # 1. Group the nodes into comments and non-comments
        # 2. The first node in each group of non-comments is the context of every node
        #    in the previous group, which consists of comments only.
        #
        # We start with a context of None, this lets us know if the document ends
        # with a block of comments.
        #
        # In the original algorithm, we simply set the context of each comment line
        # to be the next line. This leads to a RecursionError if there are too many
        # consecutive comments.

        contexts: ContextMap = {}
        current_context = None

        groups = groupby(reversed(nodes), lambda n: isinstance(n, Comment))

        for is_comment_group, group in groups:
            if not is_comment_group:
                # The current group consists of non-comments, we set the current context
                # to be the last node in the group, since we grouped in the reverse order
                current_context = list(group)[-1]
            else:
                # The current group consists of comments. These comments should have the
                # same indent level, which is the indent level of the current context.
                for node in group:
                    contexts[node] = current_context  # type: ignore

        return contexts

    def __find_nodes_with_newline(self) -> Set[Node]:
        """
        Find all nodes in the AST that needs a new line after it.
        """

        nodes_with_newline: Set[Node] = set()

        # Declared before the loop so that, afterwards, it refers to the last node
        # of the list, or stays None when the list is empty.
        node: Optional[Node] = None

        for node in self.__nodes:
            # We want to add a newline after the Feature/Rule line, even
            # if it does not have a description. If the feature/rule has
            # a description, we already add a newline after each description.
            if isinstance(node, (Feature, Rule)) and not node.description:
                nodes_with_newline.add(node)

            children: List[Node] = []

            # Add an empty line after the last step, including its argument, if any
            if isinstance(node, (Background, Scenario)):
                children = list(chain.from_iterable(node.steps))

            # Add an empty line after an examples table
            if isinstance(node, Examples):
                children = list(node)

            if children:
                last_child = children[-1]
                nodes_with_newline.add(last_child)

        # Add the last node in the AST so that we have an empty line at the end
        if node is not None:
            nodes_with_newline.add(node)

        return nodes_with_newline

    def __find_nodes_within_rules(self) -> Set[Node]:
        """
        Collect the nodes nested inside the rules of the feature, excluding the
        Rule nodes themselves.
        """

        nodes_within_rules: Set[Node] = set()

        feature = self.ast.feature
        if feature is not None:
            for child in feature.children:
                if child.rule is not None:
                    for node in child.rule:
                        if not isinstance(node, Rule):
                            nodes_within_rules.add(node)

        return nodes_within_rules

    def __find_max_step_keyword_width(self) -> int:
        """
        Find the length of the longest step keyword in the document. This is
        used for aligning step keywords.

        Returns 0 when keywords are not aligned or the document has no steps.
        """

        if self.step_keyword_alignment is AlignmentMode.NONE:
            # We don't need to align step keywords in this case.
            return 0

        return max(
            (
                get_display_width(node.keyword.strip())
                for node in self.ast
                if isinstance(node, Step)
            ),
            default=0,
        )

    def __add_language_header(self) -> None:
        """
        Add a language header if the Feature language is not English.

        The header becomes the first node, is followed by an empty line and uses
        the feature as its context.
        """

        # Exit if the language is English or if there is no Feature node
        feature = self.ast.feature
        if not feature:
            return
        language = feature.language
        if language == "en":
            return

        # Register the language header
        language_header = generate_language_header(language)
        self.__nodes.insert(0, language_header)
        self.__nodes_with_newline.add(language_header)
        self.__contexts[language_header] = feature

    @overload
    def get_indent_level(self, node: Node) -> int:
        ...

    @overload
    def get_indent_level(self, node: Node, *, default: Optional[int]) -> Optional[int]:
        ...

    def get_indent_level(self, node: Node, *, default=0):
        """
        Return the indent level of `node`.

        The level is looked up in `INDENT_LEVEL_MAP` by the type of the node;
        `default` is used for types that are not listed there. A level that is not
        None is increased by one for nodes located inside a Rule. The result is
        None only if `default` is None and the node type is not in the map.
        """

        indent_level = INDENT_LEVEL_MAP.get(type(node), default)

        if indent_level is not None and node in self.__nodes_within_rules:
            indent_level += 1

        return indent_level

    def generate(self) -> Lines:
        """
        Yield the lines of the reformatted document, including the empty lines
        that follow the nodes recorded as needing one.
        """

        for node in self.__nodes:
            yield from self.visit(node)

            if node in self.__nodes_with_newline:
                yield ""

    def visit(self, node: Node) -> Lines:
        """
        Yield the lines of `node` using the method `visit_<snake_case_class_name>`,
        or `visit_default` if there is no such method.
        """

        class_name = type(node).__name__

        visit_method: Callable[[Node], Lines] = getattr(
            self,
            f"visit_{camel_to_snake_case(class_name)}",
            self.visit_default,
        )

        yield from visit_method(node)

    def visit_default(self, node: Node) -> Lines:
        """
        Yield the `<keyword>: <name>` line of a node that has both attributes,
        followed by its description lines if it has a `description` attribute.
        Nodes with none of these attributes yield nothing.
        """

        indent_level = self.get_indent_level(node)

        if hasattr(node, "keyword") and hasattr(node, "name"):
            yield generate_keyword_line(
                node.keyword,  # type: ignore
                node.name,  # type: ignore
                self.indent,
                indent_level,
            )

        if hasattr(node, "description"):
            # The description is indented one level deeper than the keyword line.
            yield from generate_description_lines(
                node.description,  # type: ignore
                self.indent,
                indent_level + 1,
            )

    def visit_step(self, step: Step) -> Lines:
        """Yield the line of a step, with its keyword aligned as configured."""

        yield generate_step_line(
            step,
            self.step_keyword_alignment,
            self.indent,
            self.get_indent_level(step),
            keyword_padding_width=self.__max_step_keyword_width,
        )

    def visit_tag(self, tag: Tag) -> Lines:
        """Yield the name of a tag, indented like the node owning the tag."""

        context = self.__contexts[tag]

        indent_level = self.get_indent_level(context)

        yield f"{self.indent * indent_level}{tag.name}"

    def visit_tag_group(self, tag_group: TagGroup) -> Lines:
        """
        Yield the names of all tags of the group on one line, separated by single
        spaces and indented like the context node of the group.
        """

        context = self.__contexts[tag_group]

        indent_level = self.get_indent_level(context)

        line_content = " ".join(tag.name for tag in tag_group.members)

        yield f"{self.indent * indent_level}{line_content}"

    def visit_table_row(self, row: TableRow) -> Lines:
        """Yield the line of a table row, which was formatted in advance."""

        # The context of a row is its reformatted line, see `__construct_contexts`.
        context = self.__contexts[row]

        yield context

    def visit_comment(self, comment: Comment) -> Lines:
        """
        Yield the text of a comment, indented like the next non-comment node.
        """

        context = self.__contexts[comment]

        # Find the indent level of this comment line
        if context is None:
            # In this case, this comment line is the last line of the document
            indent_level: Optional[int] = 0
        else:
            # Try to look for the indent level of the context in the mapping. If not
            # successful, then we use the same amount of white spaces to indent as
            # the next line.
            indent_level = self.get_indent_level(context, default=None)

        if indent_level is None:
            # `next` gets a default value here: a bare `next` on a context that
            # renders no line would raise StopIteration inside this generator,
            # which Python turns into a RuntimeError. Without a next line to copy
            # the indentation from, the comment is not indented.
            next_line = next(self.visit(context), None)
            indent = "" if next_line is None else extract_beginning_spaces(next_line)
        else:
            indent = self.indent * indent_level

        yield f"{indent}{comment.text}"

    def visit_doc_string(self, docstring: DocString) -> Lines:
        """Yield the delimiter and content lines of a doc string."""

        yield from generate_doc_string_lines(
            docstring,
            self.indent,
            self.get_indent_level(docstring),
        )