OWASP/cornucopia

View on GitHub
scripts/convert.py

Summary

Maintainability
D
2 days
Test Coverage
A
91%
#!/usr/bin/env python3
import argparse
import docx2pdf  # type: ignore
import docx  # type: ignore
import fnmatch
import logging
import os
import platform
import re
import shutil
import sys
import yaml
import zipfile
import xml.etree.ElementTree as ElTree
from typing import Any, Dict, List, Tuple
from operator import itemgetter
from itertools import groupby
from pathvalidate.argparse import validate_filepath_arg
from pathvalidate import sanitize_filepath

import defusedxml.ElementTree


class ConvertVars:
    """Shared constants and runtime state used by the functions in this script."""

    # Parent directory of the directory that contains this script file.
    BASE_PATH = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    # Known values per command line option. The get_valid_*_choices() helpers
    # expand these lists (minus the "all" entry) when "all" is requested.
    EDITION_CHOICES: List[str] = ["all", "webapp", "mobileapp"]
    # NOTE(review): FILETYPE_CHOICES is not referenced in the visible part of this file.
    FILETYPE_CHOICES: List[str] = ["all", "docx", "pdf", "idml"]
    LAYOUT_CHOICES: List[str] = ["all", "leaflet", "guide", "cards"]
    LANGUAGE_CHOICES: List[str] = ["all", "en", "es", "fr", "nl", "no-nb", "pt-br", "hu", "it"]
    VERSION_CHOICES: List[str] = ["all", "latest", "1.00", "1.22", "2.00"]
    # Versions considered when the requested version is "latest" or empty.
    LATEST_VERSION_CHOICES: List[str] = ["1.00", "2.00"]
    TEMPLATE_CHOICES: List[str] = ["all", "bridge", "bridge_qr", "tarot", "tarot_qr"]
    # edition -> {version: mapped version}; read by get_valid_mapping_for_version(),
    # which returns "" for combinations that are not listed here.
    EDITION_VERSION_MAP: Dict[str, Dict[str, str]] = {
        "webapp": {"1.22": "1.22", "2.00": "2.00"},
        "against-security": {"1.00": "1.00"},
        "mobileapp": {"1.00": "1.00"},
        "all": {"1.22": "1.22", "2.00": "2.00", "1.00": "1.00"},
    }
    # Template path relative to BASE_PATH, without extension. get_template_for_edition()
    # substitutes the "edition", "layout" and "document_template" parts of the name.
    DEFAULT_TEMPLATE_FILENAME: str = os.sep.join(
        ["resources", "templates", "owasp_cornucopia_edition_ver_layout_document_template_lang"]
    )
    # Example output path shown in the help text of the -o/--outputfile option.
    DEFAULT_OUTPUT_FILENAME: str = os.sep.join(["output", "owasp_cornucopia_edition_ver_layout_document_template_lang"])
    # Parsed command line arguments; assigned in main() from parse_arguments().
    args: argparse.Namespace
    # Whether docx -> pdf conversion is possible; checked in main() and convert_docx_to_pdf().
    # NOTE(review): presumably updated by set_can_convert_to_pdf(), which is not visible here.
    can_convert_to_pdf: bool = False


def check_fix_file_extension(filename: str, file_type: str) -> str:
    """Return `filename` with its extension forced to `file_type`.

    An empty filename, or one that already ends with `file_type`, is returned
    unchanged. A purely numeric "extension" (e.g. the ".22" of "deck_1.22") is
    treated as part of the name, so the new extension is appended instead of
    replacing it.
    """
    if not filename or filename.endswith(file_type):
        return filename

    wanted_ext = file_type.strip(".")
    stem, current_ext = os.path.splitext(filename)
    if current_ext.strip(".").isnumeric():
        # The trailing ".NN" is a version number, not a real extension: keep it.
        filename = filename + "." + wanted_ext
    else:
        filename = stem + "." + wanted_ext
    logging.debug(f" --- output_filename with new ext = {filename}")
    return filename


def check_make_list_into_text(var: List[str]) -> str:
    """Render `var` as display text.

    Non-list values are simply converted with str(). Lists are first collapsed
    with group_number_ranges() and then joined with ", "; a list that renders
    to blank text yields the placeholder " - ".
    """
    if isinstance(var, list):
        joined = ", ".join(str(item) for item in group_number_ranges(var))
        return joined if joined.strip() else " - "
    return str(var)


def convert_docx_to_pdf(docx_filename: str, output_pdf_filename: str) -> None:
    """Convert `docx_filename` to `output_pdf_filename` and remove the docx afterwards.

    The conversion is only attempted when convert_vars.can_convert_to_pdf is set;
    otherwise a warning is logged. Conversion errors are logged, not raised.
    The source docx is deleted unless the script runs with the debug flag.
    """
    logging.debug(
        f" --- docx_file = {docx_filename} convert to {output_pdf_filename}\n--- starting pdf conversion now."
    )

    if not convert_vars.can_convert_to_pdf:
        logging.warning(
            "Error. A temporary docx file was created in the output folder but cannot be converted "
            f"to pdf (yet) on operating system: {platform.system()}\n"
            "This does work on Windows and Mac with MS Word installed."
        )
    else:
        try:
            docx2pdf.convert(docx_filename, output_pdf_filename)
            logging.info(f"New file saved: {output_pdf_filename}")
        except Exception as conversion_error:
            # Best effort: a failed conversion must not abort the whole run.
            logging.warning(f"\nConvert error: {conversion_error}")

    # Keep the temporary docx around only when debugging.
    if convert_vars.args.debug:
        return
    os.remove(docx_filename)


def create_edition_from_template(
    layout: str, language: str = "en", template: str = "bridge", version: str = "1.22", edition: str = "webapp"
) -> None:
    """Generate one output document for the given layout/language/template/version/edition.

    Loads the yaml sources, builds the tag -> text replacement dictionary,
    locates the template document and writes the filled-in copy to the output
    file. Returns early (without output) when no yaml files, no meta data or
    no template document can be found.
    """
    # Get the list of available translation files
    yaml_files = get_files_from_of_type(os.sep.join([convert_vars.BASE_PATH, "source"]), "yaml")
    if not yaml_files:
        return

    mapping: Dict[str, Any] = get_mapping_for_edition(yaml_files, version, language, edition, template, layout)

    if not mapping:
        # Only warn: processing continues with the language data alone.
        logging.warning(
            f"No mapping file found for version: {version}, lang: {language}, edition: {edition},"
            f" template: {template}, layout: {layout}"
        )

    # Get the language data from the correct language file
    language_data: Dict[str, Dict[str, str]] = get_language_data(yaml_files, language, version, edition)

    # Transform the language data into the template mapping
    language_dict: Dict[str, str] = map_language_data_to_template(language_data)

    # Get meta data from language data
    meta: Dict[str, str] = get_meta_data(language_data)
    if not meta:
        return

    template_doc: str = get_template_for_edition(layout, template, edition)
    # get_template_for_edition() signals "not found" with the string "None".
    if template_doc == "None":
        return
    file_extension = os.path.splitext(template_doc)[1]
    logging.debug(f"template_doc: {template_doc}")
    # Name output file with correct edition, component, language & version
    output_file: str = rename_output_file(file_extension, template, layout, meta)
    ensure_folder_exists(os.path.dirname(output_file))

    # Work with docx file (and maybe convert to pdf afterwards).
    # Exact comparison: a substring test would also accept "", ".d" or ".doc".
    if file_extension == ".docx":
        # Get the input (template) document
        doc: docx.Document = get_docx_document(template_doc)
        language_dict.update(mapping)
        doc = replace_docx_inline_text(doc, language_dict)
        doc.save(output_file)
        if convert_vars.args.pdf:
            # If file type is pdf, then save a temp docx file, convert the docx to pdf
            temp_docx_file = os.sep.join([convert_vars.BASE_PATH, "output", "temp.docx"])
            save_docx_file(doc, temp_docx_file)
            convert_docx_to_pdf(temp_docx_file, output_file)
    elif file_extension == ".idml":
        language_dict.update(mapping)
        save_idml_file(template_doc, language_dict, output_file)

    logging.info(f"New file saved: {output_file}")


def valid_meta(meta: Dict[str, Any], language: str, edition: str, version: str, template: str, layout: str) -> bool:
    """Check that the mapping meta data supports the requested language, template and layout.

    A missing template or layout is tolerated when an explicit input file was
    given on the command line. Each failed check logs a warning and returns False.
    """
    if not has_translation_for_edition(meta, language):
        logging.warning(
            f"Translation in {language} does not exist for edition: {edition}, version: {version} "
            "or the translation choices are missing from the meta -> languages section in the mappings file"
        )
        return False

    template_missing = not has_template_for_edition(meta, template)
    if template_missing and not convert_vars.args.inputfile:
        logging.warning(
            f"The template: {template} does not exist for edition: {edition}, version: {version} "
            "or the template choices are missing from the meta templates section in the mappings file"
        )
        return False

    layout_missing = not has_layout_for_edition(meta, layout)
    if layout_missing and not convert_vars.args.inputfile:
        logging.warning(
            f"The layout: {layout} does not exist for edition: {edition}, version: {version} "
            "or the layout choices are missing from the meta -> layouts section in the mappings file"
        )
        return False

    return True


def has_translation_for_edition(meta: Dict[str, Any], language: str) -> bool:
    """Return True when `language` is listed under the "languages" key of `meta`."""
    if not meta or "languages" not in meta:
        return False
    return language in meta["languages"]


def has_template_for_edition(meta: Dict[str, Any], template: str) -> bool:
    """Return True when `template` is listed under the "templates" key of `meta`."""
    if not meta or "templates" not in meta:
        return False
    return template in meta["templates"]


def has_layout_for_edition(meta: Dict[str, Any], layout: str) -> bool:
    """Return True when `layout` is listed under the "layouts" key of `meta`."""
    if not meta or "layouts" not in meta:
        return False
    return layout in meta["layouts"]


def ensure_folder_exists(folder_path: str) -> None:
    """Create `folder_path` (including missing parents) if it does not exist yet.

    An empty path is ignored: os.path.dirname() returns "" for a bare file
    name, which refers to the current directory and needs no creation.
    """
    if not folder_path or os.path.exists(folder_path):
        return
    # exist_ok guards against the folder being created between the check and this call.
    os.makedirs(folder_path, exist_ok=True)


def main() -> None:
    """Script entry point: parse arguments, then build every requested output file."""
    convert_vars.args = parse_arguments(sys.argv[1:])
    set_logging()
    logging.debug(" --- args = %s", str(convert_vars.args))

    set_can_convert_to_pdf()
    pdf_unavailable = convert_vars.args.pdf and not convert_vars.can_convert_to_pdf
    # In debug mode the run continues even though no pdf can be produced.
    if pdf_unavailable and not convert_vars.args.debug:
        logging.error(
            "Cannot convert to pdf on this system. "
            "Pdf conversion is available on Windows and Mac, if MS Word is installed"
        )
        return

    # Create one output file per requested combination of options.
    for chosen_edition in get_valid_edition_choices():
        for chosen_layout in get_valid_layout_choices():
            for chosen_language in get_valid_language_choices():
                for chosen_template in get_valid_templates():
                    for chosen_version in get_valid_version_choices():
                        create_edition_from_template(
                            chosen_layout, chosen_language, chosen_template, chosen_version, chosen_edition
                        )


def parse_arguments(input_args: List[str]) -> argparse.Namespace:
    """Parse and validate the input arguments. Return object containing argument values.

    Args:
        input_args: The command line arguments without the program name
            (main() passes sys.argv[1:]).

    Returns:
        The namespace with the attributes inputfile, version, outputfile, pdf,
        debug, language, template, edition and layout.

    On an argparse.ArgumentError the message is logged and the process exits
    via sys.exit().
    """
    description = "Tool to output OWASP Cornucopia playing cards into different file types and languages. "
    description += "\nExample usage: $ scripts/convert.py --pdf -lt guide -l es -v 2.00"
    description += "\nExample usage: $ scripts/convert.py -t tarot -l en -lt cards  -v 1.0 -e eop -i "
    description += "./resources/templates/eop_ver_cards_tarot_lang.idml -o ./output/eop-1.0-cards-en.idml"
    description += "\nExample usage: c:\\cornucopia\\scripts\\convert.py -t bridge -lt cards -l fr -v 2.00 -o"
    description += " my_output_folder\\owasp_cornucopia_edition_version_layout_language_template.idml"
    # exit_on_error=False: argument errors are raised so they can be handled at the bottom of this function.
    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.RawTextHelpFormatter, exit_on_error=False
    )
    # File path options are validated by pathvalidate's argparse helper.
    parser.add_argument(
        "-i",
        "--inputfile",
        type=validate_filepath_arg,
        default="",
        help=(
            "Input (template) file to use."
            f"\nDefault={convert_vars.DEFAULT_TEMPLATE_FILENAME}.(docx|idml)"
            "\nTemplate type is dependent on the file (-o) specified."
        ),
    )
    # Free-text options are restricted by is_valid_string_argument().
    parser.add_argument(
        "-v",
        "--version",
        type=is_valid_string_argument,
        required=False,
        default="latest",
        help=(
            "Output version to produce. [`all`, `latest`, `1.00`, `1.22`, `2.00`] "
            "\nVersion 1.22 and 1.2x will deliver cards mapped to ASVS 3.0"
            "\nVersion 2.00 and 2.0x will deliver cards mapped to ASVS 4.0"
            "\nVersion 1.00 and 1.0x will deliver cards mapped to MASVS 2.0"
            "\nVersion all will deliver all versions of cornucopia"
            "\nVersion latest will deliver the latest deck versions of cornucopia"
            "\nYou can also specify another version explicitly if needed. "
            "If so, there needs to be a yaml file in the source folder where the name contains "
            "the version code. Eg. edition-template-ver-lang.yaml"
        ),
    )
    parser.add_argument(
        "-o",
        "--outputfile",
        default="",
        type=validate_filepath_arg,
        help=(
            "Specify a path and name of output file to generate. (caution: existing file will be overwritten). "
            f"\nEg. {convert_vars.DEFAULT_OUTPUT_FILENAME}.(docx|pdf|idml)"
        ),
    )
    parser.add_argument(
        "-p",
        "--pdf",
        action="store_true",
        default=False,
        help=(
            "Whether to generate a pdf in addition to the printable document. "
            "Does not generate a pdf by default. Only docx can be converted to pdf for the moment."
        ),
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Output additional information to debug script",
    )
    # NOTE(review): each of the following options is placed in its own single-member
    # mutually exclusive group, so the groups do not exclude anything from each other.
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-l",
        "--language",
        type=is_valid_string_argument,
        default="en",
        help=(
            "Output language to produce. [`en`, `es`, `fr`, `nl`, `no-nb`, `pt-br`, `it`] "
            "you can also specify your own language file. If so, there needs to be a yaml "
            "file in the source folder where the name ends with the language code. Eg. edition-template-ver-lang.yaml"
        ),
    )
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-t",
        "--template",
        type=is_valid_string_argument,
        default="bridge",
        help=(
            "From which template to produce the document. [`bridge`, `tarot` or `tarot_qr`]\n"
            "Templates need to be added to ./resource/templates or specified with (-i or --inputfile)\n"
            "Bridge cards are 2.25 x 3.5 inch and have the mappings printed on them, \n"
            "tarot cards are 2.75 x 4.75 (71 x 121 mm) inch large, \n"
            "qr cards have a QRCode that points to an maintained list.\n"
            "You can also speficy your own template. If so, there needs to be a file in the templates folder "
            "where the name contains the template code. Eg. owasp_cornucopia_edition_ver_layout_template_lang.idml"
        ),
    )
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-e",
        "--edition",
        type=is_valid_string_argument,
        default="all",
        help=(
            "Output decks to produce. [`all`, `webapp` or `mobileapp`]\n"
            "The various Cornucopia decks. `web` will give you the Website App edition.\n"
            "`mobileapp` will give you the Mobile App edition.\n"
            "You can also speficy your own edition. If so, there needs to be a yaml "
            "file in the source folder where the name contains the edition code. Eg. edition-template-ver-lang.yaml"
        ),
    )

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-lt",
        "--layout",
        type=is_valid_string_argument,
        default="all",
        help=(
            "Document layouts to produce. [`all`, `guide`, `leaflet` or `cards`]\n"
            "The various Cornucopia document layouts.\n"
            "`cards` will output the high quality print card deck.\n"
            "`guide` will generate the docx guide with the low quality print deck.\n"
            "`leaflet` will output the high quality print leaflet.\n"
            "You can also speficy your own layout. If so, there needs to be a yaml "
            "file in the source folder where the name contains the layout code. Eg. edition-layout-ver-lang.yaml"
        ),
    )
    try:
        args = parser.parse_args(input_args)
    except argparse.ArgumentError as exc:
        # sys.tracebacklimit = 0
        logging.error(exc.message)
        # NOTE(review): sys.exit() without an argument reports success (status 0)
        # even though parsing failed - confirm whether a non-zero status is wanted.
        sys.exit()
    return args


def is_valid_string_argument(argument: str) -> str:
    """Validate a free-text option value and return it unchanged.

    Accepts 1 to 255 characters consisting of ASCII letters, digits, periods,
    dashes and underscores.

    Raises:
        argparse.ArgumentTypeError: If the value is too long or contains any
            other character.
    """
    if len(argument) > 255:
        raise argparse.ArgumentTypeError("The option can not have more the 255 char.")
    # fullmatch: with re.match a trailing "$" also matches just before a final
    # newline, which would let a value such as "en\n" slip through.
    if not re.fullmatch(r"[A-Za-z0-9._-]+", argument):
        raise argparse.ArgumentTypeError(
            "The option can only contain a-z letters, numbers, periods, dash or underscore"
        )
    return argument


def is_valid_argument_list(arguments: List[str]) -> Any:
    """Validate every entry of a list with is_valid_string_argument().

    Anything that is not a list is passed through untouched. The input is
    returned as-is; invalid entries raise from the per-item validator.
    """
    if isinstance(arguments, list):
        for entry in arguments:
            is_valid_string_argument(entry)
    return arguments


def get_document_paragraphs(doc: docx) -> List[docx.Document]:
    """Collect the paragraphs of `doc`, followed by those found inside its tables.

    Logs an error when nothing was found at all.
    """
    collected = list(doc.paragraphs)
    body_count = len(collected)
    for table in doc.tables:
        collected.extend(get_paragraphs_from_table_in_doc(table))
    total_count = len(collected)
    if total_count == 0:
        logging.error("No paragraphs found in doc")
    logging.debug(f" --- count doc paragraphs = {body_count}, with table paragraphs = {total_count}")
    return collected


def get_docx_document(docx_file: str) -> docx.Document:
    """Open `docx_file` and return the docx document.

    When the file does not exist an error is logged and a new, empty document
    is returned instead.
    """
    if not os.path.isfile(docx_file):
        logging.error("Could not find file at: %s", str(docx_file))
        return docx.Document()
    return docx.Document(docx_file)


def get_files_from_of_type(path: str, ext: str) -> List[str]:
    """Get a list of files from a specified folder recursively, that have the specified extension.

    Args:
        path: Folder to walk.
        ext: Extension to match, without the leading dot (e.g. "yaml").

    Returns:
        The full paths of all matching files; an empty list (plus an error log
        entry) when nothing matched.
    """
    pattern = "*." + str(ext)
    files = [
        os.path.join(root, filename)
        for root, _dirnames, filenames in os.walk(path)
        for filename in fnmatch.filter(filenames, pattern)
    ]
    if not files:
        # Report the folder that was actually searched, not a fixed location.
        logging.error("No language files found in folder: %s", str(path))
        return files
    logging.debug(
        "%s%s", f" --- found {len(files)} files of type {ext}. Showing first few:\n* ", str("\n* ".join(files[:3]))
    )
    return files


def get_find_replace_list(meta: Dict[str, str], template: str, layout: str) -> List[Tuple[str, str]]:
    """Build the (placeholder, replacement) pairs for a file name.

    Each replacement is the lower-cased value prefixed with an underscore. The
    edition, language and version come from `meta`.
    """

    def as_suffix(value: str) -> str:
        return "_" + value.lower()

    return [
        ("_edition", as_suffix(meta["edition"])),
        ("_layout", as_suffix(layout)),
        ("_document_template", as_suffix(template)),
        ("_lang", as_suffix(meta["language"])),
        ("_ver", as_suffix(meta["version"])),
    ]


def get_full_tag(suit_tag: str, card: str, tag: str) -> str:
    """Compose the "${...}" placeholder for a card or sentence.

    - "WC" suit:     ${WC_<card>_<tag>}
    - "Common" suit: ${Common_<card>}
    - anything else: ${<suit>_<suit><card>_<tag>}
    """
    if suit_tag == "WC":
        parts = [suit_tag, card, tag]
    elif suit_tag == "Common":
        parts = [suit_tag, card]
    else:
        parts = [suit_tag, suit_tag + card, tag]
    return "${" + "_".join(parts) + "}"


def get_mapping_for_edition(
    yaml_files: List[str], version: str, language: str, edition: str, template: str, layout: str
) -> Dict[str, Any]:
    """Load the mappings for an edition and turn them into a tag -> text dictionary.

    Returns an empty dict when no mapping data is found or its meta section is
    missing or invalid. If the template dictionary cannot be built, a warning
    is logged and the raw mapping data is returned.
    """
    mapping_data: Dict[str, Dict[str, str]] = get_mapping_data_for_edition(yaml_files, language, version, edition)
    if not mapping_data:
        logging.warning("Could not retrieve valid mapping information")
        return {}

    meta_is_valid = "meta" in mapping_data.keys() and valid_meta(
        mapping_data["meta"], language, edition, version, template, layout
    )
    if not meta_is_valid:
        logging.warning("Could not retrieve valid meta information from the mapping file")
        return {}

    try:
        mapping_data = build_template_dict(mapping_data)
    except Exception as build_error:
        logging.warning(
            f"Could not build valid template mapping. The Yaml file is not valid. Got exception: {build_error}"
        )
    return mapping_data


def get_mapping_data_for_edition(
    yaml_files: List[str],
    language: str,
    version: str = "1.22",
    edition: str = "webapp",
) -> Dict[Any, Dict[Any, Any]]:
    """Get the raw data of the replacement text from correct yaml file

    The last file in `yaml_files` whose name matches the mapping/edition/version
    pattern is loaded. The data is only returned when its meta section declares
    `component: mappings`; in every other case (no file, unreadable or empty
    yaml, wrong component) an empty dict is returned.
    """
    data: Dict[Any, Dict[Any, Any]] = {}
    logging.debug(
        " --- Starting get_mapping_data_for_edition() for edition: "
        f"{edition} , language: {language} and version: {version} "
        f" with mapping to version {get_valid_mapping_for_version(version, edition)}"
    )
    mappingfile: str = ""
    for candidate in yaml_files:
        if is_yaml_file(candidate) and is_mapping_file_for_version(candidate, version, edition):
            mappingfile = candidate
    if not mappingfile:
        return data

    with open(mappingfile, "r", encoding="utf-8") as f:
        try:
            loaded = yaml.safe_load(f)
        except yaml.YAMLError as e:
            logging.info(f"Error loading yaml file: {mappingfile}. Error = {e}")
            loaded = {}
    # An empty yaml document loads as None and a top-level sequence as a list;
    # treat anything that is not a mapping as "no data" instead of crashing below.
    data = loaded if isinstance(loaded, dict) else {}

    meta = data.get("meta")
    if isinstance(meta, dict) and meta.get("component") == "mappings":
        logging.debug(" --- found mappings file: " + os.path.split(mappingfile)[1])
    else:
        logging.debug(" --- found source file, but it was missing metadata: " + os.path.split(mappingfile)[1])
        if isinstance(meta, dict):
            logging.debug(f" --- data.keys() = {data.keys()}, data[meta].keys() = {meta.keys()}")
        data = {}
    logging.debug(f" --- Len = {len(data)}.")
    return data


def build_template_dict(input_data: Dict[str, Any]) -> Dict[str, Any]:
    """Build template dictionary from the input data

    The result maps "${...}" placeholders to their replacement text and also
    carries the validated meta data under the "meta" key.

    Args:
        input_data: Parsed yaml data with a "meta" section plus sections such as
            "suits" (entries holding "cards") or "paragraphs" (entries holding
            "sentences").

    Returns:
        Dictionary of placeholder -> text, plus the "meta" entry.
    """
    data: Dict[str, Any] = {"meta": get_meta_data(input_data)}
    # Walk every top-level section except "meta".
    for key in list(k for k in input_data.keys() if k != "meta"):
        for paragraphs in input_data[key]:
            # Name of the list inside each entry that holds the individual texts.
            # It stays "" for any other section name, so the lookup further down
            # is then done with the key "".
            text_type = ""
            if key == "suits":
                text_type = "cards"
            if key == "paragraphs":
                text_type = "sentences"
            logging.debug(f" --- key = {key}.")
            logging.debug(f" --- suit name = {paragraphs['name']}")
            logging.debug(f" --- suit id = {is_valid_string_argument(paragraphs['id'])}")
            # Placeholder for the suit name itself: ${<id>_suit}
            full_tag = "${{{}}}".format("_".join([is_valid_string_argument(paragraphs["id"]), "suit"]))
            logging.debug(f" --- suit tag = {full_tag}")
            # The suit name is only stored for data whose meta component is "cards".
            if data["meta"]["component"] == "cards":
                data[full_tag] = paragraphs["name"]
            for paragraph in paragraphs[text_type]:
                for tag, text_output in paragraph.items():
                    # "value" identifies the card/sentence; it is part of the tag, not a text.
                    if tag == "value":
                        continue
                    full_tag = get_full_tag(
                        is_valid_string_argument(paragraphs["id"]), is_valid_string_argument(paragraph["value"]), tag
                    )
                    logging.debug(f" --- tag = {full_tag}")
                    # Add a translation for "Joker"
                    # NOTE(review): this branch cannot run - entries with tag == "value"
                    # are skipped by the `continue` above. Confirm the intended behavior.
                    if paragraphs["id"] == "WC" and tag == "value":
                        full_tag = "${{{}}}".format(
                            "_".join([is_valid_string_argument(paragraphs["id"]), is_valid_string_argument(tag)])
                        )
                    logging.debug(f" --- tag = {full_tag}")
                    logging.debug(f" --- text = {text_output}")
                    # List values are rendered as comma separated text.
                    data[full_tag] = check_make_list_into_text(text_output)
    return data


def get_meta_data(data: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
    """Extract and validate the known entries of the "meta" section of `data`.

    Only the keys edition, component, language, version, languages, layouts
    and templates are copied. String values and list entries are validated with
    is_valid_string_argument(); a value of any other type is stored as False.

    Returns:
        The validated meta dictionary, or an empty dict when the meta section
        is missing or contains invalid values (an error is logged in both cases).
    """
    meta: Dict[str, Any] = {}
    if not data or "meta" not in data:
        logging.error("Could not find meta tag in the language data. Please ensure the language file is available.")
        logging.debug(f" --- meta data = {meta}")
        return meta

    wanted_keys = ("edition", "component", "language", "version", "languages", "layouts", "templates")
    try:
        for key, value in data["meta"].items():
            if key not in wanted_keys:
                continue
            if isinstance(value, str):
                meta[key] = is_valid_string_argument(value)
            elif isinstance(value, list):
                meta[key] = is_valid_argument_list(value)
            else:
                # Unsupported value types (e.g. an unquoted yaml number) are marked as False.
                meta[key] = False
    except (argparse.ArgumentError, argparse.ArgumentTypeError) as exc:
        # The validators raise ArgumentTypeError, which is not a subclass of
        # ArgumentError and has no `message` attribute.
        logging.error(f"Could not get meta because of invalid data. error: {getattr(exc, 'message', exc)}")
        return {}
    return meta


def get_paragraphs_from_table_in_doc(doc_table: docx.Document) -> List[docx.Document]:
    """Return the paragraphs with at least one run found in a table, including nested tables.

    Cells are visited row by row; for each cell its own paragraphs come first,
    followed by the paragraphs of any tables nested inside that cell.
    """
    collected: List[docx.Document] = []
    for row in doc_table.rows:
        for cell in row.cells:
            collected.extend(paragraph for paragraph in cell.paragraphs if len(paragraph.runs))
            for nested_table in cell.tables:
                collected.extend(get_paragraphs_from_table_in_doc(nested_table))
    return collected


def get_language_data(
    yaml_files: List[str],
    language: str,
    version: str = "1.22",
    edition: str = "webapp",
) -> Dict[Any, Dict[Any, Any]]:
    """Get the raw data of the replacement text from correct yaml file

    The last file in `yaml_files` whose name matches the language, version and
    edition is loaded. An empty dict is returned when no file matches, the yaml
    cannot be parsed, or the data has no "suits" section.
    """
    logging.debug(
        f" --- Starting get_language_data() for edition: {edition} "
        f"requesting language: {language} for version: {version} "
    )
    language_file: str = ""
    for candidate in yaml_files:
        if is_yaml_file(candidate) and is_lang_file_for_version(candidate, version, language, edition):
            language_file = candidate
    if not language_file:
        logging.debug(
            "Did not find translation for version: " + version + ", lang: " + language + ", edition: " + edition
        )
        return {}

    with open(language_file, "r", encoding="utf-8") as f:
        try:
            loaded = yaml.safe_load(f)
        except yaml.YAMLError as e:
            logging.info(f"Error loading yaml file: {language_file}. Error = {e}")
            loaded = {}
    # An empty yaml document loads as None and a top-level sequence as a list;
    # treat anything that is not a mapping as "no data" instead of crashing below.
    data: Dict[Any, Any] = loaded if isinstance(loaded, dict) else {}

    file_name = os.path.split(language_file)[1]
    # Read the declared language defensively: the meta section or its
    # "language" entry may be missing from a hand-written file.
    meta = data.get("meta")
    declared_language = meta.get("language") if isinstance(meta, dict) else None
    if isinstance(declared_language, str) and declared_language.lower() == language:
        logging.debug(" --- found source language file: " + file_name)
    else:
        logging.debug(" --- found source file: " + file_name)
        if isinstance(meta, dict):
            logging.debug(f" --- data.keys() = {data.keys()}, data[meta].keys() = {meta.keys()}")

    if "suits" not in data:
        logging.error(
            f"Could not get {language} data from yaml {file_name} for edition: {edition} under version:{version}"
        )
        data = {}

    logging.debug(f" --- Len = {len(data)}.")
    return data


def is_mapping_file_for_version(path: str, version: str, edition: str) -> bool:
    """Return True when the file name contains "mappings", the edition and the version."""
    file_name = os.path.basename(path)
    if "mappings" not in file_name:
        return False
    if edition not in file_name:
        return False
    return version in file_name


def is_lang_file_for_version(path: str, version: str, lang: str, edition: str) -> bool:
    """Return True when the file name matches the version, edition and language.

    The language must appear as "-<lang>." in the name; the variant with
    dashes replaced by underscores (e.g. "no_nb" for "no-nb") is accepted too.
    """
    file_name = os.path.basename(path)
    if version not in file_name or edition not in file_name:
        return False
    language_markers = ("-" + lang + ".", "-" + lang.replace("-", "_") + ".")
    return any(marker in file_name for marker in language_markers)


def is_yaml_file(path: str) -> bool:
    """Return True when `path` has a ".yaml" or ".yml" extension (case-sensitive)."""
    _, extension = os.path.splitext(path)
    return extension == ".yaml" or extension == ".yml"


def map_language_data_to_template(input_data: Dict[str, Any]) -> Dict[str, str]:
    """Convert language data into the placeholder -> text dictionary.

    If the dictionary cannot be built a warning is logged and the input data is
    returned unchanged. In debug mode the first and last four entries are logged.
    """
    try:
        data = build_template_dict(input_data)
    except Exception as build_error:
        logging.warning(
            f"Could not build valid template mapping. The Yaml file is not valid. Got exception: {build_error}"
        )
        data = input_data

    if not convert_vars.args.debug:
        return data

    keys = list(data.keys())
    for position, selected_keys in (("First", keys[:4]), ("Last", keys[-4:])):
        entries = "\n* ".join(name + ": " + str(data[name]) for name in selected_keys)
        logging.debug(f" --- Translation data showing {position} 4 (key: text):\n* " + entries)
    return data


def get_replacement_mapping_value(k: str, v: str, el_text: str) -> str:
    """Build the replacement for a "<standard name><U+2028><tag>" mapping element.

    Args:
        k: The placeholder tag, e.g. "${VE_VE2_asvs}".
        v: The text that replaces the tag.
        el_text: The text of the element being processed.

    Returns:
        "" when `el_text` is not exactly one of the known standard names
        followed by a line separator (U+2028) and the tag. Otherwise the
        replaced text: values of 38 or more characters are rendered on one line
        as "<standard name>: <value>", shorter ones replace the tag in place.
    """
    # re.escape makes the tag match literally; escaping only "$" would leave
    # characters such as "." acting as regex wildcards.
    reg_str: str = (
        "^(OWASP MASTG|OWASP MASVS|OWASP SCP|OWASP ASVS|OWASP AppSensor|CAPEC|SAFECODE)\u2028"
        + re.escape(k.strip())
        + "$"
    )
    if not re.match(reg_str, el_text.strip()):
        return ""
    if len(v) >= 38:
        # Long values do not fit under the name: drop the line separator.
        return el_text[: el_text.find("\u2028")] + ": " + v
    return el_text.replace(k, v)


def get_replacement_value_from_dict(el_text: str, replacement_values: List[Tuple[str, str]]) -> str:
    """Apply the (tag, text) replacements to `el_text` and return the result.

    A mapping element (see get_replacement_mapping_value) is replaced as a
    whole and returned immediately. Otherwise every tag that occurs as a
    whitespace-delimited token is substituted by its text.
    """
    for k, v in replacement_values:
        el_new = get_replacement_mapping_value(k, v, el_text)
        if el_new:
            return el_new
        tag = k.strip()
        if tag in el_text:
            # Only replace the tag when it is not glued to other non-space characters.
            pattern = r"(?<!\S)" + re.escape(tag) + r"(?!\S)"
            # A function replacement inserts the text literally; a plain string
            # would have its backslashes interpreted as regex template escapes.
            el_text = re.sub(pattern, lambda _match, text=v: text, el_text)
    return el_text


def get_suit_tags_and_key(key: str, edition: str) -> Tuple[List[str], str]:
    """Return the short suit tags used in the templates and the name of the text list.

    - "suits" for the webapp or mobileapp edition: that edition's suit tags and "cards"
    - "paragraphs" (any edition): ["Common"] and "sentences"
    - everything else: an empty list and an empty string
    """
    if key == "paragraphs":
        return ["Common"], "sentences"
    if key != "suits":
        return [], ""

    # Short tags to match the suits in the template documents
    tags_by_edition = {
        "webapp": ["VE", "AT", "SM", "AZ", "CR", "C", "WC"],
        "mobileapp": ["PC", "AA", "NS", "RS", "CRM", "COM", "WC"],
    }
    if edition not in tags_by_edition:
        return [], ""
    return tags_by_edition[edition], "cards"


def get_template_for_edition(layout: str = "guide", template: str = "bridge", edition: str = "webapp") -> str:
    """Resolve the path of the template document to use.

    When an input file was given on the command line, several locations
    relative to BASE_PATH are tried in turn; otherwise the default template name
    is built from the edition, layout and template.

    Returns:
        The path of the template document, or the string "None" (not the None
        object) when the resolved path is not an existing file.
    """
    template_doc: str
    args_input_file: str = convert_vars.args.inputfile
    # The "guide" layout uses a docx template, every other layout an idml template.
    sfile_ext = "idml"
    if layout == "guide":
        sfile_ext = "docx"
    if args_input_file:
        # Input file was specified
        if os.path.isabs(args_input_file):
            # Absolute paths are used as given.
            template_doc = args_input_file
        elif os.path.isfile(convert_vars.BASE_PATH + os.sep + args_input_file):
            # Relative to BASE_PATH.
            template_doc = os.path.normpath(convert_vars.BASE_PATH + os.sep + args_input_file)
        elif os.path.isfile(convert_vars.BASE_PATH + os.sep + args_input_file.replace(".." + os.sep, "")):
            # Relative to BASE_PATH after dropping every "../" part.
            template_doc = os.path.normpath(
                convert_vars.BASE_PATH + os.sep + args_input_file.replace(".." + os.sep, "")
            )
        elif args_input_file.find("..") == -1 and os.path.isfile(
            convert_vars.BASE_PATH + os.sep + ".." + os.sep + args_input_file
        ):
            # Relative to the parent of BASE_PATH (only for paths without "..").
            template_doc = os.path.normpath(convert_vars.BASE_PATH + os.sep + ".." + os.sep + args_input_file)
        elif os.path.isfile(convert_vars.BASE_PATH + os.sep + args_input_file.replace("scripts" + os.sep, "")):
            # Relative to BASE_PATH after dropping every "scripts/" part.
            template_doc = os.path.normpath(
                convert_vars.BASE_PATH + os.sep + args_input_file.replace("scripts" + os.sep, "")
            )
        else:
            # Nothing matched: keep the path as given; the existence check below reports it.
            template_doc = args_input_file
            logging.debug(f" --- Template_doc NOT found. Input File = {args_input_file}")
    else:
        # No input file specified - using defaults
        # Only the "edition", "layout" and "document_template" parts of the default
        # name are substituted; "ver" and "lang" stay in the file name as they are.
        template_doc = os.path.normpath(
            convert_vars.BASE_PATH
            + os.sep
            + convert_vars.DEFAULT_TEMPLATE_FILENAME.replace("edition", edition)
            .replace("layout", layout)
            .replace("document_template", template)
            + "."
            + sfile_ext
        )

    # Turn shell-escaped spaces ("\ ") back into plain spaces before sanitizing the path.
    template_doc = template_doc.replace("\\ ", " ")
    template_doc = sanitize_filepath(template_doc)
    if os.path.isfile(template_doc):
        # NOTE(review): the extension is adjusted after the existence check, so the
        # returned path may differ from the file that was found - confirm this is intended.
        template_doc = check_fix_file_extension(template_doc, sfile_ext)
        logging.debug(f" --- Returning template_doc = {template_doc}")
        return template_doc
    else:
        logging.error(f"Source file not found: {template_doc}. Please ensure file exists and try again.")
        return "None"


def get_valid_layout_choices() -> List[str]:
    """Return the layouts to produce for the current command line arguments.

    A specific layout is returned on its own. For "all" (or an empty value)
    every known layout is returned, with "guide" included only when the
    requested edition text is contained in "webapp".
    """
    requested = convert_vars.args.layout
    if requested.lower() != "all" and requested != "":
        return [requested]
    return [
        candidate
        for candidate in convert_vars.LAYOUT_CHOICES
        if candidate not in ("all", "guide")
        or (candidate == "guide" and convert_vars.args.edition.lower() in "webapp")
    ]


def get_valid_language_choices() -> List[str]:
    """Return the languages to produce for the current command line arguments.

    "all" expands to every known language, an empty value falls back to "en"
    and any other value is returned on its own.
    """
    requested = convert_vars.args.language
    if requested.lower() == "all":
        return [choice for choice in convert_vars.LANGUAGE_CHOICES if choice not in ("all", "template")]
    if requested == "":
        return ["en"]
    return [requested]


def get_valid_version_choices() -> List[str]:
    """Return the versions to produce for the current command line arguments.

    "all" and "latest" (or an empty value) are expanded to the versions that
    have a mapping for the requested edition; any other value is returned on
    its own. A debug message is logged when the result is empty.
    """
    edition: str = convert_vars.args.edition.lower()
    requested = convert_vars.args.version

    if requested.lower() == "all":
        versions = [
            choice
            for choice in convert_vars.VERSION_CHOICES
            if choice not in ("all", "latest") and get_valid_mapping_for_version(choice, edition) != ""
        ]
    elif requested in ("", "latest"):
        versions = [
            choice
            for choice in convert_vars.LATEST_VERSION_CHOICES
            if get_valid_mapping_for_version(choice, edition) != ""
        ]
    else:
        versions = [requested]

    if not versions:
        logging.debug(f"No deck with version: {convert_vars.args.version} for edition: {edition} exists")
    return versions


def get_valid_mapping_for_version(version: str, edition: str) -> str:
    """Look up version for edition in EDITION_VERSION_MAP; return "" when either is unknown."""
    versions_for_edition = ConvertVars.EDITION_VERSION_MAP.get(edition, {})
    return versions_for_edition.get(version, "")


def get_valid_templates() -> List[str]:
    """Return the templates to build for the template given on the command line.

    "all" (any case) expands to every entry of TEMPLATE_CHOICES except "all";
    an empty value falls back to "bridge" and "tarot_qr"; anything else is
    returned unchanged as a single-item list.
    """
    requested = convert_vars.args.template
    if requested.lower() == "all":
        # Compare for equality: `choice not in "all"` would be a substring test
        # and would also drop any choice that happens to be a fragment of "all".
        return [choice for choice in convert_vars.TEMPLATE_CHOICES if choice != "all"]
    if requested == "":
        return ["bridge", "tarot_qr"]
    return [requested]


def get_valid_edition_choices() -> List[str]:
    """Return the editions to build for the edition given on the command line.

    "all" (any case) or an empty value expands to every entry of
    EDITION_CHOICES except "all". Any other value is returned unchanged as a
    single-item list.
    """
    requested = convert_vars.args.edition
    # Exactly one branch applies. Exact comparison is used instead of a
    # substring test against "all", so "ALL" is not appended to the expanded
    # list and fragments of "all" (e.g. "a", "ll") are passed through rather
    # than silently dropped.
    if not requested or requested.lower() == "all":
        return [choice for choice in convert_vars.EDITION_CHOICES if choice != "all"]
    return [requested]


def group_number_ranges(data: List[str]) -> List[str]:
    """Collapse runs of consecutive numbers into "first-last" range strings.

    Example: ["1", "2", "3", "5"] becomes ["1-3", "5"].

    The input is returned unchanged when it has fewer than two items or when
    any item is not made up solely of decimal digits.
    """
    # isdecimal() rather than isnumeric(): the latter accepts characters such
    # as superscripts and fractions that int() cannot parse.
    if len(data) < 2 or not all(str(item).isdecimal() for item in data):
        return data

    numbers = [int(item) for item in data]
    list_ranges: List[str] = []
    # Within a run of consecutive values, (position - value) stays constant,
    # so grouping on that difference splits the list into consecutive runs.
    for _, run in groupby(enumerate(numbers), key=lambda pair: pair[0] - pair[1]):
        members = [value for _, value in run]
        first, last = members[0], members[-1]
        list_ranges.append(str(first) if first == last else f"{first}-{last}")
    return list_ranges


def save_docx_file(doc: docx.Document, output_file: str) -> None:
    """Save doc to output_file after calling ensure_folder_exists on its folder."""
    target_folder = os.path.dirname(output_file)
    ensure_folder_exists(target_folder)
    doc.save(output_file)


def save_idml_file(template_doc: str, language_dict: Dict[str, str], output_file: str) -> None:
    """Create output_file from the template_doc archive with its story text replaced.

    The template archive is unpacked into <BASE_PATH>/output/temp, the text of
    the non-empty xml files matching "*Stories*Story*" is replaced using
    language_dict, and the folder is zipped into output_file. The temp folder
    is removed afterwards unless the debug argument is set.
    """
    # Temp folder, below the output folder, that receives the extracted xml files
    extraction_dir = os.sep.join([convert_vars.BASE_PATH, "output", "temp"])
    ensure_folder_exists(extraction_dir)
    logging.debug(" --- temp_folder for extraction of xml files = %s", str(extraction_dir))

    # Unpack the template archive into the temp folder
    with zipfile.ZipFile(template_doc) as idml_archive:
        idml_archive.extractall(extraction_dir)
        first_names = idml_archive.namelist()[:5]
        logging.debug(" --- namelist of first few files in archive = %s", str(first_names))

    # Only Stories files have content to update; empty files are left alone
    story_files = fnmatch.filter(get_files_from_of_type(extraction_dir, "xml"), "*Stories*Story*")
    for story_file in story_files:
        if os.path.getsize(story_file) != 0:
            replace_text_in_xml_file(story_file, language_dict)

    # Zip the temp folder contents into the requested output file
    logging.debug(" --- finished replacing text in xml files. Now zipping into idml file")
    zip_dir(extraction_dir, output_file)

    # Keep the extracted files around when debugging
    if convert_vars.args.debug:
        return
    if os.path.exists(extraction_dir):
        shutil.rmtree(extraction_dir, ignore_errors=True)


def set_can_convert_to_pdf() -> bool:
    """Store on convert_vars, and return, whether sys.platform contains "win" or "darwin"."""
    operating_system: str = sys.platform.lower()
    can_convert = any(marker in operating_system for marker in ("win", "darwin"))
    convert_vars.can_convert_to_pdf = can_convert
    logging.debug(f" --- operating system = {operating_system}, can_convert_to_pdf = {convert_vars.can_convert_to_pdf}")
    return can_convert


def set_logging() -> None:
    """Configure the root logger format and set its level to DEBUG or INFO from the debug argument."""
    log_format = "%(asctime)s %(filename)s | %(levelname)s | %(funcName)s | %(message)s"
    logging.basicConfig(format=log_format)
    level = logging.DEBUG if convert_vars.args.debug else logging.INFO
    logging.getLogger().setLevel(level)


def sort_keys_longest_to_shortest(replacement_dict: Dict[str, str]) -> List[Tuple[str, str]]:
    """Return the dict's (key, value) pairs ordered by key length, longest first.

    Pairs whose keys have equal length keep their original dict order.
    """

    def key_length(pair: Tuple[str, str]) -> int:
        return len(pair[0])

    pairs = list(replacement_dict.items())
    pairs.sort(key=key_length, reverse=True)
    return pairs


def remove_short_keys(replacement_dict: Dict[str, str], min_length: int = 8) -> Dict[str, str]:
    """Return a new dict holding only the entries whose key has at least min_length characters."""
    kept: Dict[str, str] = {key: value for key, value in replacement_dict.items() if not len(key) < min_length}
    logging.debug(
        " --- Making template. Removed card_numbers. len replacement_dict = "
        f"{len(replacement_dict)}, len data2 = {len(kept)}"
    )
    return kept


def rename_output_file(file_extension: str, template: str, layout: str, meta: Dict[str, str]) -> str:
    """Build the output file path and fill the place-holders in its file name.

    The path comes from the outputfile argument (relative values are resolved
    against BASE_PATH) or, when that is empty, from DEFAULT_OUTPUT_FILENAME
    plus file_extension. The replacements returned by get_find_replace_list
    for meta, template and layout are applied to the file name only, not to
    the folder part. The result is passed through sanitize_filepath.
    """
    requested: str = convert_vars.args.outputfile
    logging.debug(f" --- args_output_file = {requested}")

    if not requested:
        # No output file specified - using default
        default_name = convert_vars.DEFAULT_OUTPUT_FILENAME + file_extension
        target = os.path.normpath(os.sep.join([convert_vars.BASE_PATH, default_name]))
    elif os.path.isabs(requested):
        # Absolute paths given as an argument are used as they are
        target = requested
    else:
        target = os.path.normpath(os.sep.join([convert_vars.BASE_PATH, requested]))

    logging.debug(f" --- output_filename before fix extension = {target}")
    target = check_fix_file_extension(target, file_extension)
    logging.debug(f" --- output_filename AFTER fix extension = {target}")

    # Replace the place-holders in the file name, leaving the folder untouched
    folder, base_name = os.path.dirname(target), os.path.basename(target)
    for replace_args in get_find_replace_list(meta, template, layout):
        base_name = base_name.replace(*replace_args)
    target = sanitize_filepath(folder + os.sep + base_name)

    logging.debug(f" --- output_filename = {target}")
    return target


def replace_docx_inline_text(doc: docx.Document, data: Dict[str, str]) -> docx.Document:
    """Replace every key of data with its value in the paragraphs of the docx document.

    A key found inside a single run is replaced in that run. A key that only
    appears in the joined text of all runs of a paragraph is replaced in the
    joined text, which is then stored in the first run while the other runs
    are emptied. The same document object is returned.
    """
    logging.debug(" --- starting docx_replace")
    substitutions = list(data.items())
    for paragraph in get_document_paragraphs(doc):
        combined_text = "".join(run.text for run in paragraph.runs)
        if not combined_text.strip():
            continue
        for placeholder, replacement in substitutions:
            found_in_single_run = False
            for index, run in enumerate(paragraph.runs):
                if placeholder not in run.text:
                    continue
                paragraph.runs[index].text = run.text.replace(placeholder, replacement)
                found_in_single_run = True
                # Keep the joined text in step with the runs
                combined_text = combined_text.replace(placeholder, replacement)
            if found_in_single_run or placeholder not in combined_text:
                continue
            # The key spans several runs: collapse the paragraph into its first run
            combined_text = combined_text.replace(placeholder, replacement)
            for index in range(len(paragraph.runs)):
                paragraph.runs[index].text = ""
            paragraph.runs[0].text = combined_text

    logging.debug(" --- finished replacing text in doc")
    return doc


def replace_text_in_xml_file(filename: str, replacement_dict: Dict[str, str]) -> None:
    """Replace the text of every non-empty Content element in the xml file, in place.

    The new text of each element comes from get_replacement_value_from_dict.
    A file that cannot be parsed is logged as an error and left untouched, as
    is a file without any non-empty Content element.
    """
    replacement_values = list(replacement_dict.items())
    try:
        tree = defusedxml.ElementTree.parse(filename)
    except ElTree.ParseError as e:
        logging.error(f" --- parsing xml file: {filename}. error = {e}")
        return

    updated = False
    for el in tree.findall(".//Content"):
        if not el.text:
            continue
        el.text = get_replacement_value_from_dict(el.text, replacement_values)
        updated = True

    # Serialise once after all elements are processed instead of rewriting the
    # whole file for each element; the final file content is the same.
    if updated:
        with open(filename, "bw") as f:
            f.write(ElTree.tostring(tree.getroot(), encoding="utf-8"))


def zip_dir(path: str, zip_filename: str) -> None:
    """Write every file found below path into a new deflated zip archive at zip_filename.

    Archive member names are the file paths with the leading path removed.
    """
    prefix_length = len(path)
    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as archive:
        for folder, _subfolders, filenames in os.walk(path):
            for name in filenames:
                source = os.path.join(folder, name)
                archive.write(source, arcname=source[prefix_length:])


# Script entry point: create the module-level `convert_vars` object that the
# functions in this file read as a global, then hand over to main().
# It is only created here, i.e. when the file is run as a script.
if __name__ == "__main__":
    convert_vars: ConvertVars = ConvertVars()
    main()