libratom/libratom

View on GitHub
libratom/lib/constants.py

Summary

Maintainability
A
0 mins
Test Coverage
# pylint: disable=missing-docstring

import os
import textwrap
from collections import namedtuple
from enum import Enum, auto

# Batch sizes; each may be overridden by an environment variable of the same name
RATOM_MSG_BATCH_SIZE = int(os.getenv("RATOM_MSG_BATCH_SIZE", 1_000))
RATOM_DB_COMMIT_BATCH_SIZE = int(os.getenv("RATOM_DB_COMMIT_BATCH_SIZE", 3_000))

# Step between progress updates in the message generator
RATOM_MSG_PROGRESS_STEP = int(os.getenv("RATOM_MSG_PROGRESS_STEP", 10))

# Default mirrors spacy's own: https://github.com/explosion/spaCy/blob/v2.1.6/spacy/language.py#L130-L149
RATOM_SPACY_MODEL_MAX_LENGTH = int(
    os.getenv("RATOM_SPACY_MODEL_MAX_LENGTH", 1_000_000)
)


# Names of the spacy trained models known to this module.
# The list is kept in alphabetical order; every entry must be a valid Python
# identifier because it doubles as a namedtuple field name below.
SPACY_MODEL_NAMES = [
    "ca_core_news_lg",
    "ca_core_news_md",
    "ca_core_news_sm",
    "ca_core_news_trf",
    "da_core_news_lg",
    "da_core_news_md",
    "da_core_news_sm",
    "da_core_news_trf",
    "de_core_news_lg",
    "de_core_news_md",
    "de_core_news_sm",
    "de_dep_news_trf",
    "de_pytt_bertbasecased_lg",
    "de_trf_bertbasecased_lg",
    "el_core_news_lg",
    "el_core_news_md",
    "el_core_news_sm",
    "en_core_web_lg",
    "en_core_web_md",
    "en_core_web_sm",
    "en_core_web_trf",
    "en_depent_web_md",
    "en_pytt_bertbaseuncased_lg",
    "en_pytt_distilbertbaseuncased_lg",
    "en_pytt_robertabase_lg",
    "en_pytt_xlnetbasecased_lg",
    "en_trf_bertbaseuncased_lg",
    "en_trf_distilbertbaseuncased_lg",
    "en_trf_robertabase_lg",
    "en_trf_xlnetbasecased_lg",
    "en_vectors_glove_md",
    "en_vectors_web_lg",
    "es_core_news_lg",
    "es_core_news_md",
    "es_core_news_sm",
    "es_core_web_md",
    "es_dep_news_trf",
    "fi_core_news_lg",
    "fi_core_news_md",
    "fi_core_news_sm",
    "fr_core_news_lg",
    "fr_core_news_md",
    "fr_core_news_sm",
    "fr_dep_news_trf",
    "fr_depvec_web_lg",
    "hr_core_news_lg",
    "hr_core_news_md",
    "hr_core_news_sm",
    "it_core_news_lg",
    "it_core_news_md",
    "it_core_news_sm",
    "ja_core_news_lg",
    "ja_core_news_md",
    "ja_core_news_sm",
    "ja_core_news_trf",
    "ko_core_news_lg",
    "ko_core_news_md",
    "ko_core_news_sm",
    "lt_core_news_lg",
    "lt_core_news_md",
    "lt_core_news_sm",
    "mk_core_news_lg",
    "mk_core_news_md",
    "mk_core_news_sm",
    "nb_core_news_lg",
    "nb_core_news_md",
    "nb_core_news_sm",
    "nl_core_news_lg",
    "nl_core_news_md",
    "nl_core_news_sm",
    "pl_core_news_lg",
    "pl_core_news_md",
    "pl_core_news_sm",
    "pt_core_news_lg",
    "pt_core_news_md",
    "pt_core_news_sm",
    "ro_core_news_lg",
    "ro_core_news_md",
    "ro_core_news_sm",
    "ru_core_news_lg",
    "ru_core_news_md",
    "ru_core_news_sm",
    "sv_core_news_lg",
    "sv_core_news_md",
    "sv_core_news_sm",
    "uk_core_news_lg",
    "uk_core_news_md",
    "uk_core_news_sm",
    "uk_core_news_trf",
    "xx_ent_wiki_sm",
    "xx_sent_ud_sm",
    "zh_core_web_lg",
    "zh_core_web_md",
    "zh_core_web_sm",
    "zh_core_web_trf",
]

# Attribute-style access to the names above: every field holds its own name,
# e.g. SPACY_MODELS.en_core_web_sm == "en_core_web_sm"
SPACY_MODELS = namedtuple("SpacyModels", SPACY_MODEL_NAMES)._make(SPACY_MODEL_NAMES)


class BodyType(Enum):
    """Identifiers for three body formats: plain text, RTF and HTML.

    Member values are generated by ``auto()``, so code should compare
    members by identity/name rather than rely on the underlying integers.

    NOTE(review): presumably this describes the format of a message body;
    confirm against the callers.
    """

    PLAIN = auto()
    RTF = auto()
    HTML = auto()


# ASCII-art banner (presumably the project name; confirm where it is displayed).
# The art is whitespace-sensitive: automatic formatting is switched off around it
# and the trailing-whitespace warning (W291) is silenced so that columns and
# trailing spaces survive verbatim. The rows already start at column 0, so
# textwrap.dedent has no common margin to strip here.
# fmt: off
ASCII_ART_NAME = textwrap.dedent("""
 ___      ___  _______  ______    _______  _______  _______  __   __ 
|   |    |   ||  _    ||    _ |  |   _   ||       ||       ||  |_|  |
|   |    |   || |_|   ||   | ||  |  |_|  ||_     _||   _   ||       |
|   |    |   ||       ||   |_||_ |       |  |   |  |  | |  ||       |
|   |___ |   ||  _   | |    __  ||       |  |   |  |  |_|  ||       |
|       ||   || |_|   ||   |  | ||   _   |  |   |  |       || ||_|| |
|_______||___||_______||___|  |_||__| |__|  |___|  |_______||_|   |_|
""")  # noqa: W291
# fmt: on