NatLibFi/Annif

View on GitHub
annif/config.py

Summary

Maintainability
A
25 mins
Test Coverage
"""Configuration file handling"""

from __future__ import annotations

import configparser
import os.path
from glob import glob

try:
    import tomllib
except ImportError:
    import tomli as tomllib

import annif
import annif.util
from annif.exception import ConfigurationException

# Module-level shortcut to the logger object exposed by the annif package
logger = annif.logger


class AnnifConfigCFG:
    """Class for reading configuration in CFG/INI format

    The configuration is read either from a file (``filename``) or from a
    string (``projstr``). If both are given, only the file is read; if
    neither is given, nothing is read and there are no projects.
    """

    def __init__(
        self, filename: str | None = None, projstr: str | None = None
    ) -> None:
        """Create the parser and read the configuration source, if any.

        Raises ConfigurationException if the source contains a duplicate
        section or a duplicate option.
        """
        self._config = configparser.ConfigParser()
        # Replace configparser's default option-name transform (which
        # lowercases names); presumably annif.util.identity returns its
        # argument unchanged, keeping option names as written.
        self._config.optionxform = annif.util.identity
        if filename is not None:
            logger.debug(f"Reading configuration file {filename} in CFG format")
            # "utf-8-sig" decodes UTF-8 and skips a leading byte order mark.
            self._read_config(self._config.read, filename, encoding="utf-8-sig")
        elif projstr is not None:
            logger.debug("Reading configuration from a string in CFG format")
            # read_string() takes no encoding: its second parameter is the
            # source *name* shown in error messages, so nothing extra is
            # passed and configparser's default name is used.
            self._read_config(self._config.read_string, projstr)

    def _read_config(self, read_method, source, **read_kwargs):
        """Call read_method on source, translating duplicate errors.

        read_method is a reader callable such as ConfigParser.read or
        ConfigParser.read_string. Any extra keyword arguments (for example
        ``encoding`` for file reads) are forwarded to it unchanged.

        Raises ConfigurationException, chained to the original error, when
        configparser reports a duplicate option or section.
        """
        try:
            read_method(source, **read_kwargs)
        except (
            configparser.DuplicateOptionError,
            configparser.DuplicateSectionError,
        ) as err:
            raise ConfigurationException(err.message) from err

    @property
    def project_ids(self) -> list[str]:
        """Names of the sections in the configuration, used as project IDs"""
        return self._config.sections()

    def __getitem__(self, key: str) -> configparser.SectionProxy:
        """Return the configuration section stored under the given key"""
        return self._config[key]


class AnnifConfigTOML:
    """Class for reading configuration in TOML format"""

    def __init__(self, filename: str) -> None:
        with open(filename, "rb") as projf:
            try:
                logger.debug(f"Reading configuration file {filename} in TOML format")
                self._config = tomllib.load(projf)
            except tomllib.TOMLDecodeError as err:
                raise ConfigurationException(
                    f"Parsing TOML file '{filename}' failed: {err}"
                )

    @property
    def project_ids(self):
        return self._config.keys()

    def __getitem__(self, key: str) -> dict[str, str]:
        return self._config[key]


class AnnifConfigDirectory:
    """Class for reading configuration from directory

    Every ``*.cfg`` and ``*.toml`` file directly inside the directory is
    parsed, in sorted path order, and their projects are merged into one
    mapping. A project ID may appear in only one of the files.
    """

    def __init__(self, directory: str) -> None:
        """Read all configuration files found in the given directory.

        Raises ConfigurationException if the same project ID is defined in
        more than one file.
        """
        paths = [
            path
            for pattern in ("*.cfg", "*.toml")
            for path in glob(os.path.join(directory, pattern))
        ]
        logger.debug(f"Reading configuration files in directory {directory}")

        self._config = {}
        for path in sorted(paths):
            parsed = parse_config(path)
            for project_id in parsed.project_ids:
                # Check before storing, so an ID already collected from an
                # earlier file is reported instead of being overwritten.
                self._check_duplicate_project_ids(project_id, path)
                self._config[project_id] = parsed[project_id]

    def _check_duplicate_project_ids(self, proj_id: str, file: str) -> None:
        """Raise ConfigurationException if proj_id has already been stored.

        ``file`` is the path currently being read; it is only used in the
        error message.
        """
        if proj_id not in self._config:
            return
        # Error message resembles configparser's DuplicateSection message
        message = (
            f'While reading from "{file}": project ID "{proj_id}" already '
            "exists in another configuration file in the directory."
        )
        raise ConfigurationException(message)

    @property
    def project_ids(self):
        """View of the project IDs collected from all files"""
        return self._config.keys()

    def __getitem__(self, key: str) -> dict[str, str] | configparser.SectionProxy:
        """Return the configuration stored for the given project ID"""
        return self._config[key]


def check_config(projects_config_path: str) -> str | None:
    """Return the given path if it exists on disk.

    If the path does not exist, a warning explaining how to set the
    project configuration path is logged and None is returned.
    """
    if not os.path.exists(projects_config_path):
        logger.warning(
            "Project configuration file or directory "
            f'"{projects_config_path}" is missing. Please provide one. '
            "You can set the path to the project configuration "
            "using the ANNIF_PROJECTS environment "
            'variable or the command-line option "--projects".'
        )
        return None
    return projects_config_path


def find_config() -> str | None:
    """Look for a default project configuration in the working directory.

    The candidates "projects.cfg", "projects.toml" and "projects.d" are
    tried in that order and the first one that exists is returned. If none
    exists, a warning is logged and None is returned.
    """
    candidates = ("projects.cfg", "projects.toml", "projects.d")
    found = next((name for name in candidates if os.path.exists(name)), None)
    if found is not None:
        return found

    logger.warning(
        "Could not find project configuration "
        '"projects.cfg", "projects.toml" or "projects.d". '
        "You can set the path to the project configuration "
        "using the ANNIF_PROJECTS environment "
        'variable or the command-line option "--projects".'
    )
    return None


def parse_config(
    projects_config_path: str,
) -> AnnifConfigDirectory | AnnifConfigCFG | AnnifConfigTOML | None:
    """Locate and read the project configuration.

    A non-empty path is verified with check_config(); an empty one triggers
    a search with find_config(). Returns None when no configuration is
    found. Otherwise the reader is chosen from the path: a directory is
    read as a configuration directory, a path ending in ".toml" as TOML,
    and anything else as classic CFG/INI.
    """
    located = (
        check_config(projects_config_path)
        if projects_config_path
        else find_config()
    )

    if not located:  # not found
        return None

    if os.path.isdir(located):
        return AnnifConfigDirectory(located)
    if located.endswith(".toml"):  # TOML format
        return AnnifConfigTOML(located)
    # classic CFG/INI style format
    return AnnifConfigCFG(located)