NatLibFi/Annif

View on GitHub

Showing 73 of 78 total issues

File cli.py has 710 lines of code (exceeds 250 allowed). Consider refactoring.
Open

"""Definitions for command-line (Click) commands for invoking Annif
operations and printing the results to console."""
 
import collections
import importlib
Severity: Major
Found in annif/cli.py - About 1 day to fix

    File project.py has 307 lines of code (exceeds 250 allowed). Consider refactoring.
    Open

    """Project management functionality for Annif"""
     
    from __future__ import annotations
     
    import enum
    Severity: Minor
    Found in annif/project.py - About 3 hrs to fix

      File mllm.py has 292 lines of code (exceeds 250 allowed). Consider refactoring.
      Open

      """MLLM (Maui-like Lexical Matching) model for Annif"""
       
      from __future__ import annotations
       
      import collections
      Severity: Minor
      Found in annif/lexical/mllm.py - About 3 hrs to fix

        File hfh_util.py has 288 lines of code (exceeds 250 allowed). Consider refactoring.
        Open

        """Utility functions for interactions with Hugging Face Hub."""
         
        import binascii
        import configparser
        import importlib
        Severity: Minor
        Found in annif/hfh_util.py - About 2 hrs to fix

          AnnifProject has 24 functions (exceeds 20 allowed). Consider refactoring.
          Open

          class AnnifProject(DatadirMixin):
          """Class representing the configuration of a single Annif project."""
           
          # defaults for uninitialized instances
          _transform = None
          Severity: Minor
          Found in annif/project.py - About 2 hrs to fix

            Cyclomatic complexity is too high in function run_eval. (10)
            Open

            @cli.command("eval")
            @cli_util.project_id
            @click.argument("paths", type=click.Path(exists=True), nargs=-1)
            @click.option("--limit", "-l", default=10, help="Maximum number of subjects")
            @click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
            Severity: Minor
            Found in annif/cli.py by radon

            Cyclomatic complexity is too high in method _suggest. (9)
            Open

            def _suggest(self, text: str, params: dict[str, Any]) -> list[SubjectSuggestion]:
            data = {"text": text}
            if "project" in params:
            data["project"] = params["project"]
            if "limit" in params:
            Severity: Minor
            Found in annif/backend/http.py by radon

            Cyclomatic complexity is too high in function _documents_to_corpus. (8)
            Open

            def _documents_to_corpus(
            documents: list[dict[str, Any]],
            subject_index: SubjectIndex | None,
            ) -> annif.corpus.document.DocumentList:
            if subject_index is not None:
            Severity: Minor
            Found in annif/rest.py by radon

            Cyclomatic complexity is too high in function run_optimize. (8)
            Open

            @cli.command("optimize")
            @cli_util.project_id
            @click.argument("paths", type=click.Path(exists=True), nargs=-1)
            @click.option(
            "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
            Severity: Minor
            Found in annif/cli.py by radon

            Similar blocks of code found in 2 locations. Consider refactoring.
            Open

            def get_vocab(vocab_id: str, min_access: Access = Access.private) -> AnnifVocabulary:
            """return a single AnnifVocabulary by vocabulary id"""
             
            vocabs = get_vocabs(min_access)
            try:
            Severity: Major
            Found in annif/registry.py and 1 other location - About 2 hrs to fix
            annif/registry.py on lines 121..128

            Cyclomatic complexity is too high in method tokenize_words. (7)
            Open

            def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
            lemmas = [
            lemma
            for lemma in (token.lemma_ for token in self.nlp(text.strip()))
            if (not filter or self.is_valid_token(lemma))
            Severity: Minor
            Found in annif/analyzer/spacy.py by radon

            Cyclomatic complexity is too high in function run_upload. (7)
            Open

            @cli.command("upload")
            @click.argument("project_ids_pattern", shell_complete=cli_util.complete_param)
            @click.argument("repo_id")
            @click.option(
            "--token",
            Severity: Minor
            Found in annif/cli.py by radon

            Cyclomatic complexity is too high in function run_suggest. (7)
            Open

            @cli.command("suggest")
            @cli_util.project_id
            @click.argument(
            "paths", type=click.Path(dir_okay=False, exists=True, allow_dash=True), nargs=-1
            )
            Severity: Minor
            Found in annif/cli.py by radon

            Cyclomatic complexity is too high in function run_index. (7)
            Open

            @cli.command("index")
            @cli_util.project_id
            @click.argument("directory", type=click.Path(exists=True, file_okay=False))
            @click.option(
            "--suffix", "-s", default=".annif", help="File name suffix for result files"
            Severity: Minor
            Found in annif/cli.py by radon

            Cyclomatic complexity is too high in function filter_suggestion. (7)
            Open

            def filter_suggestion(
            preds: csr_array,
            limit: int | None = None,
            threshold: float = 0.0,
            ) -> csr_array:
            Severity: Minor
            Found in annif/suggestion.py by radon

            Cyclomatic complexity is too high in method _create_train_file. (7)
            Open

            def _create_train_file(self, veccorpus: csr_matrix, corpus: DocumentCorpus) -> None:
            self.info("creating train file")
            path = os.path.join(self.datadir, self.TRAIN_FILE)
            with open(path, "w", encoding="utf-8") as trainfile:
            # Extreme Classification Repository format header line
            Severity: Minor
            Found in annif/backend/omikuji.py by radon

            Similar blocks of code found in 2 locations. Consider refactoring.
            Open

            def get_project(project_id: str, min_access: Access = Access.private) -> AnnifProject:
            """return the definition of a single Project by project_id"""
             
            projects = get_projects(min_access)
            try:
            Severity: Major
            Found in annif/registry.py and 1 other location - About 2 hrs to fix
            annif/registry.py on lines 146..153

            Cyclomatic complexity is too high in method save. (7)
            Open

            def save(self, path: str) -> None:
            """Save this subject index into a file with the given path name."""
             
            fieldnames = ["uri", "notation"] + [f"label_{lang}" for lang in self._languages]
             
             
            Severity: Minor
            Found in annif/vocab/subject_index.py by radon

            Similar blocks of code found in 2 locations. Consider refactoring.
            Open

            def list_vocabs() -> tuple:
            """return a dict with vocabularies formatted according to OpenAPI spec"""
             
            result = {
            "vocabs": [
            Severity: Major
            Found in annif/rest.py and 1 other location - About 1 hr to fix
            annif/rest.py on lines 74..83

            Similar blocks of code found in 2 locations. Consider refactoring.
            Open

            def list_projects() -> tuple:
            """return a dict with projects formatted according to OpenAPI spec"""
             
            result = {
            "projects": [
            Severity: Major
            Found in annif/rest.py and 1 other location - About 1 hr to fix
            annif/rest.py on lines 62..71
            Severity
            Category
            Status
            Source
            Language