KarrLab/datanator_rest_api

View on GitHub
datanator_rest_api/util/taxon_distance.py

Summary

Maintainability
A
1 hr
Test Coverage
A
100%
from datanator_query_python.config import query_manager
from collections import deque


class TaxonDist:
    """Attach taxon-distance objects to documents, querying each species once.

    Args:
        debugging (:obj:`bool`, optional): when true, every processed document
            also gets a ``'queried'`` flag telling whether its distance object
            was freshly queried (``True``) or served from the cache (``False``).
    """

    def __init__(self, debugging=False):
        self.debugging = debugging
        # Created lazily on the first cache miss, then reused (see _get_manager).
        self._manager = None

    def _get_manager(self):
        """Return the taxon manager, creating it on first use only."""
        # getattr keeps this working for instances built without __init__
        # (e.g. via __new__ or a subclass that does not call super().__init__).
        manager = getattr(self, '_manager', None)
        if manager is None:
            # NOTE(review): presumably this opens a database connection, which
            # is why it is built once per instance instead of once per document
            # -- confirm against datanator_query_python.
            manager = query_manager.TaxonManager().txn_manager()
            self._manager = manager
        return manager

    def get_dist_object(self, doc, queried_species, distance_obj,
                        target_species, org_format='tax_id',
                        tax_field='species_name', obj_name='taxon_distance'):
        """Return taxon_distance object for frontend.

        The species identifier is read from ``doc[tax_field]``. If
        ``distance_obj`` already holds an entry for it, that entry is reused;
        otherwise the distance object is obtained from the manager's
        ``get_canon_common_ancestor_fast`` and recorded in both
        ``distance_obj`` and ``queried_species``. All three arguments ``doc``,
        ``queried_species`` and ``distance_obj`` are modified in place.

        Args:
            doc(:obj:`Obj`): document that needs to be processed.
            queried_species (:obj:`deque`): already queried species.
            distance_obj (:obj:`Obj`): distance objects of already queried species,
                keyed by species identifier.
            target_species (:obj:`str` or :obj:`int`): target species.
            org_format (:obj:`str`, optional): format of species identifier
                (tax_id or tax_name). Defaults to 'tax_id'.
            tax_field(:obj:`str`, optional): field containing taxon information
                in documents. Defaults to 'species_name'.
            obj_name(:obj:`str`, optional): name of the object containing taxon
                distance information. Defaults to 'taxon_distance'.

        Return:
            (:obj:`tuple` of :obj:`deque`, :obj:`Obj`, :obj:`Obj`): the same
            ``queried_species``, ``distance_obj`` and ``doc`` objects that were
            passed in, after the update.

        Raises:
            KeyError: if ``doc`` has no ``tax_field`` entry.
        """
        name = doc[tax_field]
        # The dict gives an O(1) lookup; scanning the deque would be O(n).
        if name in distance_obj:
            dist = distance_obj[name]
            freshly_queried = False
        else:
            dist = self._get_manager().get_canon_common_ancestor_fast(
                name, target_species, org_format=org_format)
            distance_obj[name] = dist
            # Only reached on a miss, so this scan runs at most once per species.
            if name not in queried_species:
                queried_species.append(name)
            freshly_queried = True
        doc[obj_name] = dist
        if self.debugging:
            doc['queried'] = freshly_queried
        return queried_species, distance_obj, doc

    def arrange_distance_objs(self, docs, target_species='homo sapiens',
                              tax_field='taxon_name', org_format='tax_name'):
        """Arrange the distance object returned into arrays.

        Each document is updated in place through :meth:`get_dist_object`,
        sharing one cache so every distinct species is queried at most once.

        Args:
            docs(:obj:`Iter`): Documents that need calculation of taxon_distance
                and rearranging.
            target_species(:obj:`str` or :obj:`int`, optional): User input for
                target species. Defaults to 'homo sapiens'.
            tax_field(:obj:`str`, optional): Field containing taxon information
                in documents. Defaults to 'taxon_name'.
            org_format (:obj:`str`, optional): format of species identifier
                (tax_id or tax_name). Defaults to 'tax_name'.

        Return:
            (:obj:`list` of :obj:`Obj`): List of documents that have the
            taxon_distance objects, in input order.
        """
        queried_species = deque()
        distance_obj = {}
        result = []
        for doc in docs:
            queried_species, distance_obj, doc = self.get_dist_object(
                doc, queried_species, distance_obj, target_species,
                tax_field=tax_field, org_format=org_format)
            result.append(doc)
        return result