KarrLab/datanator_query_python

View on GitHub
datanator_query_python/query/query_metabolites_meta.py

Summary

Maintainability
B
5 hrs
Test Coverage
A
95%
from datanator_query_python.util import mongo_util, chem_util, file_util
import numpy as np
from pymongo.collation import Collation, CollationStrength


class QueryMetabolitesMeta(mongo_util.MongoUtil):
    '''Queries specific to metabolites_meta collection
    '''

    def __init__(self, cache_dirname=None, MongoDB=None, replicaSet=None, db=None,
                 collection_str='metabolites_meta', verbose=False, max_entries=float('inf'), username=None,
                 password=None, authSource='admin', readPreference='nearest'):
        self._collection_str = collection_str
        self.verbose = verbose
        super().__init__(cache_dirname=cache_dirname, MongoDB=MongoDB,
                        replicaSet=replicaSet, db=db,
                        verbose=verbose, max_entries=max_entries, username=username,
                        password=password, authSource=authSource,
                        readPreference=readPreference)
        self._collection = self.client.get_database("datanator-test")[collection_str]
        self.e_client, self.e_db_obj, self.e_collection = self.con_db('ecmdb')
        self.y_client, self.y_db_obj, self.y_collection = self.con_db('ymdb')        
        self.file_manager = file_util.FileUtil()
        self.chem_manager = chem_util.ChemUtil()
        self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)

    def get_metabolite_synonyms(self, compounds):
        ''' Find synonyms of a compound

            Args:
                compound (list): name(s) of the compound e.g. "ATP", ["ATP", "Oxygen", ...]

            Returns:
                synonyms: dictionary of synonyms of the compounds
                        {'ATP': [], 'Oxygen': [], ...}
                rxns: dictionary of rxns in which each compound is found
                    {'ATP': [12345,45678,...], 'Oxygen': [...], ...}
        '''
        synonyms = {}
        rxns = {}

        def find_synonyms_of_str(c):
            if len(c) != 0:
                query = {'synonyms': c}
                projection = {'synonyms': 1, '_id': -1, 'kinlaw_id': 1}
                collation = {'locale': 'en', 'strength': 2}
                doc = self._collection.find_one(
                    filter=query, projection=projection, collation=collation)
                synonym = {}
                rxn = {}
                try:
                    synonym[c] = doc['synonyms']
                    rxn[c] = doc['kinlaw_id']
                except TypeError as e:
                    synonym[c] = (c + ' does not exist in ' +
                                  self._collection_str)
                    rxn[c] = (c + ' does not exist in ' + self._collection_str)
                return rxn, synonym
            else:
                return ({'reactions': None}, {'synonyms': None})

        if len(compounds) == 0:
            return ({'reactions': None}, {'synonyms': None})
        elif isinstance(compounds, str):
            rxn, syn = find_synonyms_of_str(compounds)
            synonyms.update(syn)
            rxns.update(rxn)
        else:
            for c in compounds:
                rxn, syn = find_synonyms_of_str(c)
                synonyms.update(syn)
                rxns.update(rxn)
        return rxns, synonyms

    def get_metabolite_inchi(self, compounds):
        '''Given a list of compound name(s) Return the corrensponding inchi string

            Args:
                compounds: list of compounds
                ['ATP', '2-Ketobutanoate']

            Returns:
                ['....', 'InChI=1S/C4H6O3/c1-2-3(5)4(6)7/...']
        '''
        inchi = []
        projection = {'_id': 0, 'inchi': 1, 'm2m_id': 1, 'ymdb_id': 1}
        collation = {'locale': 'en', 'strength': 2}
        for compound in compounds:
            cursor = self._collection.find_one({'$or': [{'synonyms': compound},
                                                       {'name': compound}]},
                                              projection=projection, collation=collation)
            if cursor is None:
                inchi.append(
                    {"inchi": 'No inchi found.', "m2m_id": 'No ECMDB record found.',
                    "ymdb_id": 'No YMDB record found.'})
            else:                
                inchi.append(
                    {"inchi": cursor['inchi'], "m2m_id": cursor.get('m2m_id', None),
                    "ymdb_id": cursor.get('ymdb_id', None)})
        return inchi

    def get_ids_from_hash(self, hashed_inchi):
        ''' Given a hashed inchi string, find its
            corresponding m2m_id and/or ymdb_id
            Args:
                hashed_inchi (`obj`: str): string of hashed inchi
            Returns:
                result (`obj`: dict): dictionary of ids and their keys
                    {'m2m_id': ..., 'ymdb_id': ...}
        '''
        query = {'InChI_Key': hashed_inchi}
        projection = {'_id': 0}
        doc = self._collection.find_one(filter=query, projection=projection)
        result = {}
        result['m2m_id'] = doc.get('m2m_id', None)
        result['ymdb_id'] = doc.get('ymdb_id', None)

        return result

    def get_ids_from_hashes(self, hashed_inchi):
        ''' Given a list of hashed inchi string, find their
            corresponding m2m_id and/or ymdb_id
            Args:
                hashed_inchi (`obj`: list of `obj`: str): list of hashed inchi
            Returns:
                result (`obj`: list of `obj`: dict): dictionary of ids and their keys
                    [{'m2m_id': ..., 'ymdb_id': ..., 'InChI_Key': ...}, {}, ..]
        '''
        query = {'InChI_Key': {'$in': hashed_inchi}}
        projection = {'m2m_id': 1, 'ymdb_id': 1, 'InChI_Key': 1}
        docs = self._collection.find(filter=query, projection=projection)
        result = []
        if docs is None: 
            return result
        else:
            for doc in docs:
                dic = {}
                dic['m2m_id'] = doc.get('m2m_id', None)
                dic['ymdb_id'] = doc.get('ymdb_id', None)
                dic['InChI_Key'] = doc.get('InChI_Key', None)
                result.append(dic)
            return result

    def get_metabolite_hashed_inchi(self, compounds):
        ''' Given a list of compound name(s)
            Return the corresponding hashed inchi string
            Args:
                compounds: ['ATP', '2-Ketobutanoate']
            Return:
                hashed_inchi: ['3e23df....', '7666ffa....']
        '''
        hashed_inchi = []
        projection = {'_id': 0, 'InChI_Key': 1}
        collation = {'locale': 'en', 'strength': 2}
        for compound in compounds:
            cursor = self._collection.find_one({'$or': [{'synonyms': compound},
                                                        {'name': compound}]},
                                              projection=projection, collation=collation)
            if cursor is None:
                hashed_inchi.append('No inchi key found.')
            else:
                hashed_inchi.append(cursor['InChI_Key'])
        return hashed_inchi

    def get_metabolite_name_by_hash(self, compounds):
        ''' Given a list of hashed inchi, 
            return a list of name (one of the synonyms)
            for each compound
            Args:
                compounds: list of compounds in inchikey format
            Return:
                result: list of names
                    [name, name, name]
        '''
        result = []
        projection = {'_id': 0, 'synonyms': 1}
        collation = {'locale': 'en', 'strength': 2}
        for compound in compounds:
            cursor = self._collection.find_one({'InChI_Key': compound},
                                              projection=projection)
            if cursor is None:
                result.append(['None'])
                continue
            if not isinstance(cursor['synonyms'], list):
                cursor['synonyms'] = [cursor['synonyms']]
            result.append(cursor.get('synonyms', ['None']))
            # except TypeError:
            #     result.append(['None'])
        return [x[-1] for x in result]

    def get_unique_metabolites(self):
        """Get number of unique metabolites.

        Return:
            (:obj:`int`): number of unique metabolites.
        """
        return len(self._collection.distinct('InChI_Key', collation=self.collation))

    def get_metabolites_meta(self, inchi_key):
        """Get metabolite's meta information given inchi_key.

        Args:
            (:obj:`str`): InChI Key of metabolites

        Return:
            (:obj:`dict`): meta information object.
        """
        projection = {'_id': 0, 'reaction_participants': 0, 'similar_compounds': 0}
        query = {'InChI_Key': inchi_key}
        doc = self._collection.find_one(filter=query, projection=projection, collation=self.collation)
        if doc is None:
            return {}
        else:
            return doc

    def get_eymeta(self, inchi_key):
        """Get meta info from ECMDB or YMDB
        
        Args:
            inchi_key (:obj:`str`): inchikey / name of metabolite molecule.

        Return:
            (:obj:`Obj`): meta information.
        """
        projection = {'_id': 0, 'concentrations': 0}
        con_0 = {'name': inchi_key}
        con_1 = {'synonyms.synonym': inchi_key}
        con_2 = {'inchikey': inchi_key}
        query = {'$or': [con_0, con_1, con_2]}
        doc = self.e_collection.find_one(filter=query, projection=projection, collation=self.collation)
        if doc is not None:
            return doc
        else:
            doc = self.y_collection.find_one(filter=query, projection=projection, collation=self.collation)
            return doc

    def get_doc_by_name(self, names):
        """Get document by metabolite's list of possible names.

        Args:
            names(:obj:`list` of :obj:`str`): Name of possible names.

        Return:
            (:obj:`Obj`)
        """
        con_0 = {'name': {'$in': names}}
        con_1 = {'synonyms': {'$in': names}}
        query = {'$or': [con_0, con_1]}
        return self._collection.find_one(filter=query, collation=self.collation)