# datanator_query_python/query/query_kegg_orthology.py
from datanator_query_python.util import mongo_util
from pymongo.collation import Collation, CollationStrength
class QueryKO(mongo_util.MongoUtil):
    """Read-only queries against the ``kegg_orthology`` and ``orthodb`` collections.

    Most lookups use an English, secondary-strength collation
    (``self.collation``), i.e. string comparison in those queries ignores
    case. The orthodb lookup and ``get_def_by_kegg_id`` use the server's
    default comparison.
    """

    # Projection applied by the bulk "meta" lookups when the caller supplies
    # none. Treated as read-only: methods always work on a copy of it.
    _DEFAULT_META_PROJECTION = {'_id': 0, 'gene_ortholog': 0}

    def __init__(self, username=None, password=None, server=None, authSource='admin',
                 database='datanator', max_entries=float('inf'), verbose=True,
                 readPreference='nearest', replicaSet=None):
        """Connect to MongoDB and bind the collections this class queries.

        Args:
            username (:obj:`str`, optional): MongoDB user name.
            password (:obj:`str`, optional): MongoDB password.
            server (:obj:`str`, optional): server address, forwarded to the
                parent class as ``MongoDB``.
            authSource (:obj:`str`, optional): authentication database.
            database (:obj:`str`, optional): database name, forwarded to the
                parent class as ``db``.
            max_entries (:obj:`float`, optional): stored on the instance; not
                used by any method of this class.
            verbose (:obj:`bool`, optional): stored on the instance; not used
                by any method of this class.
            readPreference (:obj:`str`, optional): forwarded to the parent class.
            replicaSet (:obj:`str`, optional): forwarded to the parent class.
        """
        super().__init__(MongoDB=server, username=username,
                         password=password, authSource=authSource, db=database,
                         readPreference=readPreference, replicaSet=replicaSet)
        self.max_entries = max_entries
        self.verbose = verbose
        self.client, self.db, self.collection = self.con_db('kegg_orthology')
        # NOTE(review): `db_obj` is not defined in this class; presumably it is
        # set by the parent class -- confirm against mongo_util.MongoUtil.
        self.ortho = self.db_obj["orthodb"]
        self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)

    @staticmethod
    def _ordered_pipeline(field, ids, projection):
        """Build a pipeline returning documents in the order their ids appear in ``ids``.

        Args:
            field (:obj:`str`): document field matched against ``ids``.
            ids (:obj:`list` of :obj:`str`): ids to look up, in the desired output order.
            projection (:obj:`dict`): ``$project`` specification; not modified.

        Return:
            (:obj:`list` of :obj:`dict`): aggregation pipeline stages.
        """
        # Copy before adding the helper-field exclusion so that neither the
        # caller's dict nor the shared class-level default is ever mutated.
        stage_projection = dict(projection)
        stage_projection['__order'] = 0
        return [
            {'$match': {field: {'$in': ids}}},
            # Temporary sort key: position of the document's id within `ids`.
            {'$addFields': {"__order": {'$indexOfArray': [ids, "$" + field]}}},
            {'$sort': {"__order": 1}},
            {"$project": stage_projection},
        ]

    def get_ko_by_name(self, name):
        """Get a gene's ko number by its gene name.

        Args:
            name (:obj:`str`): gene name.

        Returns:
            (:obj:`str`): ko number of the gene, or ``None`` when no document matches.
        """
        query = {'gene_name': name}
        projection = {'gene_name': 1, 'kegg_orthology_id': 1}
        # self.collation is the same en / strength-2 collation that used to be
        # spelled out here as a literal dict.
        doc = self.collection.find_one(
            filter=query, projection=projection, collation=self.collation)
        if doc is None:
            return None
        return doc['kegg_orthology_id']

    def get_def_by_kegg_id(self, kegg_id):
        """Get kegg definition by kegg id.

        Args:
            kegg_id (:obj:`str`): kegg orthology id.

        Returns:
            (:obj:`list` of :obj:`str`): list of kegg orthology definitions;
            ``[None]`` when no document matches.
        """
        query = {'kegg_orthology_id': kegg_id}
        projection = {'definition.name': 1, '_id': 0}
        doc = self.collection.find_one(filter=query, projection=projection)
        if doc is None:
            return [None]
        return doc['definition']['name']

    def get_loci_by_id_org(self, kegg_id, org, gene_id):
        """Get ortholog locus id given kegg_id, organism code and gene_id.

        Args:
            kegg_id (:obj:`str`): Kegg ortholog id.
            org (:obj:`str`): Kegg organism code.
            gene_id (:obj:`str`): Gene id.

        Return:
            (:obj:`str`): locus id. An empty ``dict`` is returned when no
            document matches, and ``None`` when the matched ortholog entry
            holds no item whose ``gene_id`` equals ``gene_id`` exactly.
        """
        query = {'$and': [
            {'kegg_orthology_id': kegg_id},
            {'gene_ortholog.organism': org},
            {'gene_ortholog.genetic_info.gene_id': gene_id},
        ]}
        # Positional projection: only the matched `gene_ortholog` element is returned.
        projection = {'_id': 0, 'gene_ortholog.$': 1}
        doc = self.collection.find_one(filter=query, projection=projection,
                                       collation=self.collation)
        if doc is None:
            # Kept as {} (not None) for backward compatibility with callers.
            return {}
        genetic_info = doc['gene_ortholog'][0]['genetic_info']
        # NOTE(review): the query above compares under self.collation, whereas
        # this comparison is a plain `==`; a gene_id differing only in case can
        # match the document yet yield None here -- confirm this is intended.
        return next((item['locus_id'] for item in genetic_info
                     if item["gene_id"] == gene_id), None)

    def get_meta_by_kegg_ids(self, kegg_ids, projection=None):
        """Get meta given kegg ids, ordered like ``kegg_ids``.

        Args:
            kegg_ids (:obj:`list` of :obj:`str`): List of kegg ids.
            projection (:obj:`dict`, optional): MongoDB result projection.
                Defaults to ``{'_id': 0, 'gene_ortholog': 0}``. The dict is
                not modified; ``'__order': 0`` is added to a copy.

        Return:
            (:obj:`tuple` of :obj:`pymongo.Cursor` and :obj:`int`): pymongo Cursor obj and number of documents found.
        """
        if projection is None:
            projection = self._DEFAULT_META_PROJECTION
        pipeline = self._ordered_pipeline('kegg_orthology_id', kegg_ids, projection)
        query = {'kegg_orthology_id': {'$in': kegg_ids}}
        docs = self.collection.aggregate(pipeline, collation=self.collation)
        count = self.collection.count_documents(query, collation=self.collation)
        return docs, count

    def get_meta_by_ortho_ids(self, orthodb_ids, projection=None,
                              limit=0):
        """Get meta given orthodb ids, ordered like ``orthodb_ids``.

        Queries the ``orthodb`` collection without a collation.

        Args:
            orthodb_ids (:obj:`list` of :obj:`str`): List of orthodb ids.
            projection (:obj:`dict`, optional): MongoDB result projection.
                Defaults to ``{'_id': 0, 'gene_ortholog': 0}``. The dict is
                not modified; ``'__order': 0`` is added to a copy.
            limit (:obj:`int`, optional): accepted for interface
                compatibility but currently ignored.

        Return:
            (:obj:`tuple` of :obj:`pymongo.Cursor` and :obj:`int`): pymongo Cursor obj and number of documents found.
        """
        if projection is None:
            projection = self._DEFAULT_META_PROJECTION
        pipeline = self._ordered_pipeline('orthodb_id', orthodb_ids, projection)
        query = {'orthodb_id': {'$in': orthodb_ids}}
        docs = self.ortho.aggregate(pipeline)
        count = self.ortho.count_documents(query)
        return docs, count

    def get_meta_by_kegg_id(self, kegg_id):
        """Get meta information by kegg_id.

        Args:
            kegg_id (:obj:`str`): Kegg ID.

        Return:
            (:obj:`Obj`): Kegg meta document without ``_id``, or ``None``
            when no document matches.
        """
        projection = {'_id': 0}
        query = {'kegg_orthology_id': kegg_id}
        return self.collection.find_one(filter=query, projection=projection,
                                        collation=self.collation)