greenelab/adage-server

View on GitHub
adage/analyze/api.py

Summary

Maintainability
C
1 day
Test Coverage
from django.conf.urls import url
from django.core.exceptions import ObjectDoesNotExist
from django.db.models import Q
from django.http import HttpResponse
from organisms.api import OrganismResource
from genes.api import GeneResource
from tastypie import fields, http
from tastypie.resources import Resource, ModelResource, convert_post_to_VERB
from tastypie.utils import trailing_slash
from tastypie.bundle import Bundle
from tastypie.exceptions import BadRequest
from haystack.query import SearchQuerySet
from haystack.inputs import AutoQuery
from models import (
    Experiment, Sample, SampleAnnotation, AnnotationType, MLModel, Signature,
    Activity, Edge, ParticipationType, Participation, ExpressionValue
)

# Many helpful hints for this implementation came from:
# https://michalcodes4life.wordpress.com/2013/11/26/custom-tastypie-resource-from-multiple-django-models/


class SearchItemObject(object):
    pass


class SearchResource(Resource):
    item_type = fields.CharField(attribute='item_type')
    pk = fields.CharField(attribute='pk')
    description = fields.CharField(attribute='description')
    snippet = fields.CharField(attribute='snippet')
    related_items = fields.ListField(attribute='related_items')

    class Meta:
        resource_name = 'search'
        allowed_methods = ['get']
        object_class = SearchItemObject

    def resource_uri_kwargs(self, bundle_or_obj):
        kwargs = {
            'resource_name': self._meta.resource_name,
        }

        if self._meta.api_name is not None:
            kwargs['api_name'] = self._meta.api_name
        if bundle_or_obj is not None:
            if isinstance(bundle_or_obj, Bundle):
                obj = bundle_or_obj.obj
            else:
                obj = bundle_or_obj
            kwargs['resource_name'] = obj.item_type
            kwargs['pk'] = obj.pk

        return kwargs

    def obj_get_list(self, request=None, **kwargs):
        if not request:
            request = kwargs['bundle'].request
        return self.get_object_list(request)

    def get_object_list(self, request):
        # unpack the query string from the request headers
        query_str = request.GET.get('q', '')

        # restrict our search to the Experiment and Sample models
        sqs = SearchQuerySet().models(Experiment, Sample)

        # run the query and specify we want highlighted results
        sqs = sqs.filter(content=AutoQuery(query_str)).load_all().highlight()

        object_list = []
        for result in sqs:
            new_obj = SearchItemObject()

            new_obj.pk = result.pk
            if result.model_name == 'experiment':
                new_obj.item_type = result.model_name
                new_obj.description = result.object.name
                e = Experiment.objects.get(pk=result.pk)
                new_obj.related_items = [s.pk for s in e.sample_set.all()]
            elif result.model_name == 'sample':
                new_obj.item_type = result.model_name
                new_obj.description = result.object.name
                s = Sample.objects.get(pk=result.pk)
                new_obj.related_items = [e.pk for e in s.experiments.all()]
            else:
                new_obj.item_type = result.model_name
                new_obj.description = result.verbose_name
                new_obj.related_items = []
            new_obj.snippet = ' ...'.join(result.highlighted)
            object_list.append(new_obj)

        return object_list


class ExperimentResource(ModelResource):
    sample_set = fields.ManyToManyField(
        'analyze.api.SampleResource', 'sample_set')

    class Meta:
        queryset = Experiment.objects.all()
        allowed_methods = ['get']
        filtering = {
            'signature': ('exact', ),  # See apply_filters().
        }

    # Implementation of "signature" filter, which allows the API to get
    # the experiements that are related to a given signature.  According
    # to the database schema, "Signature" model has many-to-many
    # relationship with "Sample" model through "Activity" model; and
    # "Sample" model has (implicit) many-to-many relationship with
    # "Experiment" model through the "experiments" field in "Sample".
    def apply_filters(self, request, applicable_filters):
        object_list = super(ExperimentResource, self).apply_filters(
            request, applicable_filters)
        signature = request.GET.get('signature', None)
        if signature:
            # Catch ValueError exception that may be raised by int() below,
            # and raise a customized BadRequest exception with more details.
            try:
                sig_id = int(signature)
            except ValueError:
                raise BadRequest("Invalid signature ID: %s" % signature)

            samples = Activity.objects.filter(signature=sig_id).values(
                'sample')
            experiments = Sample.objects.filter(pk__in=samples).values(
                'experiments').distinct()
            object_list = object_list.filter(pk__in=experiments)
        return object_list


class AnnotationTypeResource(ModelResource):
    class Meta:
        queryset = AnnotationType.objects.all()
        allowed_methods = ['get']


class SampleResource(ModelResource):
    annotations = fields.DictField(attribute='get_annotation_dict')

    class Meta:
        queryset = Sample.objects.all()
        allowed_methods = ['get']
        experiments_allowed_methods = allowed_methods
        annotations_allowed_methods = allowed_methods
        filtering = {
            'id': ('in',),
            'experiment': ('exact', ),  # Implemented in apply_filters()
        }

    def prepend_urls(self):
        return [
            url((r'^(?P<resource_name>%s)/'
                 r'(?P<pk>[0-9]+)/get_experiments%s$') %
                (self._meta.resource_name, trailing_slash()),
                self.wrap_view('dispatch_experiments'),
                name='api_get_experiments'),
            url((r'^(?P<resource_name>%s)/'
                 r'get_annotations%s$') %
                (self._meta.resource_name, trailing_slash()),
                self.wrap_view('dispatch_annotations'),
                name='api_get_annotations'),
        ]

    def dispatch_experiments(self, request, **kwargs):
        """
        This handler takes a URL request (see above) and dispatches it to the
        get_experiments method via Tastypie's built-in ModelResource.dispatch()
        (inherited from Resource). We do this in order to take advantage of the
        Tastypie-supplied dispatcher, which properly checks if a method is
        allowed, does request throttling and authentication (if required). See
        Resource.dispatch() for details.
        """
        return self.dispatch('experiments', request, **kwargs)

    def get_experiments(self, request, pk=None, **kwargs):
        if pk:
            try:
                sample_obj = Sample.objects.get(pk=pk)
            except ObjectDoesNotExist:
                return http.HttpNotFound()
        else:
            return http.HttpNotFound()
        objects = []
        er = ExperimentResource()
        for e in sample_obj.experiments.all():
            bundle = er.build_bundle(obj=e, request=request)
            bundle = er.full_dehydrate(bundle)
            objects.append(bundle)

        return self.create_response(request, objects)

    def dispatch_annotations(self, request, **kwargs):
        """
        This handler takes a URL request (see above) and dispatches it to the
        get_annotations method via Tastypie's built-in ModelResource.dispatch()
        (inherited from Resource). We do this in order to take advantage of the
        Tastypie-supplied dispatcher, which properly checks if a method is
        allowed, does request throttling and authentication (if required). See
        Resource.dispatch() for details.
        """
        return self.dispatch('annotations', request, **kwargs)

    @staticmethod
    def get_annotations(request=None, **kwargs):
        """
        Return a tab-delimited file containing all samples for every Experiment
        with Sample properties for each and all annotations (by default) or, if
        `annotation_types` is specified, return only those annotation types for
        each sample, and do so in the order specified.
        """
        if 'annotation_types' in kwargs:
            # an explicitly-passed annotation_types param takes precedence
            annotation_types = kwargs['annotation_types']
        elif request and 'annotation_types' in request.GET:
            # process the comma-separated list of typenames
            annotation_types = request.GET['annotation_types'].split(",")
        else:
            # default: supply all AnnotationTypes in alphabetical order
            annotation_types = [
                at.typename
                for at in AnnotationType.objects.order_by('typename')
            ]
        rows = []
        # include a header as the first row
        headers = ['experiment', 'sample_name', 'ml_data_source']
        headers.extend(annotation_types)
        rows.append(u'\t'.join(headers) + u'\n')
        for e in Experiment.objects.all():
            for s in e.sample_set.all():
                sa = SampleAnnotation.objects.get_as_dict(s)
                ml_data_source = s.ml_data_source if s.ml_data_source else ''
                sa_cols = [e.accession, s.name, ml_data_source, ]
                for at in annotation_types:
                    sa_cols.append(sa.get(at, ''))
                rows.append(u'\t'.join(sa_cols) + u'\n')
        response = HttpResponse(rows, content_type='text/tab-separated-values')
        response['Content-Disposition'] = (
            'attachment; filename="sample_annotations.tsv"')
        return response

    def apply_filters(self, request, applicable_filters):
        """
        Implementation of "experiment" filter, which allows the API to
        get all samples that are related to a given experiment (whose
        primary key is "accession").
        """
        object_list = super(SampleResource, self).apply_filters(
            request, applicable_filters)
        experiment = request.GET.get('experiment', None)
        if experiment:
            try:
                e = Experiment.objects.get(pk=experiment)
            except ObjectDoesNotExist:
                # Return an empty set if the experiment is not found.
                return Sample.objects.none()
            samples = e.sample_set.all()
            object_list = object_list.filter(pk__in=samples)
        return object_list


class MLModelResource(ModelResource):
    organism = fields.ForeignKey(OrganismResource, "organism", full=True)

    class Meta:
        queryset = MLModel.objects.all()
        allowed_methods = ['get']


class SignatureResource(ModelResource):
    mlmodel = fields.ForeignKey(MLModelResource, "mlmodel",
                                full=True, full_list=False)

    class Meta:
        queryset = Signature.objects.all()
        resource_name = 'signature'
        allowed_methods = ['get']
        multiple_allowed_methods = ['post']
        filtering = {
            'name': ('exact', 'in', ),
            'heavy_genes': ('exact', ),  # New filter, see apply_filters().
            'mlmodel': ('exact', ),
        }
        ordering = ['name']

    def prepend_urls(self):
        return [
            url((r'^(?P<resource_name>%s)/'
                 r'post_multiple%s$') %
                (self._meta.resource_name, trailing_slash()),
                self.wrap_view('dispatch_multiple'),
                name='api_post_multiple'),
        ]

    def dispatch_multiple(self, request, **kwargs):
        return self.dispatch('multiple', request, **kwargs)

    def post_multiple(self, request, **kwargs):
        """
        use POSTs for retrieving long lists of Signatures
        """
        # we use the built-in Tastypie get_multiple implementation
        # but to do so, we need to convert the request to a GET
        request.method = 'GET'  # override the incoming POST
        converted_request = convert_post_to_VERB(request, 'GET')
        kwarg_name = '%s_list' % self._meta.detail_uri_name
        kwargs[kwarg_name] = converted_request.body
        return self.get_multiple(converted_request, **kwargs)

    def apply_filters(self, request, applicable_filters):
        object_list = super(SignatureResource, self).apply_filters(
            request, applicable_filters)
        heavy_genes = request.GET.get('heavy_genes', None)
        if heavy_genes:
            # Instead of relying on tastypie/resources.py to catch the
            # ValueError exception that may be raised in this function,
            # we catch the one that may be raised by int() below and
            # raise a customized BadRequest exception with more details.
            try:
                # Convert genes to a set of integers so that the
                # duplicate(s) will be removed implicitly.
                query_genes = {int(id) for id in heavy_genes.split(',')}
            except ValueError:
                raise BadRequest("Invalid gene IDs: %s" % heavy_genes)

            for (i, q) in enumerate(query_genes):
                q_signatures = {p.signature.id for p in
                                Participation.objects.filter(gene=q)}
                if i == 0:
                    related_signatures = q_signatures
                else:
                    related_signatures = related_signatures & q_signatures
            object_list = object_list.filter(id__in=related_signatures)
        return object_list


class ActivityResource(ModelResource):
    sample = fields.IntegerField(attribute='sample_id', null=False)
    signature = fields.IntegerField(attribute='signature_id', null=False)

    class Meta:
        queryset = Activity.objects.all()
        resource_name = 'activity'
        allowed_methods = ['get']
        include_resource_uri = False
        limit = 0      # Disable pagination
        max_limit = 0  # Disable pagination
        filtering = {
            'sample': ('exact', 'in', ),
            'signature': ('exact', 'in', ),
            'mlmodel': ('exact', ),  # See apply_filters()
        }
        ordering = [
            'sample', 'signature'
        ]

    def apply_filters(self, request, applicable_filters):
        """
        Instead of overriding prepend_url() method, we added a new
        filter "mlmodel" to retrieve the activity records of a specific
        mlmodel ID:
          api/v0/activity/?mlmodel=<mlmodel_id>&...
        """
        object_list = super(ActivityResource, self).apply_filters(
            request, applicable_filters)
        mlmodel = request.GET.get('mlmodel', None)
        if mlmodel:
            # Instead of relying on tastypie/resources.py to catch the
            # ValueError exception that may be raised in this function,
            # we catch the one that may be raised by int() below and
            # raise a customized BadRequest exception with more details.
            try:
                mlmodel_id = int(mlmodel)
            except ValueError:
                raise BadRequest("Invalid mlmodel ID: %s" % mlmodel)
            object_list = object_list.filter(signature__mlmodel=mlmodel_id)
        return object_list


class EdgeResource(ModelResource):
    gene1 = fields.IntegerField(attribute='gene1_id', null=False)
    gene2 = fields.IntegerField(attribute='gene2_id', null=False)
    mlmodel = fields.IntegerField(attribute='mlmodel_id', null=False)

    class Meta:
        queryset = Edge.objects.all()
        resource_name = 'edge'
        allowed_methods = ['get']
        include_resource_uri = False
        limit = 0      # Disable default pagination
        max_limit = 0  # Disable default pagination
        filtering = {
            'gene1': ('exact', 'in', ),
            'gene2': ('exact', 'in', ),
            'mlmodel': ('exact', 'in', ),
            'genes': ('exact', ),  # New filter, see apply_filters().
        }
        # Allow ordering by weight.
        # The following URL will sort the edges in ascending order of
        # weight:
        #   "api/v0/edge/?field=value&order_by=weight&format=json"
        # The following URL will sort the edges in descending order of
        # weight:
        #   "api/v0/edge/?field=value&order_by=-weight&format=json"
        # (Note the extra "-" character before "weight".)
        ordering = ['weight']

    def apply_filters(self, request, applicable_filters):
        """
        Instead of overriding prepend_url() method, we added a new
        filter "genes" to retrieve the edges whose "gene1" or "gene2"
        field is on the list of genes in a URL like this:
          api/v0/edge/?genes=id1,id2,...&...
        """
        object_list = super(EdgeResource, self).apply_filters(
            request, applicable_filters)
        genes = request.GET.get('genes', None)
        if genes:
            # Instead of relying on tastypie/resources.py to catch the
            # ValueError exception that may be raised in this function,
            # we catch the one that may be raised by int() below and
            # raise a customized BadRequest exception with more details.
            try:
                # Convert genes to a set of integers so that the
                # duplicate(s) will be removed implicitly.
                ids = {int(id) for id in genes.split(',')}
            except ValueError:
                raise BadRequest("Invalid gene IDs: %s" % genes)
            # "in" operator in Django supports both list and set.
            qset = Q(gene1__in=ids) | Q(gene2__in=ids)
            direct_edges = object_list.filter(qset).distinct()
            related_genes = set()
            for e in direct_edges:
                related_genes.add(e.gene1)
                related_genes.add(e.gene2)
            object_list = object_list.filter(
                gene1__in=related_genes, gene2__in=related_genes
            ).distinct()
        return object_list


class ParticipationTypeResource(ModelResource):

    class Meta:
        queryset = ParticipationType.objects.all()
        allowed_methods = ['get']
        limit = 0      # Disable default pagination
        max_limit = 0  # Disable default pagination


class ParticipationResource(ModelResource):
    """To avoid unnecessary table joins and improve the query
    performance, only "gene" is enabled as a foreign key (because the
    Signature page on frontend needs the gene details given a certain
    signature); "signature" is not set as a foreign key because at this
    time, we don't intend to do a query that returns signature details
    given an input gene.
    """
    signature = fields.IntegerField(attribute='signature_id', null=False)
    gene = fields.ForeignKey(GeneResource, "gene", full=True)
    participation_type = fields.ForeignKey(ParticipationTypeResource,
                                           "participation_type", full=True)

    class Meta:
        queryset = Participation.objects.all()
        allowed_methods = ['get']
        include_resource_uri = False
        limit = 0      # Disable default pagination
        max_limit = 0  # Disable default pagination
        filtering = {
            'signature': ('exact', 'in', ),
            'gene': ('exact', 'in', ),
            'participation_type': ('exact', 'in', ),
            'related_genes': ('exact')  # see apply_filters()
        }

    def apply_filters(self, request, applicable_filters):
        """Implementation of "related_genes" filter, which allows the
        API to get all participation records of signatures that include
        at least one of the entries in "related_genes" list.
        This filter is used to calculate enriched signatures given a
        list of genes.
        """
        object_list = super(ParticipationResource, self).apply_filters(
            request, applicable_filters)
        related_genes = request.GET.get('related_genes', None)
        if related_genes:
            try:
                query_genes = {int(id) for id in related_genes.split(',')}
            except ValueError:
                raise BadRequest("Invalid gene IDs: %s" % related_genes)

            signatures = Participation.objects.filter(
                gene__in=query_genes).values('signature').distinct()
            object_list = object_list.filter(signature__in=signatures)
        return object_list


class ExpressionValueResource(ModelResource):
    gene = fields.IntegerField(attribute='gene_id', null=False)
    sample = fields.IntegerField(attribute='sample_id', null=False)

    class Meta:
        queryset = ExpressionValue.objects.all()
        include_resource_uri = False
        list_allowed_methods = ['get', 'post']
        detail_allowed_methods = ['get']
        limit = 0
        max_limit = 0
        filtering = {
            'gene': ('exact', 'in', ),
            'sample': ('exact', 'in', ),
        }
        # Allow ordering by gene ID.
        ordering = ['gene']

    def post_list(self, request, **kwargs):
        """
        handle an incoming POST as a GET to work around URI length limitations
        """
        # The convert_post_to_VERB() technique is borrowed from
        # resources.py in tastypie source. This helps us to convert the POST
        # to a GET in the proper way internally.
        request.method = 'GET'  # override the incoming POST
        dispatch_request = convert_post_to_VERB(request, 'GET')
        return self.dispatch('list', dispatch_request, **kwargs)