adsabs/biblib-service

View on GitHub
biblib/views/document_view.py

Summary

Maintainability
F
1 wk
Test Coverage
"""
Document view
"""

from biblib.utils import err, get_post_data, get_GET_params
from biblib.client import client
from biblib.utils import err, get_post_data
from biblib.models import Library, Permissions
from biblib.views.base_view import BaseView
from flask import request, current_app
from flask_discoverer import advertise
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy import Boolean
from biblib.views.http_errors import MISSING_USERNAME_ERROR, DUPLICATE_LIBRARY_NAME_ERROR, \
    WRONG_TYPE_ERROR, NO_PERMISSION_ERROR, MISSING_LIBRARY_ERROR, \
    BAD_LIBRARY_ID_ERROR, INVALID_BIBCODE_SPECIFIED_ERROR, BAD_QUERY_ERROR
from biblib.biblib_exceptions import PermissionDeniedError


class DocumentView(BaseView):
    """
    End point to interact with a specific library, by adding documents and
    removing documents. You also use this endpoint to delete the entire
    library as this method should be scoped.
    """
    # TODO: adding tags using PUT for RESTful endpoint?

    decorators = [advertise('scopes', 'rate_limit')]
    scopes = ['user']
    rate_limit = [1000, 60*60*24]

    @classmethod
    def add_document_to_library(cls, library_id, document_data):
        """
        Adds the supplied bibcodes to a user's library, keeping only the
        ones returned by the validation query.

        The supplied bibcodes are checked through cls.query_valid_bibcodes,
        one page of results at a time. Only bibcodes that come back from
        those queries are added to the library.

        :param library_id: the library id to update
        :param document_data: the meta data of the document; the list of
                              bibcodes is read from its 'bibcode' key

        :return: dictionary containing
                 number_added: change in the number of bibcodes held by the
                               library
                 invalid_bibcodes: supplied bibcodes that were not returned
                                   by the validation query (only present
                                   when there is at least one)
                 error, status: only present when a validation query
                                reported an error; number_added is then 0
        """
        current_app.logger.info('Adding a document: {0} to library_uuid: {1}'
                                .format(document_data, library_id))

        with current_app.session_scope() as session:
            # Find the specified library
            library = session.query(Library).filter_by(id=library_id).one()

            start_length = len(library.bibcode)

            doc_bibcodes = document_data['bibcode']
            len_bibcodes = len(doc_bibcodes)

            # An unset (or zero) BIGQUERY_MAX_ROWS means a single page that
            # covers every supplied bibcode.
            max_rows = current_app.config.get('BIGQUERY_MAX_ROWS') or len_bibcodes
            page_size = min(max_rows, len_bibcodes)

            # Ceiling division. An empty request needs no validation query,
            # and must not reach the division with a page size of zero.
            pages = -(-len_bibcodes // page_size) if page_size > 0 else 0

            # Validate supplied bibcodes to confirm they exist in SOLR
            valid_bibcodes = []
            for page in range(pages):
                solr_resp, status_code = cls.query_valid_bibcodes(
                    doc_bibcodes,
                    start=page * page_size,
                    rows=page_size
                )

                if "error" in solr_resp:
                    # If SOLR request fails, pass the error back to the user
                    current_app.logger.error("Failed to retrieve bibcodes with error: {}".format(solr_resp.get("error")))
                    return {"error": solr_resp.get("error"), "number_added": 0, "status": status_code}

                # If SOLR query succeeds generate list of valid bibcodes from response
                add_bibcodes = [doc.get('bibcode') for doc in solr_resp.get('docs', {})]
                if add_bibcodes:
                    valid_bibcodes += add_bibcodes
                    current_app.logger.debug("Found the following valid bibcodes: {}".format(add_bibcodes))

                # A short page means there is nothing left to fetch, so the
                # remaining queries can be skipped. Comparing against
                # page_size (rather than the raw config value) also works
                # when BIGQUERY_MAX_ROWS is not configured.
                if len(add_bibcodes) < page_size:
                    current_app.logger.debug("Bigquery returned less than max row number of bibcodes. Assuming all valid bibcodes are accounted for.")
                    break

            if valid_bibcodes:
                # Add all valid bibcodes to library
                library.add_bibcodes(valid_bibcodes)

                session.add(library)
                session.commit()

                current_app.logger.debug('Added: {0} to {1}'.format(
                    valid_bibcodes,
                    library_id)
                )

            end_length = len(library.bibcode)

            # Supplied bibcodes that were not validated, de-duplicated and
            # kept in the order the caller sent them.
            validated = set(valid_bibcodes)
            invalid_bibcodes = [
                bibcode for bibcode in dict.fromkeys(doc_bibcodes)
                if bibcode not in validated
            ]

            # Generate output that contains the number added and, if any,
            # the invalid bibcodes.
            output_dict = {"number_added": end_length - start_length}
            if invalid_bibcodes:
                output_dict['invalid_bibcodes'] = invalid_bibcodes

            return output_dict

    
    
    @classmethod
    def remove_documents_from_library(cls, library_id, document_data):
        """
        Remove the given documents from a specific library

        :param library_id: the unique ID of the library
        :param document_data: the meta data of the document; the bibcodes to
                              remove are read from its 'bibcode' key

        :return: dictionary with
                 number_removed: change in the number of bibcodes held by the
                                 library
                 bibcodes: the requested bibcodes that were in the library
                           before the removal, or an empty string when
                           nothing was removed
        """
        current_app.logger.info('Removing a document: {0} from library_uuid: '
                                '{1}'.format(document_data, library_id))

        with current_app.session_scope() as session:
            target = session.query(Library).filter_by(id=library_id).one()

            size_before = len(target.bibcode)
            # Captured before the removal: the overlap between the request
            # and what the library currently holds.
            overlap = list(set(document_data['bibcode']) & set(target.bibcode))

            target.remove_bibcodes(document_data['bibcode'])
            session.add(target)
            session.commit()

            current_app.logger.info('Removed document successfully: {0}'
                                    .format(target.bibcode))

            removed_count = size_before - len(target.bibcode)
            return {
                "number_removed": removed_count,
                "bibcodes": overlap if removed_count != 0 else "",
            }

    @staticmethod
    def update_library(library_id, library_data):
        """
        Update the meta data of the library

        Only the 'name', 'description' and 'public' entries of library_data
        are applied; any other key is ignored.

        :param library_id: the unique ID of the library
        :param library_data: dictionary containing the updateable values

        :return: dictionary of the key/value pairs that were applied
        """
        allowed_fields = ('name', 'description', 'public')

        with current_app.session_scope() as session:
            target = session.query(Library).filter_by(id=library_id).one()

            # Keep only the fields that may be changed through this method.
            applied = {
                field: library_data[field]
                for field in library_data
                if field in allowed_fields
            }
            for field, value in applied.items():
                setattr(target, field, value)

            session.add(target)
            session.commit()

        return applied

    @staticmethod
    def delete_library(library_id):
        """
        Delete the entire library from the database

        The lookup uses .one(), so it raises if the id does not match
        exactly one library.

        :param library_id: the unique ID of the library

        :return: no return
        """
        with current_app.session_scope() as session:
            lookup = session.query(Library).filter_by(id=library_id)
            session.delete(lookup.one())
            session.commit()

    def post(self, library):
        """
        HTTP POST request that adds documents to, or removes documents from,
        a library for a given user
        :param library: library ID

        :return: the response for if the documents were added or removed

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        Post body:
        ----------
        KEYWORD, VALUE

        bibcode:  <list>          List of bibcodes to be added
        action:   add, remove     add - adds a bibcode, remove - removes a
                                  bibcode

        Return data:
        -----------
        number_added: number of documents added (if 'add' is used)
        invalid_identifiers: bibcodes that could not be validated (if 'add'
                             is used and some, but not all, were rejected)
        number_removed: number of documents removed (if 'remove' is used)
        bibcodes: the bibcodes that were removed (if 'remove' is used)

        A missing or unrecognised action returns an empty body with status
        400.

        Permissions:
        -----------
        The following type of user can add documents:
          - owner
          - admin
          - write
        """
        # Get the user requesting this from the header
        try:
            user_editing = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        user_editing_uid = \
            self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

        # Check the permissions of the user
        if not self.write_access(service_uid=user_editing_uid,
                                 library_id=library):
            return err(NO_PERMISSION_ERROR)

        try:
            data = get_post_data(
                request,
                types=dict(bibcode=list, action=str)
            )
        except TypeError as error:
            current_app.logger.error('Wrong type passed for POST: {0} [{1}]'
                                     .format(request.data, error))
            return err(WRONG_TYPE_ERROR)

        # .get() so that a body without 'action' is answered with the same
        # 400 as an unknown action instead of raising KeyError.
        action = data.get('action')

        if action == 'add':
            current_app.logger.info('User requested to add a document')
            output = self.add_document_to_library(
                library_id=library,
                document_data=data
            )
            if "error" in output:
                # add_document_to_library reports the upstream status under
                # 'status'; 'status_code' is still honoured as a fallback.
                status = output.get("status") or output.get("status_code") or 400
                return err(dict(body=output.get("error"), number=status))

            if "invalid_bibcodes" in output:
                # Returns the list of invalid bibcodes, but only returns 400
                # if no bibcodes were added.
                if output.get('number_added') != 0:
                    return {"invalid_identifiers": output.get("invalid_bibcodes"), "number_added": output.get('number_added')}, 200
                return err(INVALID_BIBCODE_SPECIFIED_ERROR(output))

            current_app.logger.info(
                'Successfully added {0} documents to {1} by {2}'
                .format(output.get("number_added"), library, user_editing_uid)
            )
            return {'number_added': output.get("number_added")}, 200

        if action == 'remove':
            current_app.logger.info('User requested to remove a document')
            output_dict = self.remove_documents_from_library(
                library_id=library,
                document_data=data
            )
            current_app.logger.info(
                'Successfully removed {0} documents to {1} by {2}'
                .format(output_dict.get('number_removed'), library, user_editing_uid)
            )
            return output_dict, 200

        current_app.logger.info('User requested a non-standard action')
        return {}, 400

    def put(self, library):
        """
        HTTP PUT request that updates the meta-data of the library
        :param library: library ID

        :return: the response for if the library was updated

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        Post-body:
        ---------
        name: name of the library
        description: description of the library
        public: boolean

        Note: The above are optional, they can be empty if needed. Values
        that are empty, or that consist only of spaces, are ignored.

        Return data:
        -----------
        returns the key/value that was requested to be updated

        Permissions:
        -----------
        The following type of user can update the 'name', 'description', and
        'public':

          - owner
          - admin
        """
        try:
            user = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        if not self.helper_user_exists(user):
            return err(NO_PERMISSION_ERROR)

        if not self.helper_library_exists(library):
            return err(MISSING_LIBRARY_ERROR)

        user_updating_uid = \
            self.helper_absolute_uid_to_service_uid(absolute_uid=user)

        if not self.update_access(service_uid=user_updating_uid,
                                  library_id=library):
            return err(NO_PERMISSION_ERROR)

        try:
            library_data = get_post_data(
                request,
                types=dict(
                    name=str,
                    description=str,
                    public=bool
                )
            )
        except TypeError as error:
            current_app.logger.error('Wrong type passed for POST: {0} [{1}]'
                                     .format(request.data, error))
            return err(WRONG_TYPE_ERROR)

        # Remove content that is empty (note that the list() is necessary to
        # create a copy, so pop will work). The strip has to be applied to
        # the value itself; non-string values such as the 'public' boolean
        # are never treated as empty.
        for key in list(library_data.keys()):
            value = library_data[key]
            is_blank = value == '' or \
                (isinstance(value, str) and value.strip(' ') == '')
            if is_blank:
                current_app.logger.warning('Removing key: {0} as its empty.'
                                           .format(key))
                library_data.pop(key)

        # Check for duplicate names
        if 'name' in library_data and \
                self.library_name_exists(service_uid=user_updating_uid,
                                         library_name=library_data['name']):
            return err(DUPLICATE_LIBRARY_NAME_ERROR)

        response = self.update_library(library_id=library,
                                       library_data=library_data)

        return response, 200

    def delete(self, library):
        """
        HTTP DELETE request that deletes the library identified by the
        supplied ID
        :param library: library ID

        :return: the response for if the library was deleted

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        Post-body:
        ----------
        No post content accepted.

        Return data:
        -----------
        No data

        Permissions:
        -----------
        The following type of user can delete a library:
          - owner
        """
        try:
            requester = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        if not self.helper_user_exists(requester):
            return err(NO_PERMISSION_ERROR)

        if not self.helper_library_exists(library):
            return err(MISSING_LIBRARY_ERROR)

        service_uid = self.helper_absolute_uid_to_service_uid(
            absolute_uid=requester
        )

        try:
            current_app.logger.info(
                'user_API: {0:d} requesting to delete library: {1}'
                .format(service_uid, library)
            )

            may_delete = self.delete_access(service_uid=service_uid,
                                            library_id=library)
            if not may_delete:
                current_app.logger.error(
                    'User: {0} has incorrect permissions to delete: {1}.'
                    .format(service_uid, library)
                )
                # Handled just below so that the refusal is logged and
                # answered in one place.
                raise PermissionDeniedError('Incorrect permissions')

            self.delete_library(library_id=library)
            current_app.logger.info(
                'User: {0} deleted library: {1}.'.format(service_uid, library)
            )

        except NoResultFound as error:
            current_app.logger.info('Failed to delete: {0}'.format(error))
            return err(MISSING_LIBRARY_ERROR)

        except PermissionDeniedError as error:
            current_app.logger.info('Failed to delete: {0}'.format(error))
            return err(NO_PERMISSION_ERROR)

        return {}, 200

class QueryView(BaseView):
    """
    End point to interact with a specific library, by adding documents and
    removing documents.
    """
    # TODO: adding tags using PUT for RESTful endpoint?

    decorators = [advertise('scopes', 'rate_limit')]
    scopes = ['user']
    rate_limit = [1000, 60*60*24]

    @classmethod
    def add_query_to_library(cls, library_id, document_data):
        """
        Adds the results of a query to a user's library
        :param library_id: the library id to update
        :param document_data: dictionary whose 'params' entry holds the
                              /search query parameters

        :return: dictionary with
                 number_added: change in the number of bibcodes held by the
                               library
                 bibcodes: bibcodes returned by the query that are in the
                           library after the update, or an empty string when
                           nothing was added
                 error, status: only present when the query response
                                reported an error; number_added is then 0
        """
        current_app.logger.info('Adding queried documents: {0} to library_uuid: {1}'
                                .format(document_data, library_id))

        with current_app.session_scope() as session:
            # Find the specified library
            library = session.query(Library).filter_by(id=library_id).one()

            start_length = len(library.bibcode)

            current_app.logger.info("Calling SOLR Query with params: {}".format(document_data.get('params')))

            response = cls.process_standard_ADS_bibcode_query(params=document_data.get('params'))
            # NOTE(review): presumably the helper can hand back an error
            # mapping instead of a response object - confirm against its
            # definition.
            if "error" in response:
                return response

            solr_resp = response.json()
            status_code = response.status_code
            current_app.logger.info("SOLR returned status: {}".format(status_code))

            if "error" in solr_resp:
                # If SOLR request fails, pass the error back to the user
                current_app.logger.error("Failed to retrieve bibcodes with error: {}".format(solr_resp.get("error")))
                return {"error": solr_resp.get("error"), "number_added": 0, "status": status_code}

            # A payload without 'response' or 'docs' is treated as an empty
            # result instead of failing on None.
            docs = (solr_resp.get('response') or {}).get('docs') or []
            current_app.logger.info("docs: {}".format(docs))

            # If SOLR query succeeds generate list of valid bibcodes from response
            valid_bibcodes = [doc.get('bibcode') for doc in docs]
            current_app.logger.debug("Found the following valid bibcodes: {}".format(valid_bibcodes))

            library.add_bibcodes(valid_bibcodes)

            session.add(library)
            session.commit()

            current_app.logger.debug('Added: {0} is now {1}'.format(
                valid_bibcodes,
                library.bibcode)
            )

            end_length = len(library.bibcode)
            added_bibcodes = end_length - start_length
            if added_bibcodes != 0:
                # Computed after the update: every returned bibcode that the
                # library now holds.
                output_dict = {"number_added": added_bibcodes, "bibcodes": list(set(valid_bibcodes) & set(library.bibcode))}
            else:
                output_dict = {"number_added": added_bibcodes, "bibcodes": ""}

            return output_dict

    @classmethod
    def remove_query_from_library(cls, library_id, document_data):
        """
        Remove the results of a given query from a specific library

        :param library_id: the unique ID of the library
        :param document_data: dictionary whose 'params' entry holds the
                              /search query parameters

        :return: dictionary with
                 number_removed: change in the number of bibcodes held by the
                                 library
                 bibcodes: bibcodes returned by the query that were in the
                           library before the removal, or an empty string
                           when nothing was removed
                 error, status: only present when the query response
                                reported an error; number_removed is then 0
        """
        current_app.logger.info('Removing queried documents: {0} from library_uuid: '
                                '{1}'.format(document_data, library_id))
        current_app.logger.info("Calling SOLR Query with params: {}".format(document_data.get('params')))

        response = cls.process_standard_ADS_bibcode_query(params=document_data.get('params'))
        # Same early exit as add_query_to_library, so both actions treat an
        # error result from the helper alike.
        # NOTE(review): presumably the helper can hand back an error mapping
        # instead of a response object - confirm against its definition.
        if "error" in response:
            return response

        solr_resp = response.json()
        status_code = response.status_code
        current_app.logger.info("SOLR returned status: {}".format(status_code))

        if "error" in solr_resp:
            # If SOLR request fails, pass the error back to the user
            current_app.logger.error("Failed to retrieve bibcodes with error: {}".format(solr_resp.get("error")))
            return {"error": solr_resp.get("error"), "number_removed": 0, "status": status_code}

        # A payload without 'response' or 'docs' is treated as an empty
        # result instead of failing on None.
        docs = (solr_resp.get('response') or {}).get('docs') or []
        current_app.logger.info("docs: {}".format(docs))

        # If SOLR query succeeds generate list of valid bibcodes from response
        valid_bibcodes = [doc.get('bibcode') for doc in docs]
        current_app.logger.debug("Found the following valid bibcodes: {}".format(valid_bibcodes))

        with current_app.session_scope() as session:
            library = session.query(Library).filter_by(id=library_id).one()
            start_length = len(library.bibcode)
            # Captured before the removal: the overlap between the query
            # results and what the library currently holds.
            bibcodes_removed = list(set(valid_bibcodes) & set(library.bibcode))

            library.remove_bibcodes(valid_bibcodes)

            session.add(library)
            session.commit()
            current_app.logger.info('Removed queried documents successfully: {0}'
                                    .format(library.bibcode))
            end_length = len(library.bibcode)
            number_removed = start_length - end_length
            if number_removed != 0:
                output_dict = {"number_removed": number_removed, "bibcodes": bibcodes_removed}
            else:
                output_dict = {"number_removed": number_removed, "bibcodes": ""}

            return output_dict

    def post(self, library):
        """
        HTTP POST request that adds the results of a query to a library, or
        removes them from it, for a given user
        :param library: library ID

        :return: the response for if the documents were added or removed

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        Post body:
        ----------
        KEYWORD, VALUE

        params:  dict             parameters of a solr query
        action:   add, remove     add - adds the bibcodes found by the
                                  query, remove - removes them

        Return data:
        -----------
        number_added: number of documents added (if 'add' is used)
        number_removed: number of documents removed (if 'remove' is used)
        bibcodes: the bibcodes reported by the add/remove operation

        A missing or unrecognised action returns an empty body with status
        400.

        Permissions:
        -----------
        The following type of user can add documents:
          - owner
          - admin
          - write
        """
        # Get the user requesting this from the header
        try:
            user_editing = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        user_editing_uid = \
            self.helper_absolute_uid_to_service_uid(absolute_uid=user_editing)

        # Check the permissions of the user
        if not self.write_access(service_uid=user_editing_uid,
                                 library_id=library):
            return err(NO_PERMISSION_ERROR)

        try:
            data = get_post_data(
                request,
                types=dict(params=dict, action=str)
            )
        except TypeError as error:
            current_app.logger.error('Wrong type passed for POST: {0} [{1}]'
                                     .format(request.data, error))
            return err(WRONG_TYPE_ERROR)

        # .get() so that a body without 'action' is answered with the same
        # 400 as an unknown action instead of raising KeyError.
        action = data.get('action')

        if action == 'add':
            current_app.logger.info('User requested to add documents from query')
            output_dict = self.add_query_to_library(
                library_id=library,
                document_data=data
            )
            if "error" in output_dict:
                # Fall back to 400 when the error carries no status.
                return err(dict(body=output_dict.get("error"), number=output_dict.get("status") or 400))

            current_app.logger.info(
                'Successfully added {0} documents to {1} by {2}'
                .format(output_dict.get('number_added'), library, user_editing_uid)
            )
            return {'number_added': output_dict.get('number_added'), "bibcodes": output_dict.get('bibcodes')}, 200

        if action == 'remove':
            current_app.logger.info('User requested to remove documents by query')
            output_dict = self.remove_query_from_library(
                library_id=library,
                document_data=data
            )
            if "error" in output_dict:
                # Fall back to 400 when the error carries no status.
                return err(dict(body=output_dict.get("error"), number=output_dict.get("status") or 400))

            current_app.logger.info(
                'Successfully removed {0} documents to {1} by {2}'
                .format(output_dict.get('number_removed'), library, user_editing_uid)
            )
            return {'number_removed': output_dict.get('number_removed'), "bibcodes": output_dict.get('bibcodes')}, 200

        current_app.logger.info('User requested a non-standard action')
        return {}, 400

    def get(self, library):
        """
        HTTP GET request that adds the results of a /search query to a
        library for a given user
        :param library: library ID

        :return: the response for if the documents were successfully added

        Header:
        -------
        Must contain the API forwarded user ID of the user accessing the end
        point

        GET params:
        Any params that would be passed to the /search endpoint; they are
        read with get_GET_params and forwarded to add_query_to_library.

        Return data:
        -----------
        number_added: number of documents added
        bibcodes: the bibcodes reported by add_query_to_library

        Permissions:
        -----------
        The following type of user can add documents:
          - owner
          - admin
          - write
        """
        # Identify the caller from the request header
        try:
            requester = self.helper_get_user_id()
        except KeyError:
            return err(MISSING_USERNAME_ERROR)

        # URL safe base64 string to UUID
        try:
            library = self.helper_slug_to_uuid(library)
        except TypeError:
            return err(BAD_LIBRARY_ID_ERROR)

        service_uid = self.helper_absolute_uid_to_service_uid(
            absolute_uid=requester
        )

        # Only users with write access may change the library
        can_write = self.write_access(service_uid=service_uid,
                                      library_id=library)
        if not can_write:
            return err(NO_PERMISSION_ERROR)

        try:
            query = {"params": get_GET_params(request)}
        except ValueError as error:
            current_app.logger.error('Improperly formatted query passed to GET: {0} [{1}]'
                                     .format(request.data, error))
            return err(BAD_QUERY_ERROR)

        current_app.logger.info('User requested to add documents from query')
        result = self.add_query_to_library(library_id=library,
                                           document_data=query)

        # Guard clause: hand any reported error straight back to the caller
        if "error" in result.keys():
            failure = dict(body=result.get("error"), number=result.get("status"))
            return err(failure)

        added = result.get('number_added')
        current_app.logger.info(
            'Successfully added {0} documents to {1} by {2}'
            .format(added, library, service_uid)
        )
        return {'number_added': added, "bibcodes": result.get('bibcodes')}, 200