citation_helper_service/utils.py from adsabs/citation_helper_service

citation_helper_service/utils.py
Summary

Maintainability

1 hr
Test Coverage

Issues
'''
Created on Nov 1, 2014

@author: ehenneken
'''
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
from builtins import str
from builtins import range
from flask import current_app
from flask import request
import sys
import os
import urllib.request, urllib.parse, urllib.error
import simplejson as json
from .client import Client


def get_data(**args):
    """
    Collect the references and citations for a set of bibcodes

    Expects the keyword argument 'bibcodes'. The bibcodes are looked
    up in Solr in batches of CITATION_HELPER_CHUNK_SIZE entries, so
    that the query URL stays within a reasonable size.

    Returns one flat list: the contents of the 'reference' fields of
    all documents found, followed by the contents of their 'citation'
    fields (multiplicity is kept). When Solr answers with a status
    code other than 200, a dictionary describing the error is returned
    instead.
    """
    bibcodes = args['bibcodes']
    config = current_app.config
    # Split the bibcodes up front; every batch becomes a single
    # 'OR' query of bibcodes
    batches = list(
        chunks(bibcodes, config.get('CITATION_HELPER_CHUNK_SIZE')))
    # One bucket per Solr field we are interested in
    collected = {'reference': [], 'citation': []}
    for batch in batches:
        query = " OR ".join("bibcode:%s" % bibcode for bibcode in batch)
        # Only the 'reference' and 'citation' fields are requested
        params = {'wt': 'json', 'q': query, 'fl': 'reference,citation',
                  'rows': config['CITATION_HELPER_MAX_HITS']}
        solr = Client(config=config)
        response = solr.get(
            config.get('CITATION_HELPER_SOLR_PATH'), params=params)
        if response.status_code != 200:
            # Give up on the first failing batch and report the failure
            return {"Error": "Unable to get results!",
                    "Error Info": "Solr response: %s" % str(response.text),
                    "Status Code": response.status_code}
        documents = response.json()['response']['docs']
        # Gather all bibcodes (do NOT remove multiplicity)
        for doc in documents:
            for field in ('reference', 'citation'):
                if field in doc:
                    collected[field].extend(doc[field])
    return collected['reference'] + collected['citation']


def get_meta_data(**args):
    """
    Get the meta data (title and first author) for a set of bibcodes

    Expects the keyword argument 'results': an iterable of
    (bibcode, score) pairs, of which only the bibcodes are used.

    Returns a dictionary keyed on bibcode, with values of the form
    {'title': ..., 'author': ...}. The title is the first entry of the
    'title' field and the author is the 'first_author' field followed
    by ' et al.'; 'NA' is used for a value Solr did not provide. When
    Solr answers with a status code other than 200, a dictionary
    describing the error is returned instead.
    """
    data_dict = {}
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    bibcodes = [bibcode for (bibcode, score) in args['results']]
    q = " OR ".join(["bibcode:%s" % a for a in bibcodes])
    # Get the information from Solr
    params = {'wt': 'json', 'q': q, 'fl': 'bibcode,title,first_author',
              'rows': current_app.config.get('CITATION_HELPER_MAX_HITS')}
    response = Client(config=current_app.config).get(
        current_app.config.get('CITATION_HELPER_SOLR_PATH'), params=params
        )
    if response.status_code != 200:
        return {"Error": "Unable to get results!",
                "Error Info": response.text,
                "Status Code": response.status_code}
    resp = response.json()
    # Collect meta data
    for doc in resp['response']['docs']:
        # A missing or empty 'title' field falls back to 'NA' instead
        # of failing on the index lookup
        titles = doc.get('title')
        title = titles[0] if titles else 'NA'
        author = 'NA'
        if 'first_author' in doc:
            author = "%s et al." % doc['first_author']
        data_dict[doc['bibcode']] = {'title': title, 'author': author}
    return data_dict


def chunks(l, n):
    """
    Generate consecutive slices of l holding at most n items each.

    The last slice is shorter when len(l) is not a multiple of n.
    """
    total = len(l)
    for start in range(0, total, n):
        stop = start + n
        yield l[start:stop]