eulexistdb/db.py
# file eulexistdb/db.py
#
# Copyright 2010,2011 Emory University Libraries
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Connect to an eXist XML database and query it.
This module provides :class:`ExistDB` and related classes for connecting to
an eXist-db_ database and executing XQuery_ queries against it.
.. _XQuery: http://www.w3.org/TR/xquery/
.. _eXist-db: http://exist.sourceforge.net/
When used with Django, :class:`~eulexistdb.db.ExistDB` can pull
configuration settings directly from Django settings. If you create
an instance of :class:`~eulexistdb.db.ExistDB` without specifying a
server url, it will attempt to configure an eXist database based on
Django settings, using the configuration names documented below.
Projects that use this module should include the following settings in their
``settings.py``::
# Exist DB Settings
EXISTDB_SERVER_USER = 'user'
EXISTDB_SERVER_PASSWORD = 'password'
EXISTDB_SERVER_URL = "http://megaserver.example.com:8042/exist"
EXISTDB_ROOT_COLLECTION = "/sample_collection"
.. note::
User and password settings are optional.
To configure a timeout for most eXist connections, specify the desired
time in seconds as ``EXISTDB_TIMEOUT``; if none is specified, the
global default socket timeout will be used.
.. note::
Any configured ``EXISTDB_TIMEOUT`` will be ignored by the
**existdb** management command, since reindexing a large collection
could take significantly longer than a normal timeout would allow
for.
If you are using an eXist index configuration file, you can add another setting
to specify your configuration file::
EXISTDB_INDEX_CONFIGFILE = "/path/to/my/exist_index.xconf"
This will allow you to use the ``existdb`` management command to
manage your index configuration file in eXist.
If you wish to specify options for fulltext queries, you can set a dictionary
of options like this::
EXISTDB_FULLTEXT_OPTIONS = {'default-operator': 'and'}
.. Note::
Python :mod:`xmlrpclib` does not support extended types, some of which
are used in eXist returns. This does not currently affect the
functionality exposed within :class:`ExistDB`, but may cause issues
if you use the :attr:`ExistDB.server` XML-RPC connection directly
for other available eXist XML-RPC methods. If you do make use of
those, you may want to enable XML-RPC patching to handle the return
types::
from eulexistdb import patch
patch.request_patching(patch.XMLRpcLibPatch)
----
If you are writing unit tests against code that uses
:mod:`eulexistdb`, you may want to take advantage of
:class:`eulexistdb.testutil.TestCase` for loading fixture data to a
test eXist-db collection, and
:class:`eulexistdb.testutil.ExistDBTestSuiteRunner`, which has logic
to set up and switch configurations between a development and test
collections in eXist.
----
"""
from functools import wraps
import logging
import requests
import socket
import time
from urllib.parse import urlparse
import warnings
import xmlrpc.client as xmlrpclib
try:
from django.dispatch import Signal
except ImportError:
Signal = None
from . import patch
from eulxml import xmlmap
from eulexistdb.exceptions import ExistDBException, ExistDBTimeout
# public names exported by ``from eulexistdb.db import *``
__all__ = ['ExistDB', 'QueryResult', 'ExistDBException', 'EXISTDB_NAMESPACE']
# module-level logger, named after this module
logger = logging.getLogger(__name__)
# eXist namespace URI; presumably the namespace bound to the ``exist:``
# prefix used in query result xpaths -- TODO confirm where it is registered
EXISTDB_NAMESPACE = 'http://exist.sourceforge.net/NS/exist'
def _wrap_xmlrpc_fault(f):
@wraps(f)
def wrapper(*args, **kwargs):
try:
return f(*args, **kwargs)
except (socket.timeout, requests.exceptions.ReadTimeout) as err:
raise ExistDBTimeout(err)
except (socket.error, xmlrpclib.Fault,
xmlrpclib.ProtocolError, xmlrpclib.ResponseError,
requests.exceptions.ConnectionError) as err:
raise ExistDBException(err)
# FIXME: could we catch IOerror (connection reset) and try again ?
# occasionally getting this error (so far exclusively in unit tests)
# error: [Errno 104] Connection reset by peer
return wrapper
# Optional Django signal sent by ExistDB.query after each xquery REST
# request; remains None when Django (and therefore Signal) is unavailable.
# Receivers are sent these keyword arguments:
#   time_taken, name, return_value, args, kwargs
xquery_called = None
if Signal is not None:
    # NOTE: the ``providing_args`` argument was purely documentational; it
    # was deprecated in Django 3.1 and removed in Django 4.0, where passing
    # it raises a TypeError at import time.  A bare Signal() behaves the
    # same on every Django version.
    xquery_called = Signal()
class ExistDB(object):
"""Connect to an eXist database, and manipulate and query it.
Construction doesn't initiate server communication, only store
information about where the server is, to be used in later
communications.
:param server_url: The eXist server URL. New syntax (as of 0.20)
expects primary eXist url and *not* the ``/xmlrpc`` endpoint;
for backwards compatibility, urls that include ``/xmlrpc``
are still handled, and will be parsed to set exist server path
as well as username and password if specified. Note that username
and password parameters take precedence over username
and password in the server url if both are specified.
:param username: exist username, if any
:param password: exist user password, if any
:param resultType: The class to use for returning :meth:`query` results;
defaults to :class:`QueryResult`
:param encoding: The encoding used to communicate with the server;
defaults to "UTF-8"
:param verbose: When True, print XML-RPC debugging messages to stdout
:param timeout: Specify a timeout for xmlrpc connection
requests. If not specified, the global default socket timeout
value will be used.
:param keep_alive: Optional parameter, to disable requests built-in
session handling; can also be configured in django settings
with EXISTDB_SESSION_KEEP_ALIVE
"""
# default timeout, to allow distinguishing between no timeout
# specified and an explicit timeout of None (e.g., explicit timeout
# None should override a configured EXISTDB_TIMEOUT)
DEFAULT_TIMEOUT = object()
exist_url = None
username = None
password = None
def __init__(self, server_url=None, username=None, password=None,
             resultType=None, encoding='UTF-8', verbose=False,
             keep_alive=None, timeout=DEFAULT_TIMEOUT):
    """Store connection details and build the REST session and XML-RPC proxy.

    No request is sent to the server here.  When ``server_url`` or
    ``timeout`` is not supplied, Django settings are consulted if Django
    is installed and configured; Django is optional.

    :param server_url: eXist server url; old-style urls containing
        ``xmlrpc`` are parsed for path and credentials
    :param username: exist username, if any
    :param password: exist user password, if any
    :param resultType: class used for :meth:`query` results; defaults to
        :class:`QueryResult`
    :param encoding: encoding passed to the XML-RPC server proxy
    :param verbose: passed to the XML-RPC server proxy
    :param keep_alive: optional value stored on the requests session as
        ``keep_alive``; falls back to ``EXISTDB_SESSION_KEEP_ALIVE``
    :param timeout: request timeout; leave as the default sentinel to use
        ``EXISTDB_TIMEOUT`` or the global socket default.  An explicit
        ``None`` is honored and overrides any configured timeout.
    :raises Exception: if no server url is given and none can be found in
        Django settings
    """
    self.resultType = resultType or QueryResult
    datetime_opt = {'use_datetime': True}

    # old-style xmlrpc urls may carry credentials; split them out
    if server_url is not None and 'xmlrpc' in server_url:
        self._init_from_xmlrpc_url(server_url)
    else:
        self.exist_url = server_url

    # explicitly supplied credentials win over any parsed from the url
    if username is not None:
        self.username = username
    if password is not None:
        self.password = password

    # DEFAULT_TIMEOUT is a sentinel object, so compare by identity; this
    # distinguishes "not specified" from an explicit timeout of None
    if self.exist_url is None or timeout is ExistDB.DEFAULT_TIMEOUT:
        # Django integration is NOT required, so check for settings
        # but don't error if they are not available.
        try:
            # raises ImportError when django is not installed; importing
            # the exception class directly avoids depending on the
            # ``django.core.exceptions`` attribute being loaded already
            from django.core.exceptions import ImproperlyConfigured
            try:
                # if django is installed but not used, accessing settings
                # raises ImproperlyConfigured
                from django.conf import settings
                if self.exist_url is None:
                    self.exist_url = self._serverurl_from_djangoconf()
                if timeout is ExistDB.DEFAULT_TIMEOUT:
                    timeout = getattr(settings, 'EXISTDB_TIMEOUT',
                                      ExistDB.DEFAULT_TIMEOUT)
                # only consult settings for keep-alive when the caller
                # did not specify one
                if keep_alive is None:
                    keep_alive = getattr(settings,
                                         'EXISTDB_SESSION_KEEP_ALIVE', None)
            except ImproperlyConfigured:
                pass
        except ImportError:
            pass

    # if server url is still not set, we have a problem
    if self.exist_url is None:
        raise Exception('Cannot initialize an eXist-db connection without specifying ' +
                        'eXist server url directly or in Django settings as EXISTDB_SERVER_URL')

    # one requests session is shared by the REST api calls and the
    # xmlrpc transport
    self.session = requests.Session()
    if self.username is not None and self.password is not None:
        self.session.auth = (self.username, self.password)
    if keep_alive is not None:
        self.session.keep_alive = keep_alive

    # keyword options passed along with every REST request
    self.session_opts = {}
    if timeout is not ExistDB.DEFAULT_TIMEOUT:
        self.session_opts['timeout'] = timeout

    transport = RequestsTransport(timeout=timeout, session=self.session,
                                  url=self.exist_url, **datetime_opt)
    self.server = xmlrpclib.ServerProxy(
        uri='%s/xmlrpc' % self.exist_url.rstrip('/'),
        transport=transport,
        encoding=encoding,
        verbose=verbose,
        allow_none=True,
        **datetime_opt
    )
def _serverurl_from_djangoconf(self):
    """Configure url and credentials from Django settings.

    Sets :attr:`exist_url` from ``EXISTDB_SERVER_URL`` and sets
    :attr:`username` / :attr:`password` from ``EXISTDB_SERVER_USER`` /
    ``EXISTDB_SERVER_PASSWORD`` (``None`` when those are not defined).
    Emits a warning when the configured url contains ``@``.

    :rtype: the configured url, or ``None`` if an ImportError occurs
    """
    try:
        from django.conf import settings
        # deliberately unguarded: if the url setting is missing, this
        # should fail
        self.exist_url = settings.EXISTDB_SERVER_URL
        # former syntax had credentials in the server url
        has_credentials = '@' in self.exist_url
        if has_credentials:
            warnings.warn(
                'EXISTDB_SERVER_URL should not include eXist user or '
                'password information. You should update your django '
                'settings to use EXISTDB_SERVER_USER and EXISTDB_SERVER_PASSWORD.')
        for attr, setting in (('username', 'EXISTDB_SERVER_USER'),
                              ('password', 'EXISTDB_SERVER_PASSWORD')):
            setattr(self, attr, getattr(settings, setting, None))
        return self.exist_url
    except ImportError:
        return None
def _init_from_xmlrpc_url(self, url):
# map old-style xmlrpc url with username/password to
# new-style initialization
parsed = urlparse.urlparse(url)
# add username/password if set
if parsed.username:
self.username = parsed.username
if parsed.password:
self.password = parsed.password
# construct base exist url, without xmlrpc extension
path = parsed.path.replace('/xmlrpc', '')
# parsed netloc includes username & password; reconstruct without
if parsed.port is not None:
netloc = '%s:%s' % (parsed.hostname, parsed.port)
else:
netloc = parsed.hostname
self.exist_url = '%s://%s%s' % (parsed.scheme, netloc, path)
def restapi_path(self, path):
    """Build the REST API url for a collection or document path.

    A leading ``/db`` is removed from ``path`` (the generated url already
    includes it), and a leading slash is added when missing.

    :param path: database path to a collection or document
    :rtype: string url under ``<exist_url>/rest/db``
    """
    db_prefix = '/db'
    # FIXME: getting duplicated db path, handle this better
    relative = path[len(db_prefix):] if path.startswith(db_prefix) else path
    # make sure there is a slash between db and requested path
    if not relative.startswith('/'):
        relative = '/%s' % relative
    base_url = self.exist_url.rstrip('/')
    return '%s/rest/db%s' % (base_url, relative)
def getDocument(self, name):
    """Fetch a document from the database via the REST API.

    :param name: database document path to retrieve
    :rtype: contents of the document, as returned in the response body;
        ``None`` if the response is neither OK nor not-found
    :raises ExistDBException: if the server responds with not found
    """
    # REST api; need an error wrapper?
    doc_url = self.restapi_path(name)
    logger.debug('getDocument %s', doc_url)
    response = self.session.get(doc_url, stream=False, **self.session_opts)
    status = response.status_code
    if status == requests.codes.ok:
        return response.content
    if status == requests.codes.not_found:
        # matching previous xmlrpc behavior;
        # TODO: use custom exception classes here
        raise ExistDBException('%s not found' % name)
    # any other status code falls through without a result
    return None
def getDoc(self, name):
    """Alias for :meth:`getDocument`.

    :param name: database document path to retrieve
    :rtype: whatever :meth:`getDocument` returns for ``name``
    """
    document = self.getDocument(name)
    return document
def createCollection(self, collection_name, overwrite=False):
    """Create a collection in the database over XML-RPC.

    :param collection_name: string name of collection
    :param overwrite: when False (default), refuse to create a collection
        that already exists
    :rtype: result of the server ``createCollection`` call (boolean
        indicating success)
    :raises ExistDBException: if the collection exists and ``overwrite``
        is not set
    """
    # the existence check is skipped entirely when overwriting
    already_exists = (not overwrite) and self.hasCollection(collection_name)
    if already_exists:
        raise ExistDBException(collection_name + " exists")
    logger.debug('createCollection %s', collection_name)
    created = self.server.createCollection(collection_name)
    return created
@_wrap_xmlrpc_fault
def removeCollection(self, collection_name):
    """Delete the named collection from the database.

    :param collection_name: string name of collection
    :rtype: result of the server ``removeCollection`` call (boolean
        indicating success)
    :raises ExistDBException: if the collection does not exist
    """
    exists = self.hasCollection(collection_name)
    if not exists:
        raise ExistDBException(collection_name + " does not exist")
    logger.debug('removeCollection %s', collection_name)
    removed = self.server.removeCollection(collection_name)
    return removed
def hasCollection(self, collection_name):
    """Report whether a collection exists.

    Asks the server to describe the collection; a fault with code 0 whose
    message says the collection was not found means it does not exist.

    :param collection_name: string name of collection
    :rtype: boolean
    :raises ExistDBException: wrapping any other error from the server call
    """
    try:
        logger.debug('describeCollection %s', collection_name)
        self.server.describeCollection(collection_name)
    except Exception as err:
        # now could be generic ProtocolError
        not_found_text = "collection " + collection_name + " not found"
        is_missing = (hasattr(err, 'faultCode')
                      and err.faultCode == 0
                      and not_found_text in err.faultString)
        if not is_missing:
            raise ExistDBException(err)
        return False
    return True
def reindexCollection(self, collection_name):
    """Reindex a collection by running ``xmldb:reindex`` as an xquery.

    Reindex will fail if the eXist user does not have the correct
    permissions within eXist (must be a member of the DBA group).

    :param collection_name: string name of collection
    :rtype: boolean success (True when the first result value is 'true')
    :raises ExistDBException: if the collection does not exist
    """
    exists = self.hasCollection(collection_name)
    if not exists:
        raise ExistDBException(collection_name + " does not exist")
    # xquery reindex function requires that collection name begin with /db/
    if not collection_name[0:3] == '/db':
        collection_name = '/db/' + collection_name.strip('/')
    reindex_xquery = "xmldb:reindex('%s')" % collection_name
    outcome = self.query(reindex_xquery)
    return outcome.values[0] == 'true'
@_wrap_xmlrpc_fault
def hasDocument(self, document_path):
    """Report whether a document is present in eXist.

    A document counts as missing when :meth:`describeDocument` returns an
    empty dictionary.

    :param document_path: string full path to document in eXist
    :rtype: boolean
    """
    description = self.describeDocument(document_path)
    return not (description == {})
@_wrap_xmlrpc_fault
def describeDocument(self, document_path):
    """Describe a document in eXist (XML-RPC ``describeResource``).

    Includes name, owner, group, created date, permissions, mime-type,
    type, content-length.  An empty dictionary is returned when the
    document is not found.

    :param document_path: string full path to document in eXist
    :rtype: dictionary
    """
    logger.debug('describeResource %s', document_path)
    description = self.server.describeResource(document_path)
    return description
@_wrap_xmlrpc_fault
def getCollectionDescription(self, collection_name):
    """Fetch information about a collection (XML-RPC ``getCollectionDesc``).

    :param collection_name: string name of collection
    :rtype: the server's description of the collection; other code in
        this class reads its ``collections`` and ``documents`` entries
    """
    logger.debug('getCollectionDesc %s', collection_name)
    description = self.server.getCollectionDesc(collection_name)
    return description
def load(self, xml, path):
    """Insert or overwrite a document in the database via REST ``PUT``.

    .. Note::
        This method will automatically overwrite existing content
        at the same path without notice. This is a change from
        versions prior to 0.20.

    :param xml: string or file object with the document contents
    :param path: destination location in the database
    :rtype: boolean indicating success (True for a 201 or 200 response)
    :raises ExistDBException: if the server responds with 400 bad
        request; the raw response body is passed to the exception
    """
    # file-like objects are read in full before upload
    if hasattr(xml, 'read'):
        xml = xml.read()
    logger.debug('load %s', path)
    # NOTE: overwrite is assumed by REST
    response = self.session.put(self.restapi_path(path), xml, stream=False,
                                **self.session_opts)
    if response.status_code == requests.codes.bad_request:
        # response is HTML, not xml...
        # could use regex or beautifulsoup to pull out the error.
        # Pass the body along (as query() does for its errors) so the
        # server's explanation is not discarded.
        raise ExistDBException(response.content)
    # expect 201 created for new documents, 200 for
    # successful update of an existing document
    # NOTE: testing shows a 201 response every time (perhaps because
    # eXist removes the resource before replacing?)
    # check for either success response
    return response.status_code in (requests.codes.created,
                                    requests.codes.ok)
@_wrap_xmlrpc_fault
def removeDocument(self, name):
    """Delete a document from the database (XML-RPC ``remove``).

    :param name: full eXist path to the database document to be removed
    :rtype: result of the server call (boolean indicating success)
    """
    logger.debug('remove %s', name)
    removed = self.server.remove(name)
    return removed
@_wrap_xmlrpc_fault
def moveDocument(self, from_collection, to_collection, document):
    """Move a document between collections by running ``xmldb:move``.

    :param from_collection: collection where the document currently exists
    :param to_collection: collection where the document should be moved
    :param document: name of the document in eXist
    :rtype: boolean; always True when the query completes
    """
    move_args = (from_collection, to_collection, document)
    move_xquery = "xmldb:move('%s', '%s', '%s')" % move_args
    # the query result carries no meaningful content; any failure
    # (missing collection, document, etc) should surface as an exception,
    # so completing the query at all means success
    self.query(move_xquery)
    return True
@_wrap_xmlrpc_fault
def query(self, xquery=None, start=1, how_many=10, cache=False, session=None,
          release=None, result_type=None):
    """Execute an XQuery query via the REST API, returning the results.

    If the ``xquery_called`` signal is available, it is sent after the
    request with the elapsed time, the raw response and the call
    arguments.

    :param xquery: a string XQuery query
    :param start: first index to return (1-based)
    :param how_many: maximum number of items to return
    :param cache: boolean, to cache a query and return a session id (optional)
    :param session: session id, to retrieve a cached session (optional)
    :param release: session id to be released (optional)
    :param result_type: class for the returned result; defaults to the
        resultType specified at the creation of this ExistDB
    :rtype: ``result_type`` instance (default :class:`QueryResult`), or
        ``True`` when a session was successfully released
    :raises ExistDBException: on any non-OK response; for a 400 response
        the message is parsed from the xml error returned by eXist
    """
    params = {
        '_howmany': how_many,
        '_start': start,
    }
    if xquery is not None:
        params['_query'] = xquery
    if cache:
        params['_cache'] = 'yes'
    if release is not None:
        params['_release'] = release
    if session is not None:
        params['_session'] = session
    if result_type is None:
        result_type = self.resultType

    # summarize request options for the debug log; the query itself is
    # logged separately on its own lines
    opts = ' '.join('%s=%s' % (key.lstrip('_'), val)
                    for key, val in params.items() if key != '_query')
    debug_query = '\n%s' % xquery if xquery else ''
    logger.debug('query %s%s', opts, debug_query)

    # use a dedicated name for the timer so the ``start`` parameter
    # (first result index) is still intact when reported to the signal
    started_at = time.time()
    response = self.session.get(self.restapi_path(''), params=params,
                                stream=False, **self.session_opts)

    if xquery_called is not None:
        args = {'xquery': xquery, 'start': start, 'how_many': how_many,
                'cache': cache, 'session': session, 'release': release,
                'result_type': result_type}
        xquery_called.send(
            sender=self.__class__, time_taken=time.time() - started_at,
            name='query', return_value=response, args=[], kwargs=args)

    if response.status_code == requests.codes.ok:
        # successful release doesn't return any content
        if release is not None:
            return True  # successfully released
        # TODO: test unicode handling
        return xmlmap.load_xmlobject_from_string(response.content, result_type)

    # 400 bad request returns an xml error we can parse
    elif response.status_code == requests.codes.bad_request:
        err = xmlmap.load_xmlobject_from_string(response.content,
                                                ExistExceptionResponse)
        raise ExistDBException(err.message)

    # not sure if any information is available on other error codes
    else:
        raise ExistDBException(response.content)
@_wrap_xmlrpc_fault
def executeQuery(self, xquery):
    """Run an XQuery query over XML-RPC and return a result handle.

    :param xquery: a string XQuery query
    :rtype: an integer handle identifying the query result for future calls
    """
    # NOTE: eXist's xmlrpc interface requires a dictionary parameter.
    # This parameter is not documented in the eXist docs at
    # http://demo.exist-db.org/exist/devguide_xmlrpc.xml
    # so it's not clear what we can pass there.
    no_options = {}
    logger.debug('executeQuery\n%s', xquery)
    handle = self.server.executeQuery(xquery, no_options)
    logger.debug('result id is %s', handle)
    return handle
@_wrap_xmlrpc_fault
def querySummary(self, result_id):
    """Retrieve results summary from a past query.

    :param result_id: an integer handle returned by :meth:`executeQuery`
    :rtype: a dict describing the results

    The returned dict has four fields:

     * *queryTime*: processing time in milliseconds
     * *hits*: number of hits in the result set
     * *documents*: a list of lists. Each identifies a document and
       takes the form [`doc_id`, `doc_name`, `hits`], where:

         * *doc_id*: an internal integer identifier for the document
         * *doc_name*: the name of the document as a string
         * *hits*: the number of hits within that document

     * *doctype*: a list of lists. Each contains a doctype public
       identifier and the number of hits found for this
       doctype.
    """
    # FIXME: This just exposes the existdb xmlrpc querySummary function.
    # Frankly, this return is just plain ugly. We should come up with
    # something more meaningful.
    summary = self.server.querySummary(result_id)
    # pass values as logging arguments so the message is only formatted
    # when debug logging is enabled, and look keys up with .get() so a
    # summary missing a field cannot make the logging call raise
    logger.debug('querySummary result id %s : %s hits, query took %s ms',
                 result_id, summary.get('hits'), summary.get('queryTime'))
    return summary
@_wrap_xmlrpc_fault
def getHits(self, result_id):
    """Return the number of hits in a query result (XML-RPC ``getHits``).

    :param result_id: an integer handle returned by :meth:`executeQuery`
    :rtype: integer representing the number of hits
    """
    hit_count = self.server.getHits(result_id)
    logger.debug('getHits result id %d : %s', result_id, hit_count)
    return hit_count
@_wrap_xmlrpc_fault
def retrieve(self, result_id, position, highlight=False, **options):
    """Fetch a single result fragment (XML-RPC ``retrieve``).

    :param result_id: an integer handle returned by :meth:`executeQuery`
    :param position: the result index to return
    :param highlight: enable search term highlighting in result; optional,
        defaults to False
    :param options: any additional options, passed through to the server
    :rtype: the query result item as a string
    """
    if highlight:
        # eXist highlight modes: attributes, elements, or both
        # using elements because it seems most reasonable default
        options['highlight-matches'] = 'elements'
        # pretty-printing with eXist matches can introduce unwanted
        # whitespace, so turn indenting off unless the caller chose
        options.setdefault('indent', 'no')
    logger.debug('retrieve result id %d position=%d options=%s',
                 result_id, position, options)
    fragment = self.server.retrieve(result_id, position, options)
    return fragment
@_wrap_xmlrpc_fault
def releaseQueryResult(self, result_id):
    """Ask the server to release a result set handle.

    :param result_id: an integer handle returned by :meth:`executeQuery`
    :rtype: None
    """
    logger.debug('releaseQueryResult result id %d', result_id)
    # the server's reply is intentionally not returned
    self.server.releaseQueryResult(result_id)
    return None
@_wrap_xmlrpc_fault
def setPermissions(self, resource, permissions):
    """Change permissions on a resource in eXist (XML-RPC ``setPermissions``).

    :param resource: full path to a collection or document in eXist
    :param permissions: int or string permissions statement
    :rtype: None
    """
    # TODO: support setting owner, group ?
    logger.debug('setPermissions %s %s', resource, permissions)
    # the server's reply is intentionally not returned
    self.server.setPermissions(resource, permissions)
    return None
@_wrap_xmlrpc_fault
def getPermissions(self, resource):
    """Look up permissions for a resource in eXist.

    :param resource: full path to a collection or document in eXist
    :rtype: :class:`ExistPermissions` built from the server's
        ``getPermissions`` response
    """
    raw_permissions = self.server.getPermissions(resource)
    return ExistPermissions(raw_permissions)
def loadCollectionIndex(self, collection_name, index):
    """Install an index configuration for the given collection.

    The eXist system config collection for ``collection_name`` is created
    when it is not already present; the index content is then loaded as
    that collection's index configuration file, following eXist
    collection and index naming conventions.

    :param collection_name: name of the collection to be indexed
    :param index: string or file object with the document contents (as
        used by :meth:`load`)
    :rtype: boolean indicating success
    """
    config_collection = self._configCollectionName(collection_name)
    # FIXME: what error handling should be done at this level?
    config_exists = self.hasCollection(config_collection)
    if not config_exists:
        self.createCollection(config_collection)
    index_path = self._collectionIndexPath(collection_name)
    return self.load(index, index_path)
def removeCollectionIndex(self, collection_name):
    """Delete the index configuration for the given collection.

    After the index file is removed, the configuration collection itself
    is also removed if it holds no documents and no subcollections.

    :param collection_name: name of the collection with an index to be
        removed
    :rtype: boolean indicating success (always True on normal return)
    """
    # collection indexes information must be stored under
    # system/config/db/collection_name
    config_collection = self._configCollectionName(collection_name)
    index_path = self._collectionIndexPath(collection_name)
    # remove collection.xconf in the configuration collection
    self.removeDocument(index_path)
    description = self.getCollectionDescription(config_collection)
    # no documents and no sub-collections - safe to remove index collection
    is_empty = (description['collections'] == []
                and description['documents'] == [])
    if is_empty:
        self.removeCollection(config_collection)
    return True
def hasCollectionIndex(self, collection_name):
    """Report whether the collection has an index configuration in eXist.

    Note: according to eXist documentation, index config file does not
    *have* to be named *collection.xconf* for reasons of backward
    compatibility.  This function assumes that the recommended naming
    conventions are followed.

    :param collection_name: name of the collection to check
    :rtype: boolean indicating collection index is present
    """
    config_collection = self._configCollectionName(collection_name)
    config_present = self.hasCollection(config_collection)
    # without the config collection there is no need to look for the file
    if not config_present:
        return config_present
    return self.hasDocument(self._collectionIndexPath(collection_name))
def _configCollectionName(self, collection_name):
"""Generate eXist db path to the configuration collection for a specified collection
according to eXist collection naming conventions.
"""
# collection indexes information must be stored under system/config/db/collection_name
return "/db/system/config/db/" + collection_name.strip('/')
def _collectionIndexPath(self, collection_name):
"""Generate full eXist db path to the index configuration file for a specified
collection according to eXist collection naming conventions.
"""
# collection indexes information must be stored under system/config/db/collection_name
return self._configCollectionName(collection_name) + "/collection.xconf"
# admin functionality; where should this live?
def create_group(self, group):
    """Create a group by running ``sm:create-group`` as an xquery.

    :param group: name of the group to create
    :rtype: True if the group was created, False if the group already
        exists
    :raises ExistDBException: any other eXist error is re-raised
    """
    create_xquery = 'sm:create-group("%s")' % group
    try:
        self.query(create_xquery)
    except ExistDBException as err:
        already_exists = 'group with name %s already exists' % group
        if already_exists in err.message():
            return False
        raise
    else:
        # the query result carries no information on success
        return True
def create_account(self, username, password, groups):
    """Create a user account by running ``sm:create-account`` as an xquery.

    :param username: name of the account to create
    :param password: password for the new account
    :param groups: group value interpolated as the third argument of
        ``sm:create-account``
    :rtype: True if the user was created, False if the user already
        exists
    :raises ExistDBException: any other eXist error is re-raised
    """
    account_args = (username, password, groups)
    create_xquery = 'sm:create-account("%s", "%s", "%s")' % account_args
    try:
        self.query(create_xquery)
    except ExistDBException as err:
        already_exists = 'user account with username %s already exists' % username
        if already_exists in err.message():
            return False
        # NOTE: might be possible to also get a group error here
        # perhaps just check for 'already exists' ?
        raise
    else:
        return True
class ExistPermissions(object):
    """Permissions for an eXist resource - owner, group, and active
    permissions.

    :param data: mapping with ``owner``, ``group`` and ``permissions``
        entries
    """

    def __init__(self, data):
        # copy each required entry onto the instance; a missing key raises
        for field in ('owner', 'group', 'permissions'):
            setattr(self, field, data[field])

    def __str__(self):
        summary = (self.owner, self.group, self.permissions)
        return "owner: %s; group: %s; permissions: %s" % summary

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s %s>' % (class_name, str(self))
class QueryResult(xmlmap.XmlObject):
    """The results of an eXist XQuery query."""

    #: The index of the first result returned
    start = xmlmap.IntegerField("@start|@exist:start")

    #: Generic value (*exist:value*) returned from an exist xquery
    values = xmlmap.StringListField("exist:value")

    #: Session id, when a query is requested to be cached
    session = xmlmap.IntegerField("@exist:session")

    # raw count attribute; exposed through :attr:`count`
    _raw_count = xmlmap.IntegerField("@count|@exist:count")

    @property
    def count(self):
        """The number of results returned in this chunk (0 when the
        count attribute is absent or empty)"""
        raw = self._raw_count
        return raw if raw else 0

    # raw hits attribute; exposed through :attr:`hits`
    _raw_hits = xmlmap.IntegerField("@hits|@exist:hits")

    @property
    def hits(self):
        """The total number of hits found by the search (0 when the
        hits attribute is absent or empty)"""
        raw = self._raw_hits
        return raw if raw else 0

    @property
    def results(self):
        """The result documents themselves as nodes, starting at
        :attr:`start` and containing :attr:`count` members"""
        child_nodes = self.node.xpath('*')
        return child_nodes
class ExistExceptionResponse(xmlmap.XmlObject):
    """XML exception response returned on an error.

    Used by :meth:`ExistDB.query` to parse the body of a 400 bad request
    response.
    """

    #: db path where the error occurred
    path = xmlmap.StringField('path')

    #: error message
    message = xmlmap.StringField('message')

    #: query that generated the error
    query = xmlmap.StringField('query')
# requests-based xmlrpc transport
# https://gist.github.com/chrisguitarguy/2354951
class RequestsTransport(xmlrpclib.Transport):
    """
    Transport for xmlrpclib that uses Requests instead of httplib.

    Additional parameters:

    :param timeout: optional timeout for xmlrpc requests
    :param session: optional requests session; use a custom session
        if your xmlrpc server requires authentication
    :param url: optional xmlrpc url; used to determine if https should
        be used when making xmlrpc requests
    """

    # update user agent to reflect use of requests
    user_agent = "xmlrpclib.py/%s via requests %s" % (xmlrpclib.__version__,
                                                      requests.__version__)

    # boolean flag to indicate whether https should be used or not
    use_https = False

    def __init__(self, timeout=ExistDB.DEFAULT_TIMEOUT, session=None,
                 url=None, *args, **kwargs):
        # if default timeout is requested, use the global socket default
        if timeout is ExistDB.DEFAULT_TIMEOUT:
            timeout = socket.getdefaulttimeout()
        xmlrpclib.Transport.__init__(self, *args, **kwargs)
        self.timeout = timeout

        # NOTE: assumes that if basic auth is needed, it is set
        # on the session that is passed in
        if session:
            self.session = session
        else:
            self.session = requests.Session()

        # these headers are set on the session itself, so they also apply
        # to any other requests made through a session that was passed in
        self.session.headers.update({
            'User-Agent': self.user_agent,
            'Content-Type': 'application/xml'
        })

        # determine whether https is needed based on the url
        if url is not None:
            self.use_https = (urlparse(url).scheme == 'https')

    def request(self, host, handler, request_body, verbose=False):
        """
        Make an xmlrpc request.

        :param host: host portion of the xmlrpc url
        :param handler: path portion of the xmlrpc url
        :param request_body: marshalled xmlrpc request to post
        :param verbose: accepted for compatibility with the base transport
            signature (where it defaults to False); not used here
        :raises xmlrpclib.ProtocolError: if the response has an HTTP
            error status
        """
        url = self._build_url(host, handler)
        # connection-level errors from requests propagate to the caller
        resp = self.session.post(url, data=request_body,
                                 timeout=self.timeout)
        try:
            resp.raise_for_status()
        except requests.RequestException as err:
            raise xmlrpclib.ProtocolError(url, resp.status_code,
                                          str(err), resp.headers)
        return self.parse_response(resp)

    def getparser(self):
        # Patch the parser to prevent errors on Apache's extended
        # attributes. See the code in the patch module for details.
        parser, unmarshaller = xmlrpclib.Transport.getparser(self)
        return patch.XMLRpcLibPatch.apply(parser, unmarshaller)

    def parse_response(self, resp):
        """
        Parse the xmlrpc response.

        :param resp: requests response object
        :rtype: unmarshalled xmlrpc result
        """
        parser, unmarshaller = self.getparser()
        # feed the raw bytes rather than ``resp.text``: the text form is
        # decoded using the charset requests infers from the HTTP headers,
        # which can differ from the encoding the XML document declares
        parser.feed(resp.content)
        parser.close()
        return unmarshaller.close()

    def _build_url(self, host, handler):
        """
        Build a url for our request based on the host, handler and
        use_https property
        """
        scheme = 'https' if self.use_https else 'http'
        return '%s://%s%s' % (scheme, host, handler)