emory-libraries/eulxml

View on GitHub
eulxml/xpath/ast.py

Summary

Maintainability
C
7 hrs
Test Coverage
# file eulxml/xpath/ast.py
#
#   Copyright 2010,2011 Emory University Libraries
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

'''Abstract Syntax Tree nodes for parsed XPath.

This module contains basic nodes for representing parsed XPath expressions.
The parser provided by this module creates its parsed XPath representation
from the classes defined in this module. Library callers will mostly not use
this module directly, unless they need to produce XPath ASTs from scratch or
perhaps introspect ASTs returned by the parser.
'''

from __future__ import unicode_literals
import sys


# python2/3 string type logic borrowed from six
# NOTE: not importing six here because setup.py needs to generate
# the parser at install time, when six installation is not yet available
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

if PY3:
    string_types = str
else:
    string_types = basestring


__all__ = [
    'serialize',
    'UnaryExpression',
    'BinaryExpression',
    'PredicatedExpression',
    'AbsolutePath',
    'Step',
    'NameTest',
    'NodeType',
    'AbbreviatedStep',
    'VariableReference',
    'FunctionCall',
    ]

def serialize(xp_ast):
    '''Serialize an XPath AST as a valid XPath expression.'''
    return ''.join(_serialize(xp_ast))

def _serialize(xp_ast):
    '''Generate token strings which, when joined together, form a valid
    XPath serialization of the AST.'''

    if hasattr(xp_ast, '_serialize'):
        for tok in xp_ast._serialize():
            yield tok
    elif isinstance(xp_ast, string_types):
        # strings in serialized xpath needed to be quoted
        # (e.g. for use in paths, comparisons, etc)
        # using repr to quote them; for unicode, the leading
        # u (u'') needs to be removed.
        yield repr(xp_ast).lstrip('u')
    else:
        yield str(xp_ast)


class UnaryExpression(object):

    '''A unary XPath expression. Practially, this means -foo.'''

    def __init__(self, op, right):
        self.op = op
        '''the operator used in the expression'''
        self.right = right
        '''the expression the operator is applied to'''

    def __repr__(self):
        return '<%s %s %s>' % (self.__class__.__name__,
                self.op, serialize(self.right))

    def _serialize(self):
        yield self.op
        for tok in _serialize(self.right):
            yield tok


KEYWORDS = set(['or', 'and', 'div', 'mod'])
class BinaryExpression(object):

    '''Any binary XPath expression. a/b; a and b; a | b.'''

    def __init__(self, left, op, right):
        self.left = left
        '''the left side of the binary expression'''
        self.op = op
        '''the operator of the binary expression'''
        self.right = right
        '''the right side of the binary expression'''

    def __repr__(self):
        return '<%s %s %s %s>' % (self.__class__.__name__,
                serialize(self.left), self.op, serialize(self.right))

    def _serialize(self):
        for tok in _serialize(self.left):
            yield tok

        if self.op in KEYWORDS:
            yield ' '
            yield self.op
            yield ' '
        else:
            yield self.op

        for tok in _serialize(self.right):
            yield tok


class PredicatedExpression(object):

    '''A filtered XPath expression. $var[1]; (a or b)[foo][@bar].'''

    def __init__(self, base, predicates=None):
        self.base = base
        '''the base expression to be filtered'''
        self.predicates = predicates or []
        '''a list of filter predicates'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def append_predicate(self, pred):
        self.predicates.append(pred)

    def _serialize(self):
        yield '('
        for tok in _serialize(self.base):
            yield tok
        yield ')'
        for pred in self.predicates:
            yield '['
            for tok in _serialize(pred):
                yield tok
            yield ']'


class AbsolutePath(object):

    '''An absolute XPath path. /a/b/c; //a/ancestor:b/@c.'''

    def __init__(self, op='/', relative=None):
        self.op = op
        '''the operator used to root the expression'''
        self.relative = relative
        '''the relative path after the absolute root operator'''

    def __repr__(self):
        if self.relative:
            return '<%s %s %s>' % (self.__class__.__name__,
                    self.op, serialize(self.relative))
        else:
            return '<%s %s>' % (self.__class__.__name__, self.op)

    def _serialize(self):
        yield self.op
        for tok in _serialize(self.relative):
            yield tok


class Step(object):

    '''A single step in a relative path. a; @b; text(); parent::foo:bar[5].'''

    def __init__(self, axis, node_test, predicates):
        self.axis = axis
        '''the step's axis, or @ or None if abbreviated or undefined'''
        self.node_test = node_test
        '''a NameTest or NodeType object describing the test represented'''
        self.predicates = predicates
        '''a list of predicates filtering the step'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        if self.axis == '@':
            yield '@'
        elif self.axis:
            yield self.axis
            yield '::'

        for tok in self.node_test._serialize():
            yield tok

        for predicate in self.predicates:
            yield '['
            for tok in _serialize(predicate):
                yield tok
            yield ']'


class NameTest(object):

    '''An element name node test for a Step.'''

    def __init__(self, prefix, name):
        self.prefix = prefix
        '''the namespace prefix used for the test, or None if unset'''
        self.name = name
        '''the node name used for the test, or *'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        if self.prefix:
            yield self.prefix
            yield ':'
        yield self.name

    def __str__(self):
        return ''.join(self._serialize())

class NodeType(object):

    '''A node type node test for a Step.'''

    def __init__(self, name, literal=None):
        self.name = name
        '''the node type name, such as node or text'''
        self.literal = literal
        '''the argument to the node specifier. XPath allows these only for
        processing-instruction() node tests.'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        yield self.name
        yield '('
        if self.literal is not None:
            for tok in _serialize(self.literal):
                yield self.literal
        yield ')'

    def __str__(self):
        return ''.join(self._serialize())

class AbbreviatedStep(object):

    '''An abbreviated XPath step. . or ..'''

    def __init__(self, abbr):
        self.abbr = abbr
        '''the abbreviated step'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        yield self.abbr


class VariableReference(object):

    '''An XPath variable reference. $foo; $myns:foo.'''

    def __init__(self, name):
        self.name = name
        '''a tuple (prefix, localname) containing the variable name'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        yield '$'
        prefix, localname = self.name
        if prefix:
            yield prefix
            yield ':'
        yield localname


class FunctionCall(object):

    '''An XPath function call. foo(); my:foo(1); foo(1, 'a', $var).'''

    def __init__(self, prefix, name, args):
        self.prefix = prefix
        '''the namespace prefix, or None if unspecified'''
        self.name = name
        '''the local function name'''
        self.args = args
        '''a list of argument expressions'''

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__,
                serialize(self))

    def _serialize(self):
        if self.prefix:
            yield self.prefix
            yield ':'
        yield self.name
        yield '('
        if self.args:
            for tok in _serialize(self.args[0]):
                yield tok

            for arg in self.args[1:]:
                yield ','
                for tok in _serialize(arg):
                    yield tok
        yield ')'