emory-libraries/eulxml

View on GitHub
eulxml/xmlmap/eadmap.py

Summary

Maintainability
B
6 hrs
Test Coverage
# file eulxml/xmlmap/eadmap.py
#
#   Copyright 2010,2011 Emory University Libraries
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

from __future__ import unicode_literals
from copy import deepcopy

import six

from eulxml import xmlmap

# xmlmap objects for various sections of an ead
# organized from smallest/lowest level to highest level

# Namespace URI used as ROOT_NS for the EAD objects below; bound to the
# `e` prefix that the XPath mappings in this module use.
EAD_NAMESPACE = 'urn:isbn:1-931666-22-9'
# XLink namespace URI; bound to the `xlink` prefix (e.g. `@xlink:href`).
XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'


class _EadBase(xmlmap.XmlObject):
    """Shared base for every EAD XmlObject in this module.

    Declares the root element name/namespace and the namespace prefixes
    (``e``, ``xlink``, ``exist``) that the XPath mappings rely on.
    """
    ROOT_NS = EAD_NAMESPACE
    ROOT_NAME = 'ead'
    ROOT_NAMESPACES = {
        'e': ROOT_NS,
        'xlink': XLINK_NAMESPACE,
        'exist': 'http://exist.sourceforge.net/NS/exist',
    }
    # TODO: if there are any universal EAD attributes, they should be added here

    # NOTE: not an EAD field. Exposes the number of eXist match elements
    # found anywhere below the current node, so totals are available at
    # every level when EAD objects are used with eXist.
    match_count = xmlmap.IntegerField('count(.//exist:match)')
    "number of `exist:match` elements under this node - for use with EAD and eXist-db"


class Note(_EadBase):
    """Mapping for an EAD `note` element."""
    ROOT_NAME = 'note'
    # paragraphs are mapped as generic XmlObjects rather than strings
    # (original author's note: "to allow formatting")
    content = xmlmap.NodeListField('e:p', xmlmap.XmlObject)
    "paragraphs of the note, as a list of XmlObject - `p`"


class Section(_EadBase):
    """Generic EAD section.

    Only the heading, the paragraphs, and a single note are mapped.
    """
    head = xmlmap.NodeField('e:head', xmlmap.XmlObject)
    "section heading - `head`"
    # paragraphs are mapped as generic XmlObjects rather than strings
    # (original author's note: "to allow formatting")
    content = xmlmap.NodeListField('e:p', xmlmap.XmlObject)
    "paragraphs of the section, as a list of XmlObject - `p`"
    note = xmlmap.NodeField('e:note', Note)
    "section note as :class:`Note` - `note`"


@six.python_2_unicode_compatible
class Heading(_EadBase):
    """Generic mapping for a heading element found under `controlaccess`."""
    source = xmlmap.StringField('@source')
    "vocabulary the controlled term comes from - `@source`"
    value = xmlmap.StringField('.', normalize=True)
    "text of the controlled term (content of the heading element), mapped with normalize=True"

    def __str__(self):
        """Return the text of the controlled term."""
        return self.value


class ControlledAccessHeadings(Section):
    """
    Controlled access headings: subject terms, personal, family and
    corporate names, and similar controlled terms.

    Expected node element passed to constructor: `controlaccess`.
    """
    person_name = xmlmap.NodeListField('e:persname', Heading)
    "list of person name :class:`Heading` - `persname`"
    family_name = xmlmap.NodeListField('e:famname', Heading)
    "list of family name :class:`Heading` - `famname`"
    corporate_name = xmlmap.NodeListField('e:corpname', Heading)
    "list of corporate name :class:`Heading` - `corpname`"
    subject = xmlmap.NodeListField('e:subject', Heading)
    "list of subject :class:`Heading` - `subject`"
    geographic_name = xmlmap.NodeListField('e:geogname', Heading)
    "list of geographic name :class:`Heading` - `geogname`"
    genre_form = xmlmap.NodeListField('e:genreform', Heading)
    "list of genre or form :class:`Heading` - `genreform`"
    occupation = xmlmap.NodeListField('e:occupation', Heading)
    "list of occupation :class:`Heading` - `occupation`"
    function = xmlmap.NodeListField('e:function', Heading)
    "list of function :class:`Heading` - `function`"
    title = xmlmap.NodeListField('e:title', Heading)
    "list of title :class:`Heading` - `title`"

    # catch-all: a single union XPath matching any of the term elements above
    terms = xmlmap.NodeListField(
        'e:corpname|e:famname|e:function|e:genreform|e:geogname|'
        'e:occupation|e:persname|e:subject|e:title',
        Heading)
    "list of :class:`Heading` - every allowed control access term, in the order they appear"

    controlaccess = xmlmap.NodeListField('e:controlaccess', 'self')
    "nested `controlaccess` elements as a list of :class:`ControlledAccessHeadings` (recursive)"


@six.python_2_unicode_compatible
class Container(_EadBase):
    """
    Container: a :class:`DescriptiveIdentification` subelement used for
    locating materials.

    Expected node element passed to constructor: `did/container`.
    """
    type = xmlmap.StringField('@type')
    "container type - `@type`"
    value = xmlmap.StringField('.')
    "text content of the container element"

    def __str__(self):
        """Return the text content of the container."""
        return self.value


@six.python_2_unicode_compatible
class DateField(_EadBase):
    """
    Access to the value and attributes of `date` and `unitdate` elements.

    The string form of a DateField is the text content of the element,
    i.e. the non-normalized version of the date.
    """
    normalized = xmlmap.StringField('@normal')
    "normalized form of the date - `@normal`"
    calendar = xmlmap.StringField('@calendar')
    "calendar, e.g. gregorian - `@calendar`"
    era = xmlmap.StringField('@era')
    "era, e.g. ce - `@era`"
    value = xmlmap.StringField('.')
    "human-readable date (text content of the date element)"

    def __str__(self):
        """Return the human-readable (non-normalized) date text."""
        return self.value


class Unitid(_EadBase):
    """Mapping for an EAD `unitid` element."""
    ROOT_NAME = 'unitid'
    # NOTE(review): mapped as an integer, unlike EadId.identifier which is a
    # string - confirm that non-numeric identifiers never occur here.
    identifier = xmlmap.IntegerField("@identifier")
    "machine-readable identifier - `@identifier`"
    country_code = xmlmap.StringField("@countrycode")
    "country code - `@countrycode`"
    repository_code = xmlmap.StringField("@repositorycode")
    "repository code - `@repositorycode`"
    value = xmlmap.StringField(".")
    "human-readable unitid (text content of the element)"


class UnitTitle(_EadBase):
    """Mapping for an EAD `unittitle` element."""
    ROOT_NAME = 'unittitle'
    unitdate = xmlmap.NodeField('e:unitdate', DateField)
    "unit date directly under the title, as :class:`DateField` - `unitdate`"

    text = xmlmap.StringField("text()")
    "text mapped by the XPath `text()` on this element"

    @property
    def short(self):
        """Short form of the unit title, without any unit date.

        Returns an instance of :class:`~eulxml.xmlmap.eadmap.UnitTitle`, so it
        can be used with formatting anywhere the full unittitle can be used.
        When the title has no unitdate, the current object itself is returned.
        """
        if self.unitdate:
            # Build the short title from a *copy* of the node so the original
            # document is untouched and child elements (e.g. title or emph)
            # are preserved.  The result is deliberately not cached: the main
            # node may change later and the short form should reflect that.
            trimmed = UnitTitle(node=deepcopy(self.node))
            # NOTE(review): lxml's remove() also drops the removed element's
            # tail text, so text that follows the unitdate is lost as well -
            # confirm this is intended.
            trimmed.node.remove(trimmed.unitdate.node)
            return trimmed

        # nothing to strip out
        return self


class DigitalArchivalObject(_EadBase):
    """Digital Archival Object (`dao` element)."""
    ROOT_NAME = 'dao'
    audience = xmlmap.StringField("@audience")
    "audience (internal or external) - `@audience`"
    id = xmlmap.StringField('@id')
    "identifier - `@id`"
    title = xmlmap.StringField('@xlink:title')
    "title - `@xlink:title`"
    href = xmlmap.StringField('@xlink:href')
    "url where the digital archival object can be accessed - `@xlink:href`"
    show = xmlmap.StringField('@xlink:show')
    "how the resource should be displayed - `@xlink:show`"


class DescriptiveIdentification(_EadBase):
    """Descriptive identification (`did` element) for materials in a component."""
    ROOT_NAME = 'did'
    unitid = xmlmap.NodeField('e:unitid', Unitid)
    "unit id as :class:`Unitid` - `unitid`"
    unittitle = xmlmap.NodeField('e:unittitle', UnitTitle)
    "unit title as :class:`UnitTitle` - `unittitle`"
    unitdate = xmlmap.NodeField('.//e:unitdate', DateField)
    "unit date as :class:`DateField` - `.//unitdate`, matched at any depth under the `did`"
    physdesc = xmlmap.StringField('e:physdesc')
    "physical description - `physdesc`"
    abstract = xmlmap.NodeField("e:abstract", xmlmap.XmlObject)
    "abstract, as a generic XmlObject - `abstract`"
    langmaterial = xmlmap.StringField('e:langmaterial')
    "language of the materials - `langmaterial`"
    origination = xmlmap.StringField('e:origination', normalize=True)
    "origination, mapped with normalize=True - `origination`"
    location = xmlmap.StringField('e:physloc')
    "physical location - `physloc`"
    container = xmlmap.NodeListField('e:container', Container)
    "list of :class:`Container` - `container`"
    dao_list = xmlmap.NodeListField('e:dao', DigitalArchivalObject)
    "digital archival object references, as a list of :class:`DigitalArchivalObject` - `dao`"


class Component(_EadBase):
    """Generic component `cN` (`c1`-`c12`) element - a subordinate component of the materials"""
    level = xmlmap.StringField("@level")
    "level of the component - `@level`"
    id = xmlmap.StringField("@id")
    "component id - `@id`"
    did = xmlmap.NodeField("e:did", DescriptiveIdentification)
    ":class:`DescriptiveIdentification` - `did`"
    # FIXME: these sections overlap significantly with those in archdesc; share/inherit?
    # NOTE: use_restriction was previously declared twice in this class with
    # the same XPath; the redundant second declaration has been dropped.
    use_restriction = xmlmap.NodeField("e:userestrict", Section)
    "use restrictions :class:`Section` - `userestrict`"
    alternate_form = xmlmap.NodeField("e:altformavail", Section)
    "alternative form available :class:`Section` - `altformavail`"
    originals_location = xmlmap.NodeField("e:originalsloc", Section)
    "location of originals :class:`Section` - `originalsloc`"
    related_material = xmlmap.NodeField("e:relatedmaterial", Section)
    "related material :class:`Section` - `relatedmaterial`"
    separated_material = xmlmap.NodeField("e:separatedmaterial", Section)
    "separated material :class:`Section` - `separatedmaterial`"
    acquisition_info = xmlmap.NodeField("e:acqinfo", Section)
    "acquisition info :class:`Section` - `acqinfo`"
    custodial_history = xmlmap.NodeField("e:custodhist", Section)
    "custodial history :class:`Section` - `custodhist`"
    preferred_citation = xmlmap.NodeField("e:prefercite", Section)
    "preferred citation :class:`Section` - `prefercite`"
    biography_history = xmlmap.NodeField("e:bioghist", Section)
    "biography or history :class:`Section` - `bioghist`"
    bibliography = xmlmap.NodeField("e:bibliography", Section)
    "bibliography :class:`Section` - `bibliography`"
    scope_content = xmlmap.NodeField("e:scopecontent", Section)
    "scope and content :class:`Section` - `scopecontent`"
    process_info = xmlmap.NodeField("e:processinfo", Section)
    "processing information :class:`Section` - `processinfo`"
    arrangement = xmlmap.NodeField("e:arrangement", Section)
    "arrangement :class:`Section` - `arrangement`"
    other = xmlmap.NodeField("e:otherfindaid", Section)
    "other finding aid :class:`Section` - `otherfindaid`"
    access_restriction = xmlmap.NodeField("e:accessrestrict", Section)
    "access restrictions :class:`Section` - `accessrestrict`"
    dao_list = xmlmap.NodeListField("e:dao", DigitalArchivalObject)
    "list of digital archival object references as :class:`DigitalArchivalObject`"

    c = xmlmap.NodeListField("e:c02|e:c03|e:c04|e:c05|e:c06|e:c07|e:c08|e:c09|e:c10|e:c11|e:c12", "self")
    "list of :class:`Component` - recursive mapping to any c-level 2-12; `c02|c03|c04|c05|c06|c07|c08|c09|c10|c11|c12`"

    # using un-numbered mapping for c-series or container lists
    def hasSubseries(self):
        """Check if this component has subseries or not.

           Determined based on level of first subcomponent (series or subseries)
           or if first component has subcomponents present.

           :rtype: boolean
        """
        subcomponents = self.c
        # no subcomponents at all (or a falsy first one): nothing to inspect
        if not (subcomponents and subcomponents[0]):
            return False

        first = subcomponents[0]
        if first.level in ('series', 'subseries'):
            return True
        # otherwise, the first subcomponent having children of its own counts
        return bool(first.c and first.c[0])


class SubordinateComponents(Section):
    """Description of Subordinate Components (`dsc` element): container
    lists and series.

    Expected node element passed to constructor: `ead/archdesc/dsc`.
    """
    ROOT_NAME = 'dsc'

    type = xmlmap.StringField('@type')
    "type of component - `@type`"
    c = xmlmap.NodeListField('e:c01', Component)
    "`c01` elements directly under this section, as a list of :class:`Component`"

    def hasSeries(self):
        """Report whether this finding aid has series/subseries.

        True when the first `c01` component has level ``series``, or when
        that first component has subcomponents of its own.

        :rtype: boolean
        """
        components = self.c
        if not len(components):
            return False

        first = components[0]
        if first.level == 'series':
            return True
        if first.c and first.c[0]:
            return True
        return False


@six.python_2_unicode_compatible
class Reference(_EadBase):
    """Internal linking element that may contain text.

    Expected node element passed to constructor: `ref`.
    """
    ROOT_NAME = 'ref'
    type = xmlmap.StringField("@xlink:type")
    "link type - `xlink:type`"
    target = xmlmap.StringField("@target")
    "link target"
    value = xmlmap.NodeField(".", xmlmap.XmlObject)
    "content of the reference, as a generic XmlObject (so child markup stays accessible)"
    # TODO: add mappings for other relevant reference and link attributes

    def __str__(self):
        """Return the text content of the reference element.

        ``value`` is mapped as an XmlObject rather than a string, so it
        cannot be returned from ``__str__`` directly: Python raises
        ``TypeError`` when ``__str__`` returns a non-string.  The XPath
        string value of the element is returned instead.
        """
        # wrap in text_type so a plain text object is always returned,
        # whatever string subclass the XPath evaluation produces
        return six.text_type(self.node.xpath('string(.)'))


class PointerGroup(_EadBase):
    """Group of pointer or reference elements within an index entry.

    Expected node element passed to constructor: `ptrgrp`.
    """
    ROOT_NAME = 'ptrgrp'
    ref = xmlmap.NodeListField('e:ref', Reference)
    "references in this group, as a list of :class:`Reference` - `ref`"


class IndexEntry(_EadBase):
    """Index entry in an archival description index."""
    ROOT_NAME = 'indexentry'
    # a single union XPath covering every access element that is mapped here
    name = xmlmap.NodeField(
        'e:corpname|e:famname|e:function|e:genreform|e:geogname|e:name|'
        'e:namegrp|e:occupation|e:persname|e:title|e:subject',
        xmlmap.XmlObject)
    "access element for this entry, e.g. a name or subject, as a generic XmlObject"
    ptrgroup = xmlmap.NodeField('e:ptrgrp', PointerGroup)
    "group of references for this index entry, as :class:`PointerGroup` - `ptrgrp`"


class Index(Section):
    """Index (`index` element): list of key terms and reference information.

    Expected node element passed to constructor: `ead/archdesc/index`.
    """
    ROOT_NAME = 'index'
    entry = xmlmap.NodeListField('e:indexentry', IndexEntry)
    "entries in the index, as a list of :class:`IndexEntry` - `indexentry`"
    id = xmlmap.StringField('@id')
    "index id - `@id`"
    # same mapping as the `note` field declared on Section
    note = xmlmap.NodeField('e:note', Note)
    "index note as :class:`Note` - `note`"


class ArchivalDescription(_EadBase):
    """Archival description, contains the bulk of the information in an EAD document.

      Expected node element passed to constructor: `ead/archdesc`.
      """
    ROOT_NAME = 'archdesc'
    did = xmlmap.NodeField("e:did", DescriptiveIdentification)
    'descriptive identification :class:`DescriptiveIdentification` - `did`'
    origination = xmlmap.StringField("e:did/e:origination", normalize=True)
    "origination - `did/origination`"
    unitid = xmlmap.NodeField("e:did/e:unitid", Unitid)
    ":class:`Unitid` - `did/unitid`"
    extent = xmlmap.StringListField("e:did/e:physdesc/e:extent")
    "extent from the physical description - `did/physdesc/extent`"
    langmaterial = xmlmap.StringField("e:did/e:langmaterial")
    "language of the materials - `did/langmaterial`"
    location = xmlmap.StringField("e:did/e:physloc")
    "physical location - `did/physloc`"
    access_restriction = xmlmap.NodeField("e:accessrestrict", Section)
    "access restrictions :class:`Section` - `accessrestrict`"
    use_restriction = xmlmap.NodeField("e:userestrict", Section)
    "use restrictions :class:`Section` - `userestrict`"
    alternate_form = xmlmap.NodeField("e:altformavail", Section)
    "alternative form available :class:`Section` - `altformavail`"
    originals_location = xmlmap.NodeField("e:originalsloc", Section)
    "location of originals :class:`Section` - `originalsloc`"
    related_material = xmlmap.NodeField("e:relatedmaterial", Section)
    "related material :class:`Section` - `relatedmaterial`"
    separated_material = xmlmap.NodeField("e:separatedmaterial", Section)
    "separated material :class:`Section` - `separatedmaterial`"
    acquisition_info = xmlmap.NodeField("e:acqinfo", Section)
    "acquisition info :class:`Section` - `acqinfo`"
    custodial_history = xmlmap.NodeField("e:custodhist", Section)
    "custodial history :class:`Section` - `custodhist`"
    preferred_citation = xmlmap.NodeField("e:prefercite", Section)
    "preferred citation :class:`Section` - `prefercite`"
    biography_history = xmlmap.NodeField("e:bioghist", Section)
    "biography or history :class:`Section` - `bioghist`"
    bibliography = xmlmap.NodeField("e:bibliography", Section)
    "bibliography :class:`Section` - `bibliography`"
    scope_content = xmlmap.NodeField("e:scopecontent", Section)
    "scope and content :class:`Section` - `scopecontent`"
    # XPath is relative to the archdesc node itself, like every other section
    # mapping in this class; the earlier `e:archdesc/e:processinfo` looked for
    # a nested archdesc element and so could never match.
    process_info = xmlmap.NodeField("e:processinfo", Section)
    "processing information :class:`Section` - `processinfo`"
    arrangement = xmlmap.NodeField("e:arrangement", Section)
    "arrangement :class:`Section` - `arrangement`"
    other = xmlmap.NodeField("e:otherfindaid", Section)
    "other finding aid :class:`Section` - `otherfindaid`"
    controlaccess = xmlmap.NodeField("e:controlaccess", ControlledAccessHeadings)
    ":class:`ControlledAccessHeadings` - `controlaccess`; subject terms, names, etc."
    index = xmlmap.NodeListField("e:index", Index)
    "list of :class:`Index` - `index`; e.g., index of selected correspondents"
    dao_list = xmlmap.NodeListField("e:dao", DigitalArchivalObject)
    "list of digital archival object references as :class:`DigitalArchivalObject`"


class Address(_EadBase):
    """Address information.

    Expected node element passed to constructor: `address`.
    """
    ROOT_NAME = 'address'
    lines = xmlmap.StringListField('e:addressline')
    "lines of the address, as a list of strings - `addressline`"


class PublicationStatement(_EadBase):
    """Publication information for an EAD document.

    Expected node element passed to constructor:
    `ead/eadheader/filedesc/publicationstmt`.
    """
    ROOT_NAME = 'publicationstmt'
    date = xmlmap.NodeField('e:date', DateField)
    "publication date as :class:`DateField` - `date`"
    publisher = xmlmap.StringField('e:publisher')
    "publisher - `publisher`"
    address = xmlmap.NodeField('e:address', Address)
    "address of publication/publisher as :class:`Address` - `address`"


class ProfileDescription(_EadBase):
    """Profile description for an EAD document.

    Expected node element passed to constructor: `ead/eadheader/profiledesc`.
    """
    ROOT_NAME = 'profiledesc'
    date = xmlmap.NodeField('e:creation/e:date', DateField)
    "creation date as :class:`DateField` - `creation/date`"
    languages = xmlmap.StringListField('e:langusage/e:language')
    "language information, as a list of strings - `langusage/language`"
    language_codes = xmlmap.StringListField('e:langusage/e:language/@langcode')
    "language codes, as a list of strings - `langusage/language/@langcode`"


class FileDescription(_EadBase):
    """Bibliographic information about this EAD document.

    Expected node element passed to constructor: `ead/eadheader/filedesc`.
    """
    ROOT_NAME = 'filedesc'
    publication = xmlmap.NodeField('e:publicationstmt', PublicationStatement)
    "publication information as :class:`PublicationStatement` - `publicationstmt`"


class EadId(_EadBase):
    """EAD identifier for a single EAD finding aid document.

    Expected element passed to constructor: `ead/eadheader/eadid`.
    """
    ROOT_NAME = 'eadid'
    country = xmlmap.StringField("@countrycode")
    "country code - `@countrycode`"
    maintenance_agency = xmlmap.StringField("@mainagencycode")
    "maintenance agency - `@mainagencycode`"
    url = xmlmap.StringField("@url")
    "url - `@url`"
    identifier = xmlmap.StringField("@identifier")
    "identifier - `@identifier`"
    value = xmlmap.StringField('.', normalize=True)
    "text content of the eadid node, mapped with normalize=True"


class EncodedArchivalDescription(_EadBase):
    """:class:`~eulxml.xmlmap.XmlObject` for a schema-based Encoded Archival
    Description (EAD) Finding Aid.

    Every XPath uses the EAD namespace, so this class can not be used with
    non-namespaced, DTD-based EAD.

    Expects node passed to constructor to be top-level `ead` element.
    """

    XSD_SCHEMA = 'http://www.loc.gov/ead/ead.xsd'

    id = xmlmap.StringField("@id")
    "top-level id attribute - `@id`; preferable to use eadid"
    eadid = xmlmap.NodeField("e:eadheader/e:eadid", EadId)
    "ead id as :class:`EadId` - `eadheader/eadid`"

    # fields commonly accessed or displayed as top-level information
    title = xmlmap.NodeField(
        "e:eadheader/e:filedesc/e:titlestmt/e:titleproper",
        xmlmap.XmlObject)
    "record title, as a generic XmlObject - `eadheader/filedesc/titlestmt/titleproper`"
    author = xmlmap.StringField("e:eadheader/e:filedesc/e:titlestmt/e:author")
    "record author - `eadheader/filedesc/titlestmt/author`"
    unittitle = xmlmap.NodeField(
        'e:archdesc[@level="collection"]/e:did/e:unittitle',
        UnitTitle)
    """unit title for the archive as :class:`UnitTitle` - `archdesc[@level="collection"]/did/unittitle`"""
    physical_desc = xmlmap.StringField(
        'e:archdesc[@level="collection"]/e:did/e:physdesc')
    """collection level physical description - `archdesc[@level="collection"]/did/physdesc`"""
    abstract = xmlmap.NodeField(
        'e:archdesc[@level="collection"]/e:did/e:abstract',
        xmlmap.XmlObject)
    """collection level abstract, as a generic XmlObject - `archdesc[@level="collection"]/did/abstract`"""
    archdesc = xmlmap.NodeField('e:archdesc', ArchivalDescription)
    "archival description as :class:`ArchivalDescription` - `archdesc`"

    # dsc sits under archdesc, but is a major section, so it is also mapped
    # at the top level for convenience
    dsc = xmlmap.NodeField('e:archdesc/e:dsc', SubordinateComponents)
    ":class:`SubordinateComponents` - `archdesc/dsc`; accessible at top-level for convenience"
    file_desc = xmlmap.NodeField('e:eadheader/e:filedesc', FileDescription)
    "file description as :class:`FileDescription` - `eadheader/filedesc`"
    profiledesc = xmlmap.NodeField('e:eadheader/e:profiledesc', ProfileDescription)
    "profile description as :class:`ProfileDescription` - `eadheader/profiledesc`"