archivesspace/archivesspace

View on GitHub
backend/app/exporters/serializers/ead3.rb

Summary

Maintainability
F
3 wks
Test Coverage
# encoding: utf-8
require_relative 'ead'
class EAD3Serializer < EADSerializer
  serializer_for :ead3

  # Element names this serializer accepts as children of a <p> element.
  #
  # @return [Array<String>] a fresh array on every call
  def valid_children_of_p
    %w[
      abbr corpname date emph expan famname footnote foreign function genreform
      geogname lb list name num occupation persname ptr quote ref subject title
    ]
  end


  # Looks up which child element names are accepted inside the given parent
  # element when that parent holds only element children (no bare text).
  #
  # @param element_name [String, nil] name of the parent element
  # @return [Array<String>, nil] accepted child names, or nil when the parent
  #   element is not listed here
  def valid_children_of_unmixed_elements(element_name)
    block_children = %w[blockquote chronlist head list p table]
    block_children_without_head = block_children - ['head']
    reference_children = %w[archref bibref]

    # These elements accept themselves (nesting) plus the common block children.
    self_nesting = %w[
      accessrestrict accruals acqinfo altformavail appraisal arrangement bioghist
      custodhist fileplan legalstatus odd originalsloc phystech prefercite
      processinfo scopecontent userestrict
    ]
    lookup = self_nesting.each_with_object({}) do |name, map|
      map[name] = [name] + block_children
    end

    # These elements accept <archref>/<bibref>, themselves, and the block children.
    %w[otherfindaid relatedmaterial separatedmaterial].each do |name|
      lookup[name] = reference_children + [name] + block_children
    end

    lookup.merge!(
      'bibliography' => ['archref', 'bibliography', 'bibref'] + block_children,
      'controlaccess' => %w[controlaccess corpname famname function genreform geogname
                            name occupation persname subject title] + block_children,
      'controlnote' => block_children_without_head,
      'descriptivenote' => ['p'],
      'editionstmt' => %w[edition p],
      'footnote' => block_children_without_head.dup,
      'index' => %w[index indexentry listhead] + block_children,
      'publicationstmt' => %w[address date num p publisher],
      'seriesstmt' => %w[num p titleproper]
    )

    lookup[element_name]
  end


  # Looks up which child element names are accepted inside the given
  # mixed-content parent element (one that may hold text and elements).
  #
  # @param element_name [String, nil] name of the parent element
  # @return [Array<String>, nil] accepted child names, or nil when the parent
  #   element is not listed here
  def valid_children_of_mixed_elements(element_name)
    case element_name
    when 'p'
      valid_children_of_p
    when 'archref', 'bibref'
      # same as <p>, minus nested lists
      valid_children_of_p - ['list']
    when 'head', 'date', 'emph', 'num', 'quote', 'physdesc'
      ['abbr', 'emph', 'expan', 'foreign', 'lb', 'ptr', 'ref']
    end
  end


  # Attribute names whose values are lower-cased by this serializer when
  # converting markup (see the callers of this method within the class).
  #
  # @return [Array<String>]
  def closed_list_attributes
    %w[
      actuate align approximate audience colsep countryencoding
      coverage daotype dateencoding dsctype frame langencoding level listtype mark
      numeration parallel pgwide physdescstructuredtype relationtype render
      repositoryencoding rowsep scriptencoding show unitdatetype valign value
    ]
  end


  # Element names on which a `type` attribute is renamed to `localtype`
  # during markup conversion.
  #
  # @return [Array<String>]
  def localtype_applicable_elements
    %w[
      abstract materialspec accessrestrict altformavail archdesc container
      originalsloc phystech processinfo relatedmaterial separatedmaterial titleproper title
      unitid unittitle userestrict odd note date name persname famname corpname
      subject occupation genreform function num physloc extent descgrp
    ]
  end


  # Names of the access-term elements whose text content gets wrapped in
  # <part> during markup conversion.
  #
  # @return [Array<String>]
  def access_elements
    %w[
      corpname famname function genreform geogname name
      occupation persname subject title
    ]
  end


  # Translates a list `numeration` keyword to its replacement value.
  # Values without a known replacement are passed through untouched.
  #
  # @param value [String, nil] the incoming numeration value
  # @return [String, nil] the replacement, or +value+ itself when unmapped
  def list_numeration_value(value)
    replacements = {
      'arabic' => 'decimal',
      'loweralpha' => 'lower-alpha',
      'upperalpha' => 'upper-alpha',
      'lowerroman' => 'lower-roman',
      'upperroman' => 'upper-roman'
    }
    replacements.fetch(value, value)
  end


  # Use to specify new names for EAD 2002 attributes
  # nil value indicates attribute should be removed
  def attribute_replacements(element_name=nil)
    # Keyed by element name; each value maps an old attribute name to its
    # new name.
    #
    # @param element_name [String, nil] restrict the result to one element
    # @return [Hash, nil] the whole table when no element name is given,
    #   otherwise that element's renames (nil when it has none)
    renames_by_element = {
      'list' => { 'type' => 'listtype' },
      'ref' => { 'title' => 'linktitle' },
      'language' => { 'scriptcode' => 'script' }
    }
    return renames_by_element unless element_name

    renames_by_element[element_name]
  end


  # Reports whether the node has any non-whitespace text among its direct
  # children (text nested inside child elements is not considered).
  #
  # @param fragment_or_element [#children] a node whose children respond to
  #   +text?+ and +inner_text+
  # @return [Boolean]
  def fragment_has_unwrapped_text?(fragment_or_element)
    direct_text = fragment_or_element.children
                                     .select { |node| node.text? }
                                     .map { |node| node.inner_text }
                                     .join
    !direct_text.strip.empty?
  end


  # Parses +content+ as an XML document fragment and reports whether it has
  # non-whitespace text at its top level.
  #
  # @param content [String] markup to inspect
  # @return [Boolean]
  def has_unwrapped_text?(content)
    fragment_has_unwrapped_text?(Nokogiri::XML::DocumentFragment.parse(content))
  end


  # Allow plugins to hook in to record processing by providing their own
  # serialization step (a class with a 'call' method accepting the arguments
  # defined in `run_serialize_step`).

  def self.add_serialize_step(serialize_step)
    # Steps are kept, in registration order, in an instance variable on the
    # receiver; the list is created on first use.
    #
    # @param serialize_step [Class] instantiated with +new+ and sent +call+
    #   by run_serialize_step
    # @return [Array] the list of registered steps
    (@extra_serialize_steps ||= []) << serialize_step
  end


  # Instantiates every registered extra serialize step, in registration
  # order, and invokes its +call+ method with the given arguments.
  # Does nothing when no steps were registered.
  #
  # @param data the record being serialized
  # @param xml the XML builder being written to
  # @param fragments the raw-fragment handler
  # @param context [Symbol] where in the document the step is run from
  #   (this file passes :did and :archdesc)
  # @return [Array] the steps that were run
  def self.run_serialize_step(data, xml, fragments, context)
    registered_steps = Array(@extra_serialize_steps)
    registered_steps.each { |step_class| step_class.new.call(data, xml, fragments, context) }
  end


  # Ensures an identifier carries the configured id prefix (@id_prefix).
  #
  # The prefix is compared literally against the start of the string. The
  # earlier implementation interpolated the prefix into a regular expression,
  # so metacharacters in a configured prefix were interpreted as a pattern
  # (or raised RegexpError), and `^` also matched after any newline.
  #
  # @param id [String, nil] the raw identifier
  # @return [String] "" when +id+ is nil, empty or the literal string 'null';
  #   +id+ unchanged when it already starts with the prefix (or no prefix is
  #   set); otherwise the prefix followed by +id+
  def prefix_id(id)
    return "" if id.nil? || id.empty? || id == 'null'

    # to_s keeps the "no prefix configured" case a pass-through, as before
    prefix = @id_prefix.to_s
    id.start_with?(prefix) ? id : "#{prefix}#{id}"
  end


  # Parses +content+ and returns the parser errors, leaving out the
  # namespace-prefix complaints this serializer chooses to ignore.
  #
  # @param content [String] markup to check
  # @return [Array] the remaining parser errors (empty when there are none)
  def xml_errors(content)
    # Matched against the message text of each error.
    ignorable = Regexp.union(/Namespace prefix .* is not defined/, /The prefix .* is not bound/)

    # Wrap in a pseudo root element so content with several top-level nodes
    # does not produce a "false" error.
    wrapped = Nokogiri::XML("<wrap>#{content}</wrap>")
    wrapped.errors.reject { |error| error.message =~ ignorable }
  end


  # Escapes bare ampersands in +content+ as &amp; while leaving text that
  # already looks like an entity or numeric character reference alone.
  # Named entities other than the five predefined XML ones end up with their
  # ampersand escaped. The string is modified in place.
  #
  # @param content [String] text to escape (mutated)
  # @return [String] the same String object, after escaping
  def escape_ampersands(content)
    # Step 1: "mark" anything that looks pre-escaped by swapping its & for @@
    # (&lt; -> @@lt;  and  &#1234 -> @@#1234).
    [/&\w+;/, /&#\d{4}/, /&#\d{3}/].each do |pattern|
      content.gsub!(pattern) { |hit| hit.gsub('&', '@@') }
    end

    # Step 2: every & still present is a bare ampersand; escape it.
    content.gsub!('&', '&amp;')

    # Step 3: restore the marked entities.
    [/@@\w+;/, /@@#\d{4}/, /@@#\d{3}/].each do |pattern|
      content.gsub!(pattern) { |hit| hit.gsub('@@', '&') }
    end

    # Step 4: only the predefined XML entities may stay as entities; for any
    # other named entity escape the ampersand so the XML still validates.
    predefined = ['&quot;', '&amp;', '&apos;', '&lt;', '&gt;']
    content.gsub!(/&\w+;/) do |entity|
      predefined.include?(entity) ? entity : entity.gsub(/&/, '&amp;')
    end

    content
  end


  # Arranges note-like content into block-level children: text blocks
  # (separated by blank lines) are wrapped in <p>, and element-only blocks
  # have any element that is not a valid child of +parent_name+ wrapped in
  # <p>. If the restructured markup has XML errors, the content referenced by
  # +original_content+ is returned instead.
  #
  # +content+ is modified in place (gsub!/strip!) before further processing.
  #
  # @param content [String] the markup to structure (mutated)
  # @param parent_name [String, nil] name of the element the content will be
  #   placed in; used to look up the valid children of that element
  # @return [String] the structured content, or the fallback content when the
  #   result does not parse cleanly
  def structure_children(content, parent_name = nil)
    # 4archon...
    # (turn newline+tab into a blank line so it is treated as a block break)
    content.gsub!("\n\t", "\n\n")

    content.strip!

    # NOTE(review): this is a reference to the same String object as +content+,
    # not a copy, so any in-place change made by escape_ampersands below would
    # also be visible through original_content -- confirm that is intended.
    original_content = content

    content = escape_ampersands(content)

    # nil when parent_name is not a known unmixed element
    valid_children = valid_children_of_unmixed_elements(parent_name)

    # wrap text in <p> if it isn't already
    # (when the text starts with <p but lacks a closing </p> at the end, one is appended)
    p_wrap = lambda do |text|
      text.chomp!
      text.strip!
      if text =~ /^<p(\s|\/|>)/
        if !(text =~ /<\/p>$/)
          text += '</p>'
        end
      else
        text = "<p>#{ text }</p>"
      end
      return text
    end

    # this should only be called if the text fragment only has element children
    # Each top-level element is kept as-is when valid for the parent, otherwise
    # wrapped in <p>. Without a valid-children list the whole text goes through p_wrap.
    p_wrap_invalid_children = lambda do |text|
      text.strip!
      if valid_children
        fragment = Nokogiri::XML::DocumentFragment.parse(text)
        new_text = ''
        fragment.element_children.each do |e|
          if valid_children.include?(e.name)
            new_text << e.to_s
          else
            new_text << "<p>#{ e.to_s }</p>"
          end
        end
        return new_text
      else
        return p_wrap.call(text)
      end
    end

    if !has_unwrapped_text?(content)
      # no bare text at the top level: only element children to check
      content = p_wrap_invalid_children.call(content)
    else
      return content if content.length < 1
      new_content = ''
      # split on blank lines and handle each non-empty block separately
      blocks = content.split("\n\n").select { |b| !b.strip.empty? }
      blocks.each do |b|
        if has_unwrapped_text?(b)
          new_content << p_wrap.call(b)
        else
          new_content << p_wrap_invalid_children.call(b)
        end
      end
      content = new_content
    end

    ## REMOVED 2018-09 - leaving here for future reference
    # first lets see if there are any &
    # note if there's a &somewordwithnospace , the error is EntityRef and wont
    # be fixed here...
    # if xml_errors(content).any? { |e| e.message.include?("The entity name must immediately follow the '&' in the entity reference.") }
    #   content.gsub!("& ", "&amp; ")
    # end
    # END - REMOVED 2018-09

    # in some cases adding p tags can create invalid markup with mixed content
    # just return the original content if there's still problems
    xml_errors(content).any? ? original_content : content
  end


  # Escapes ampersands in +content+ and then removes every literal
  # "<p>", "</p>" and "<p/>" tag (in that order).
  #
  # @param content [String] markup to clean
  # @return [String] the content without those paragraph tags
  def strip_p(content)
    escaped = escape_ampersands(content)
    ["<p>", "</p>", "<p/>"].reduce(escaped) { |text, tag| text.gsub(tag, '') }
  end


  # Replaces curly ("smart") double and single quotation marks with their
  # straight ASCII counterparts. The argument itself is not modified.
  #
  # @param content [String] text that may contain smart quotes
  # @return [String] a new string with straight quotes
  def remove_smart_quotes(content)
    # left and right double quotes
    straightened = content.gsub(/\xE2\x80\x9C/, '"').gsub(/\xE2\x80\x9D/, '"')
    # left and right single quotes
    straightened.gsub(/\xE2\x80\x98/, "\'").gsub(/\xE2\x80\x99/, "\'")
  end


  # Cleans up mixed content and writes it to the XML builder +context+.
  #
  # Steps, in order: straighten smart quotes; make <br> self-closing; either
  # structure the content into block children (+allow_p+) or strip <p> tags;
  # convert EAD 2002 markup; then emit the result as a raw fragment when it
  # contains markup, as plain text otherwise. If emitting raises, the content
  # is written as CDATA instead.
  #
  # Ampersand escaping happens inside structure_children / strip_p rather
  # than here.
  #
  # @param content [String] the raw content
  # @param context the XML builder currently being written to
  # @param fragments the raw-fragment handler (receives content via <<)
  # @param allow_p [Boolean] whether <p>-structured content is allowed
  def sanitize_mixed_content(content, context, fragments, allow_p = false )
    cleaned = remove_smart_quotes(content)

    # br's should be self closing
    cleaned = cleaned.gsub("<br>", "<br/>").gsub("</br>", '')

    cleaned = allow_p ? structure_children(cleaned, context.parent.name) : strip_p(cleaned)

    # hide & as @@ while the markup is parsed and converted, then restore it
    cleaned.gsub!(/&/, '@@')
    cleaned = convert_ead2002_markup(cleaned)
    cleaned.gsub!(/@@/, '&')

    begin
      contains_markup = ASpaceExport::Utils.has_html?(cleaned)
      if contains_markup
        context.text( fragments << cleaned )
      else
        # the builder escapes & itself, so undo our escaping first
        context.text cleaned.gsub("&amp;", "&")
      end
    rescue
      context.cdata cleaned
    end
  end


  # Rewrites inline markup found in +content+ by parsing it as an XML
  # fragment and running a fixed sequence of element-level conversions over
  # it (attribute renames, extref -> ref, list attribute values, type ->
  # localtype, <part> wrapping of access terms, lower-casing of closed-list
  # attribute values, removal of invalid children of mixed elements, and
  # whitespace trimming of leaf text).
  #
  # @param content [String] markup to convert
  # @return [String] the converted markup (the fragment's inner HTML)
  def convert_ead2002_markup(content)
    # Applies +fn+ to every element child of +fragment+ and to each of their
    # element children (two levels); deeper levels are reached through the
    # recursion in process_fragment below. Returns the fragment.
    apply_changes = lambda do |fn, fragment|
      fragment.element_children.each do |e|
        fn.(e)
        if !e.element_children.empty?
          e.element_children.each { |ec| fn.(ec) }
        end
      end
      fragment
    end

    # Drops a leading "prefix:" from every attribute name
    strip_attribute_namespace_prefixes = lambda do |e|
      e.attributes.each do |k, a|
        a.name = a.name.gsub(/^[A-Za-z0-9]*\:/, '')
      end
    end

    # <extref> becomes <ref>, losing any type attribute
    convert_extref = lambda do |e|
      if e.name == 'extref'
        e.name = 'ref'
        e.remove_attribute('type') if e['type']
      end
    end

    # Renames attributes per attribute_replacements for this element name,
    # and renames authfilenumber to identifier on any element
    convert_attribute_names = lambda do |e|
      e.attributes.each do |k, a|
        if replace = attribute_replacements(e.name)
          if new_name = replace[a.name]
            a.name = new_name
          end
        end
        if a.name == 'authfilenumber'
          a.name = 'identifier'
        end
      end
    end

    # must run after convert_attribute_names
    # (it reads the already-renamed listtype attribute)
    convert_list_attribute_values = lambda do |e|
      if e.name == 'list'
        if e['listtype']
          case e['listtype']
          when 'simple'
            e.remove_attribute('listtype')
          when 'marked'
            e['listtype'] = 'unordered'
          when 'deflist', 'ordered'
            # leave
          else
            # any other value is dropped
            e.remove_attribute('listtype')
          end
        end
        if e['numeration']
          e['numeration'] = list_numeration_value(e['numeration'])
        end
      end
    end

    # Renames the type attribute to localtype on the applicable elements
    convert_type_to_localtype = lambda do |e|
      if localtype_applicable_elements.include? e.name
        if a = e.attribute('type')
          a.name = 'localtype'
        end
      end
    end

    # Wraps each direct text child of an access element in <part>
    wrap_access_terms_in_part = lambda do |e|
      if access_elements.include? e.name
        e.children.each do |c|
          if c.text?
            part_wrapped_text = "<part>#{ c.inner_text }</part>"
            c.replace(part_wrapped_text)
          end
        end
      end
    end

    # Lower-cases the value of every attribute named in closed_list_attributes
    downcase_closed_list_attribute_values = lambda do |e|
      e.attributes.each do |k, a|
        if closed_list_attributes.include? a.name
          e[a.name] = a.value.downcase
        end
      end
    end

    # For elements with a known valid-children list, replaces each invalid
    # child element with its text (runs of whitespace collapsed to one space)
    strip_invalid_children_of_mixed_elements = lambda do |e|
      children = e.element_children
      if !children.empty?
        if (valid_children = valid_children_of_mixed_elements(e.name))
          children.each do |el|
            if !valid_children.include?(el.name) && el.inner_text
              el.replace( el.inner_text.gsub(/\s+/, ' ') )
            end
          end
        end
      end
    end

    # Trims surrounding whitespace from the text of elements without element children
    strip_text_content = lambda do |e|
      if e.element_children.empty? && e.inner_text
        e.content = e.inner_text.strip
      end
    end

    # Parse the content into a fragment owned by a throwaway UTF-8 document
    temp_doc = Nokogiri::XML::Document.new
    temp_doc.encoding = "UTF-8"
    fragment = Nokogiri::XML::DocumentFragment.new(temp_doc, content)

    # Runs all conversions in order on the given node, then recurses into each
    # of its element children. Combined with apply_changes reaching two levels,
    # a given element can be visited by the same conversion more than once.
    process_fragment = lambda do |f|
      apply_changes.(strip_attribute_namespace_prefixes, f)
      apply_changes.(convert_extref, f)
      apply_changes.(convert_attribute_names, f)
      apply_changes.(convert_list_attribute_values, f)
      apply_changes.(convert_type_to_localtype, f)
      apply_changes.(wrap_access_terms_in_part, f)
      apply_changes.(downcase_closed_list_attribute_values, f)
      apply_changes.(strip_invalid_children_of_mixed_elements, f)
      apply_changes.(strip_text_content, f)

      f.element_children.each do |e|
        process_fragment.(e)
      end
    end

    process_fragment.(fragment)

    fragment.inner_html
  end


  # Removes everything that looks like a tag (<...>) from +content+, in
  # place, and then passes the result through sanitize_mixed_content.
  #
  # @param content [String] text that may contain markup (mutated)
  # @param context the XML builder currently being written to
  # @param fragments the raw-fragment handler
  # @return whatever sanitize_mixed_content returns
  def strip_tags_and_sanitize(content, context, fragments)
    tag_pattern = /\<[^\>]*\>/
    content.gsub!(tag_pattern, '')
    sanitize_mixed_content(content, context, fragments)
  end


  # Builds the EAD3 document for +data+ and returns an Enumerator that
  # streams it out.
  #
  # Sets up the per-export state (stream handler, fragment handler, export
  # flags, id prefix), builds the <ead> skeleton with its <control> section
  # and <archdesc>, and buffers each child component through the stream
  # handler. Any StandardError raised while building is caught and written
  # into the document as an error message rather than propagated.
  #
  # @param data the resource export model (responds to the accessors used below)
  # @return [Enumerator] yields the serialized output when iterated
  def stream(data)
    @stream_handler = ASpaceExport::StreamHandler.new
    @fragments = ASpaceExport::RawXMLHandler.new
    @include_unpublished = data.include_unpublished?
    @include_daos = data.include_daos?
    @use_numbered_c_tags = data.use_numbered_c_tags?
    # prefix applied to ids by prefix_id; comes from the locale, defaulting to 'aspace_'
    @id_prefix = I18n.t('archival_object.ref_id_export_prefix', :default => 'aspace_')

    builder = Nokogiri::XML::Builder.new(:encoding => "UTF-8") do |xml|
      begin
        ead_attributes = {}

        if data.publish === false
          ead_attributes['audience'] = 'internal'
        end

        xml.ead( ead_attributes ) {

          # <control> is produced inside a stream-handler buffer
          xml.text (
            @stream_handler.buffer { |xml, new_fragments|
              serialize_control(data, xml, new_fragments)
            }
          )

          # nil level / otherlevel values are not written as attributes
          atts = {:level => data.level, :otherlevel => data.other_level}
          atts.reject! {|k, v| v.nil?}

          xml.archdesc(atts) {

            xml.did {

              unless data.title.nil?
                xml.unittitle { sanitize_mixed_content(data.title, xml, @fragments) }
              end

              # id_0..id_3, skipping nil parts, joined with '.'
              xml.unitid (0..3).map { |i| data.send("id_#{i}") }.compact.join('.')

              handle_arks(data, xml)

              serialize_aspace_uri(data, xml)

              unless data.repo.nil? || data.repo.name.nil?
                xml.repository {
                  xml.corpname {
                    xml.part {
                      sanitize_mixed_content(data.repo.name, xml, @fragments)
                    }
                  }
                }
              end

              unless (languages = data.lang_materials).empty?
                serialize_languages(languages, xml, @fragments)
              end

              data.instances_with_sub_containers.each do |instance|
                serialize_container(instance, xml, @fragments)
              end

              serialize_extents(data, xml, @fragments)

              serialize_dates(data, xml, @fragments)

              serialize_did_notes(data, xml, @fragments)

              serialize_origination(data, xml, @fragments)

              # external ids are only exported together with unpublished content
              if @include_unpublished
                data.external_ids.each do |exid|
                  xml.unitid ({ "audience" => "internal", "type" => exid['source'], "identifier" => exid['external_id']}) { xml.text exid['external_id']}
                end
              end

              # plugin hook for extra <did> content
              EAD3Serializer.run_serialize_step(data, xml, @fragments, :did)

              # Change from EAD 2002: dao must be children of did in EAD3, not archdesc
              data.digital_objects.each do |dob|
                serialize_digital_object(dob, xml, @fragments)
              end

            }# </did>

            serialize_nondid_notes(data, xml, @fragments)

            serialize_bibliographies(data, xml, @fragments)

            serialize_indexes(data, xml, @fragments)

            serialize_controlaccess(data, xml, @fragments)

            # plugin hook for extra <archdesc> content
            EAD3Serializer.run_serialize_step(data, xml, @fragments, :archdesc)

            xml.dsc {

              # each child component is serialized inside its own buffer
              data.children_indexes.each do |i|
                xml.text( @stream_handler.buffer {
                  |xml, new_fragments| serialize_child(data.get_child(i), xml, new_fragments)
                  }
                )
              end
            }
          }
        }

      rescue => e
        # the error is reported inside the exported document instead of being raised
        xml.text "ASPACE EXPORT ERROR : YOU HAVE A PROBLEM WITH YOUR EXPORT OF YOUR RESOURCE. THE FOLLOWING INFORMATION MAY HELP:\n
                  MESSAGE: #{e.message.inspect}  \n
                  TRACE: #{e.backtrace.inspect} \n "
      end
    end

    # Add xml-model for rng
    # Make this conditional if XSD or DTD are requested
    xmlmodel_content = 'href="https://raw.githubusercontent.com/SAA-SDT/EAD3/master/ead3.rng"
      type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"'

    # the processing instruction goes before the root element; the root gets
    # the EAD3 URI as its default namespace
    xmlmodel = Nokogiri::XML::ProcessingInstruction.new(builder.doc, "xml-model", xmlmodel_content)
    builder.doc.root.add_previous_sibling(xmlmodel)
    builder.doc.root.add_namespace nil, 'http://ead3.archivists.org/schema/'

    Enumerator.new do |y|
      @stream_handler.stream_out(builder, @fragments, y)
    end
  end # END stream


  # Writes the <control> section of the document: record id, file
  # description (title, edition, publication, series and note statements),
  # maintenance status/agency, language and convention declarations, rights
  # declarations, finding aid status and maintenance history.
  #
  # @param data the resource export model (responds to the accessors used below)
  # @param xml the XML builder to write to
  # @param fragments the raw-fragment handler passed on to the sanitizers
  def serialize_control(data, xml, fragments)
    control_atts = {
      repositoryencoding: "iso15511",
      countryencoding: "iso3166-1",
      dateencoding: "iso8601",
      relatedencoding: "marc",
      langencoding: "iso639-2b",
      scriptencoding: "iso15924"
    }.reject {|k, v| v.nil? || v.empty? || v == "null"}

    xml.control(control_atts) {
      ins_url = data.ead_location

      # when ARKs are enabled and the record has a current ARK, it replaces
      # the EAD location as the instance URL
      if AppConfig[:arks_enabled] && data.ark_name && (current_ark = data.ark_name.fetch('current', nil))
        ins_url = current_ark
      end

      recordid_atts = {
        instanceurl: ins_url
      }

      xml.recordid(recordid_atts) {
        xml.text(data.ead_id)
      }

      xml.filedesc {

        xml.titlestmt {
          # titleproper
          # (the finding aid title when present, otherwise the resource title; tags are stripped)
          titleproper = ""
          titleproper += "#{data.finding_aid_title} " if data.finding_aid_title
          titleproper += "#{data.title}" if ( data.title && titleproper.empty? )
          xml.titleproper { strip_tags_and_sanitize(titleproper, xml, fragments) }

          # titleproper (filing)
          unless data.finding_aid_filing_title.nil?
            xml.titleproper("localtype" => "filing") {
              sanitize_mixed_content(data.finding_aid_filing_title, xml, fragments)
            }
          end

          # subtitle
          unless data.finding_aid_subtitle.nil?
            xml.subtitle {
              sanitize_mixed_content(data.finding_aid_subtitle, xml, fragments)
            }
          end

          # author
          unless data.finding_aid_author.nil?
            xml.author {
              sanitize_mixed_content(data.finding_aid_author, xml, fragments)
            }
          end

          # sponsor
          unless data.finding_aid_sponsor.nil?
            xml.sponsor {
              sanitize_mixed_content( data.finding_aid_sponsor, xml, fragments)
            }
          end
        }

        unless data.finding_aid_edition_statement.nil?
          xml.editionstmt {
            sanitize_mixed_content(data.finding_aid_edition_statement, xml, fragments, true )
          }
        end

        xml.publicationstmt {

          xml.publisher { sanitize_mixed_content(data.repo.name, xml, fragments) }

          repo_addresslines = data.addresslines_keyed

          unless repo_addresslines.empty?
            xml.address {

              # telephone entries are indexed as [localtype, value]; email
              # entries use their key as the localtype; other lines are
              # written without a localtype
              repo_addresslines.each do |key, line|
                if key.start_with?('telephone')
                  addressline_atts = { localtype: line[0] }
                  xml.addressline(addressline_atts) {
                    sanitize_mixed_content(line[1], xml, fragments)
                  }
                elsif key == 'email'
                  addressline_atts = { localtype: key }
                  xml.addressline(addressline_atts) {
                    sanitize_mixed_content(line, xml, fragments)
                  }
                else
                  xml.addressline { sanitize_mixed_content( line, xml, fragments) }
                end
              end

              # the repository URL is only written when there is an address block
              if data.repo.url
                xml.addressline {
                  xml.ref ({ href: data.repo.url, linktitle: data.repo.url, show: "new" }) {
                    xml.text(data.repo.url)
                  }
                }
              end
            }
          end

          if (data.finding_aid_date)
            xml.date { sanitize_mixed_content( data.finding_aid_date, xml, fragments) }
          end

          # id_0..id_3, skipping nil parts, joined with '.'
          num = (0..3).map { |i| data.send("id_#{i}") }.compact.join('.')
          unless num.empty?
            xml.num() {
              xml.text(num)
            }
          end

          # repository image, as an embedded pointer inside a <p>
          if data.repo.image_url
            xml.p {
              xml.ptr ({
                href: data.repo.image_url,
                actuate: "onload",
                show: "embed"
              })
            }
          end

          data.metadata_rights_declaration_in_publicationstmt do |mrd|
            xml.p (mrd["descriptive_note"])
          end
        }

        if (data.finding_aid_series_statement)
          xml.seriesstmt {
            sanitize_mixed_content( data.finding_aid_series_statement, xml, fragments, true )
          }
        end

        if ( data.finding_aid_note )
          xml.notestmt {
            xml.controlnote {
              sanitize_mixed_content( data.finding_aid_note, xml, fragments, true )
            }
          }
        end
      } # END filedesc

      xml.maintenancestatus( { value: 'derived' } )

      # countrycode is omitted when the repository has no country
      maintenanceagency_atts = {
        countrycode: data.repo.country
      }.delete_if { |k, v| v.nil? || v.empty? }

      xml.maintenanceagency(maintenanceagency_atts) {

        # agency code is "<country>-<org_code>", or just the org code without a country
        unless data.repo.org_code.nil?
          agencycode = data.repo.country ? "#{data.repo.country}-" : ''
          agencycode += data.repo.org_code
          xml.agencycode() {
            xml.text(agencycode)
          }
        end

        xml.agencyname() {
          xml.text(data.repo.name)
        }
      }

      unless data.finding_aid_language.nil?
        xml.languagedeclaration() {

          xml.language({ langcode: "#{data.finding_aid_language}"}) {
            xml.text(I18n.t("enumerations.language_iso639_2.#{data.finding_aid_language}"))
          }

          xml.script({ scriptcode: "#{data.finding_aid_script}" }) {
            xml.text(I18n.t("enumerations.script_iso15924.#{data.finding_aid_script}"))
          }

          unless data.finding_aid_language_note.nil?
            xml.descriptivenote {
              sanitize_mixed_content(data.finding_aid_language_note, xml, fragments, true)
            }
          end

        }
      end

      unless data.finding_aid_description_rules.nil?
        xml.conventiondeclaration {
          xml.abbr {
            xml.text(data.finding_aid_description_rules)
          }
          xml.citation {
            xml.text(I18n.t("enumerations.resource_finding_aid_description_rules.#{ data.finding_aid_description_rules}"))
          }
        }
      end

      data.metadata_rights_declaration_in_rightsdeclaration do |mrd|
        xml.rightsdeclaration {
          attributes = { href: mrd["file_uri"] }
          attributes[:arcrole] = mrd["xlink_arcrole_attribute"] if mrd["xlink_arcrole_attribute"]
          attributes[:linkrole] = mrd["xlink_role_attribute"] if mrd["xlink_role_attribute"]
          xml.citation (attributes) {
            # translated license label, falling back to the raw license value
            if mrd["license"]
              xml.text (I18n.t("enumerations.metadata_license.#{mrd['license']}", :default => mrd['license']))
            end
          }
          if mrd["license"]
            xml.abbr (mrd["license"])
          end
          if mrd["descriptive_note"]
            xml.descriptivenote {

              if mrd["descriptive_note"]
                xml.p (mrd["descriptive_note"])
              end
            }
          end
        }
      end

      # the finding aid status is exported only when unpublished content is
      # included or the status itself is marked as published
      if @include_unpublished || data.is_finding_aid_status_published
        finding_aid_status = data.finding_aid_status
      else
        finding_aid_status = nil
      end

      unless finding_aid_status.nil?
        xml.localcontrol( { localtype: 'findaidstatus'} ) {
          xml.term() {
            xml.text(finding_aid_status)
          }
        }
      end

      xml.maintenancehistory() {
        # first event: this export itself, stamped with the current time
        xml.maintenanceevent() {
          xml.eventtype( { value: 'derived' } ) {}
          xml.eventdatetime() {
            xml.text(DateTime.now.to_s)
          }
          xml.agenttype( { value: 'machine' } ) {}
          xml.agent() {
            xml.text("ArchivesSpace #{ ASConstants.VERSION }")
          }
          xml.eventdescription {
            xml.text("This finding aid was produced using ArchivesSpace on #{ DateTime.now.strftime('%A %B %e, %Y at %H:%M') }")
          }
        }

        # one further event per revision statement; unpublished statements are
        # left out unless unpublished content is included, in which case they
        # are marked audience="internal"
        export_rs = @include_unpublished ? data.revision_statements : data.revision_statements.reject { |rs| !rs['publish'] }
        if export_rs.length > 0
          export_rs.each do |rs|
            xml.maintenanceevent(rs['publish'] ? nil : {:audience => 'internal'}) {
              xml.eventtype( { value: 'revised' } ) {}
              xml.eventdatetime() {
                xml.text(rs['date'].to_s)
              }
              xml.agenttype( { value: 'unknown' } ) {}
              xml.agent() {}
              xml.eventdescription() {
                sanitize_mixed_content( rs['description'], xml, fragments)
              }
            }
          end
        end
      }
    }
  end # END serialize_control


  # Writes one <physdescstructured> per exported extent of +obj+, followed by
  # a <physdesc localtype="container_summary"> when the extent has a
  # container summary. Unpublished extents are skipped unless unpublished
  # content is being exported, in which case they get audience="internal".
  #
  # The former `if obj.extents.length` wrapper has been removed: in Ruby an
  # Integer (including 0) is always truthy, so it never guarded anything, and
  # iterating an empty list is already a no-op.
  #
  # @param obj a record responding to +extents+ (a list of extent hashes)
  # @param xml the XML builder to write to
  # @param fragments the raw-fragment handler passed on to the sanitizer
  # @return the extents list that was iterated
  def serialize_extents(obj, xml, fragments)
    obj.extents.each do |e|
      unpublished = e["publish"] === false
      next if unpublished && !@include_unpublished

      physdescstructured_atts = { coverage: e['portion'] }
      physdescstructured_atts[:audience] = 'internal' if unpublished

      # physdescstructuredtype is based on extent_type
      # These mappings only account for the default value options
      physdescstructured_atts[:physdescstructuredtype] =
        case e['extent_type']
        when 'cassettes', 'leaves', 'photographic_prints', 'photographic_slides', 'reels', 'sheets', 'volumes'
          'materialtype'
        when 'gigabytes', 'megabytes', 'terabytes'
          'otherphysdescstructuredtype'
        else
          # covers 'cubic_feet' / 'linear_feet' as well as any other value
          'spaceoccupied'
        end

      xml.physdescstructured(physdescstructured_atts) {
        if e['number']
          xml.quantity() {
            xml.text(e['number'])
          }
        end

        if e['extent_type']
          xml.unittype() {
            # translated label, falling back to the raw extent type
            xml.text( I18n.t('enumerations.extent_extent_type.' + e['extent_type'], :default => e['extent_type']) )
          }
        end

        if e['physical_details']
          xml.physfacet() {
            sanitize_mixed_content(e['physical_details'], xml, fragments)
          }
        end

        if e['dimensions']
          xml.dimensions() {
            sanitize_mixed_content(e['dimensions'], xml, fragments)
          }
        end
      }

      if e['container_summary']
        xml.physdesc({ localtype: 'container_summary' }) {
          sanitize_mixed_content( e['container_summary'], xml, fragments)
        }
      end
    end
  end


  # Writes the dates of +obj+. A date with a begin and/or end value becomes a
  # <unitdatestructured> (holding a <datesingle> or a <daterange>), followed
  # by a <unitdate> with the expression when begin, end and expression are
  # all present. A date with only an expression becomes a <unitdate>.
  # Unpublished dates are skipped unless unpublished content is exported, in
  # which case they get audience="internal".
  #
  # @param obj a record responding to +dates+ (a list of date hashes)
  # @param xml the XML builder to write to
  # @param fragments the raw-fragment handler passed on to the sanitizer
  # @return the dates list that was iterated
  def serialize_dates(obj, xml, fragments)
    emit_unitdate = lambda do |expression, atts|
      xml.unitdate(atts) {
        sanitize_mixed_content( expression, xml, fragments )
      }
    end

    obj.dates.each do |date|
      unpublished = date["publish"] === false
      next if unpublished && !@include_unpublished

      date_atts = {
        certainty: date['certainty'] || nil,
        era: date['era'] || nil,
        calendar: date['calendar'] || nil,
        audience: unpublished ? 'internal' : nil,
        datechar: date['label'] || nil
      }

      # anything other than 'bulk' is exported as 'inclusive'
      unless date['date_type'].nil?
        date_atts[:unitdatetype] = date['date_type'] == 'bulk' ? 'bulk' : 'inclusive'
      end

      # attributes without a value are not written
      date_atts.compact!

      # expression-only dates: plain <unitdate>, nothing structured
      unless date['begin'] || date['end']
        emit_unitdate.(date['expression'], date_atts) if date['expression']
        next
      end

      xml.unitdatestructured(date_atts) do
        if date['date_type'] == 'single' && date['begin']
          xml.datesingle( { standarddate: date['begin'] } ) do
            # prefer the expression as display text, falling back to the begin value
            xml.text(date['expression'].nil? ? date['begin'] : date['expression'])
          end
        else
          xml.daterange() do
            if date['begin']
              xml.fromdate( { standarddate: date['begin'] } ) { xml.text(date['begin']) }
            end
            if date['end']
              xml.todate( { standarddate: date['end'] } ) { xml.text(date['end']) }
            end
          end
        end
      end

      if date['begin'] && date['end'] && date['expression']
        emit_unitdate.(date['expression'], date_atts)
      end
    end
  end


  # Replaces each top-level element of +content+ that is not a valid child of
  # the given mixed-content parent with its text (runs of whitespace
  # collapsed to a single space). When the parent has no valid-children list,
  # or the content has no top-level elements, nothing is replaced.
  #
  # Ampersands are swapped for @@ while the content is parsed and restored in
  # the returned string; that first swap modifies +content+ in place.
  #
  # @param content [String] note markup (mutated: & becomes @@)
  # @param parent_element_name [String] element the content will be placed in
  # @return [String] the fragment's inner HTML with & restored
  def strip_invalid_children_from_note_content(content, parent_element_name)
    # convert & to @@ before generating XML fragment for processing
    content.gsub!(/&/, '@@')
    fragment = Nokogiri::XML::DocumentFragment.parse(content)

    top_level_elements = fragment.element_children
    permitted = top_level_elements.empty? ? nil : valid_children_of_mixed_elements(parent_element_name)

    if permitted
      top_level_elements.each do |element|
        next if permitted.include?(element.name)
        next unless element.inner_text

        element.replace( element.inner_text.gsub(/\s+/, ' ') )
      end
    end

    # convert @@ back to & on return value
    fragment.inner_html.gsub(/@@/, '&')
  end


  # Writes the notes of +data+ that belong inside <did>. Notes whose type is
  # not in +data.did_note_types+ are ignored; unpublished notes are skipped
  # unless unpublished content is exported, in which case they get
  # audience="internal".
  #
  # 'dimensions' and 'physfacet' notes are written as <physdesc>;
  # 'langmaterial' notes are written as <langmaterial><language>; every other
  # type is written as an element named after the type. A label attribute is
  # added only for dimensions, physfacet, physdesc and physloc notes.
  #
  # @param data the record export model (responds to +notes+ and +did_note_types+)
  # @param xml the XML builder to write to
  # @param fragments the raw-fragment handler passed on to the sanitizer
  def serialize_did_notes(data, xml, fragments)
    data.notes.each do |note|
      unpublished = note["publish"] === false
      next if unpublished && !@include_unpublished
      # SEE backend/app/exporters/lib/export_helpers.rb - did note types valid for both EAD 2002 and EAD3
      next unless data.did_note_types.include?(note['type'])

      type = note['type']

      atts = {}
      atts[:audience] = 'internal' if unpublished
      # whitespace in the persistent id is replaced with underscores
      note_id = prefix_id(note['persistent_id'].gsub(/\s/, '_'))
      atts[:id] = note_id unless note_id.nil? || note_id.empty? || note_id == "null"

      # extracts the note text, drops children that are invalid inside
      # +element_name+, and writes the sanitized result to the builder
      write_note_content = lambda do |element_name|
        text = ASpaceExport::Utils.extract_note_text(note, @include_unpublished)
        text = strip_invalid_children_from_note_content(text, element_name)
        sanitize_mixed_content( text, xml, fragments, ASpaceExport::Utils.include_p?(type) )
      end

      if ['dimensions', 'physfacet', 'physdesc', 'physloc'].include?(type) && note['label']
        atts[:label] = note['label']
      end

      if type == 'langmaterial'
        xml.langmaterial(atts) {
          xml.language() {
            write_note_content.('language')
          }
        }
      else
        element_name = ['dimensions', 'physfacet'].include?(type) ? 'physdesc' : type
        xml.send(element_name, atts) {
          write_note_content.(element_name)
        }
      end
    end
  end

  # Writes a <langmaterial> element for the given language records.
  #
  # Each 'language_and_script' subrecord becomes a <language>, or a
  # <languageset> holding <language> and <script> when a script is recorded.
  # Each language note becomes a <descriptivenote>. As with the other note
  # writers in this file, unpublished notes are skipped unless
  # @include_unpublished is set, and are marked audience="internal" when
  # they are written.
  #
  # @param languages [Array<Hash>] language records of the description
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_languages(languages, xml, fragments)
    language_vals = languages.map { |l| l['language_and_script'] }.compact
    language_notes = languages.map { |l| l['notes'] }.compact.flatten

    xml.langmaterial {
      language_vals.each do |language|
        language_name = I18n.t("enumerations.language_iso639_2.#{language['language']}", :default => language['language'])

        if language['script']
          # Language and Script subrecords with recorded values in both fields should be exported as <languageset> elements.
          xml.languageset {
            xml.language(:langcode => language['language']) {
              xml.text language_name
            }
            xml.script(:scriptcode => language['script']) {
              xml.text I18n.t("enumerations.script_iso15924.#{language['script']}", :default => language['script'])
            }
          }
        else
          # Language and Script subrecord entries with only a Language value record should be exported as <language> elements.
          xml.language(:langcode => language['language']) {
            xml.text language_name
          }
        end
      end

      # Language Text subrecord content should be exported as a <descriptivenote> element
      language_notes.each do |note|
        hidden = note["publish"] === false
        # do not leak unpublished notes into a published-only export
        next if hidden && !@include_unpublished

        atts = hidden ? { :audience => 'internal' } : {}
        content = ASpaceExport::Utils.extract_note_text(note, @include_unpublished)
        xml.descriptivenote(atts) {
          sanitize_mixed_content(content, xml, fragments, true)
        }
      end
    }
  end


  # Writes a single note as an element named after the note's type.
  #
  # The element gets a <head> (unless ASpaceExport::Utils.headless_note?
  # says otherwise), then the note's own content, then its subnotes. The
  # head text is taken from a <head> found in the content, falling back to
  # the note label or the translated note type. Unpublished notes are
  # skipped unless @include_unpublished is set.
  #
  # @param note [Hash] the note record
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_note_content(note, xml, fragments)
    hidden = note["publish"] === false
    return if hidden && !@include_unpublished

    type = note['type']

    atts = {}
    atts[:audience] = 'internal' if hidden
    note_id = prefix_id(note['persistent_id'].gsub(/\s/, '_'))
    atts[:id] = note_id unless note_id.nil? || note_id.empty? || note_id == "null"

    fallback_head = note['label'] || I18n.t("enumerations._note_types.#{type}", :default => type)
    content, head_text = extract_head_text(note["content"], fallback_head)

    xml.send(type, atts) do
      unless ASpaceExport::Utils.headless_note?(type, content)
        xml.head { sanitize_mixed_content(head_text, xml, fragments) }
      end

      if content
        sanitize_mixed_content(content, xml, fragments, ASpaceExport::Utils.include_p?(type))
      end

      subnotes = note['subnotes']
      if subnotes
        serialize_subnotes(subnotes, xml, fragments, ASpaceExport::Utils.include_p?(type))
      end
    end
  end


  # Writes every note of +data+ whose type is listed in
  # data.archdesc_note_types by handing it to serialize_note_content.
  #
  # Notes are skipped when they are unpublished (unless @include_unpublished
  # is set), flagged 'internal', or have no type.
  #
  # @param data record wrapper providing #notes and #archdesc_note_types
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to serialize_note_content
  def serialize_nondid_notes(data, xml, fragments)
    data.notes.each do |note|
      withheld = note["publish"] === false && !@include_unpublished
      next if withheld || note['internal'] || note['type'].nil?

      if data.archdesc_note_types.include?(note['type'])
        serialize_note_content(note, xml, fragments)
      end
    end
  end


  # Writes one <origination> element per entry of data.creators_and_sources.
  #
  # Each <origination> wraps a name element chosen from the agent type
  # (persname, famname, corpname or name) whose <part> holds the agent's
  # sort name. The 'creator' role is capitalized for the label; other roles
  # are used as they are. Nothing is written when creators_and_sources is nil.
  #
  # @param data record wrapper providing #creators_and_sources
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_origination(data, xml, fragments)
    return if data.creators_and_sources.nil?

    element_by_agent_type = {
      'agent_person' => 'persname',
      'agent_family' => 'famname',
      'agent_corporate_entity' => 'corpname',
      'agent_software' => 'name'
    }

    data.creators_and_sources.each do |link|
      agent = link['_resolved']
      role = link['role'] == 'creator' ? link['role'].capitalize : link['role']

      display_name = agent['display_name']
      sort_name = display_name['sort_name']
      name_atts = {
        :relator => link['relator'],
        :source => display_name['source'],
        :rules => display_name['rules'],
        :identifier => display_name['authority_id']
      }
      node_name = element_by_agent_type[agent['agent_type']]

      xml.origination(:label => role) do
        # attributes without a value are left off the name element
        present_atts = name_atts.reject { |_key, value| value.nil? }

        xml.send(node_name, present_atts) do
          xml.part do
            sanitize_mixed_content(sort_name, xml, fragments)
            EAD3Serializer.run_serialize_step(agent, xml, fragments, node_name.to_sym)
          end
        end
      end
    end
  end


  # This extracts <head> content and returns it. Optionally, you can provide a
  # backup text node that will be returned if there are no <head> nodes in the
  # content.
  def extract_head_text(content, backup = "")
    content ||= ""
    head_match = /<head( [^<>]+)?>(.+?)<\/head>/.match(content.strip)

    # content has no head so we return it as is, paired with the backup text
    return [content, backup] if head_match.nil?

    head_element = head_match[0] # the whole <head ...>...</head> markup
    head_text = head_match[2]    # just the text between the tags
    [content.gsub(head_element, ''), head_text]
  end


  # Writes one archival object as a <c> element (or a numbered <cNN> element
  # when @use_numbered_c_tags is set) and queues its children.
  #
  # The element holds a <did> (title, identifiers, origination, extents,
  # dates, did-level notes, languages, containers and, when @include_daos is
  # set, digital objects) followed by the remaining notes, bibliographies,
  # indexes and access points. Each child is handed to
  # @stream_handler.buffer so it is serialized with its own builder and
  # fragment handler.
  #
  # Unpublished components are skipped unless @include_unpublished is set;
  # suppressed components are always skipped. Any StandardError raised while
  # serializing is written into the output as a text message rather than
  # propagated.
  #
  # @param data record wrapper for the archival object
  # @param xml builder the elements are written to
  # @param fragments fragment handler belonging to this section of the output
  # @param c_depth [Integer] nesting depth, used for numbered tag names
  def serialize_child(data, xml, fragments, c_depth = 1)
    begin
      return if data["publish"] === false && !@include_unpublished
      return if data["suppressed"] === true

      # c01, c02, ... when numbered tags are requested, otherwise plain <c>
      tag_name = @use_numbered_c_tags ? :"c#{c_depth.to_s.rjust(2, '0')}" : :c

      atts = {:level => data.level, :otherlevel => data.other_level, :id => prefix_id(data.ref_id)}

      if data.publish === false
        atts[:audience] = 'internal'
      end

      atts.reject! {|k, v| v.nil?}
      xml.send(tag_name, atts) {

        xml.did {
          if (val = data.title)
            xml.unittitle { sanitize_mixed_content( val, xml, fragments) }
          end

          handle_arks(data, xml)

          serialize_aspace_uri(data, xml)

          if !data.component_id.nil? && !data.component_id.empty?
            xml.unitid data.component_id
          end

          # external ids are only exported together with unpublished content
          if @include_unpublished
            data.external_ids.each do |exid|
              xml.unitid ({ "audience" => "internal", "type" => exid['source'], "identifier" => exid['external_id']}) { xml.text exid['external_id']}
            end
          end

          serialize_origination(data, xml, fragments)
          serialize_extents(data, xml, fragments)
          serialize_dates(data, xml, fragments)
          serialize_did_notes(data, xml, fragments)

          unless (languages = data.lang_materials).empty?
            serialize_languages(languages, xml, fragments)
          end

          EAD3Serializer.run_serialize_step(data, xml, fragments, :did)

          data.instances_with_sub_containers.each do |instance|
            # Use this section's fragment handler, like every other call in
            # this method, instead of the serializer-wide @fragments.
            serialize_container(instance, xml, fragments)
          end

          if @include_daos
            data.instances_with_digital_objects.each do |instance|
              digital_object = instance['digital_object']['_resolved']
              serialize_digital_object(digital_object, xml, fragments)
            end
          end
        }

        serialize_nondid_notes(data, xml, fragments)
        serialize_bibliographies(data, xml, fragments)
        serialize_indexes(data, xml, fragments)
        serialize_controlaccess(data, xml, fragments)
        EAD3Serializer.run_serialize_step(data, xml, fragments, :archdesc)

        # children are buffered; the value returned by buffer is written as
        # text in the position where the child belongs
        data.children_indexes.each do |i|
          xml.text(
                   @stream_handler.buffer {|xml, new_fragments|
                     serialize_child(data.get_child(i), xml, new_fragments, c_depth + 1)
                   }
                   )
        end
      }
    rescue => e
      xml.text "ASPACE EXPORT ERROR : YOU HAVE A PROBLEM WITH YOUR EXPORT OF ARCHIVAL OBJECTS. THE FOLLOWING INFORMATION MAY HELP:\n

                MESSAGE: #{e.message.inspect}  \n
                TRACE: #{e.backtrace.inspect} \n "
    end
  end

  # Writes the record's URI as <unitid localtype="aspace_uri">.
  #
  # @param data record wrapper providing #uri
  # @param xml builder the element is written to
  def serialize_aspace_uri(data, xml)
    uri_atts = { 'localtype' => 'aspace_uri' }
    xml.unitid(uri_atts) do
      xml.text(data.uri)
    end
  end

  # Writes the record's ARKs as <unitid> elements, each wrapping a <ref>.
  #
  # The current ARK is written with localtype "ark"; every entry under
  # 'previous' is written with localtype "ark-superseded". Nothing is
  # written when AppConfig[:arks_enabled] is off or the record has no
  # ark_name.
  #
  # @param data record wrapper providing #ark_name
  # @param xml builder the elements are written to
  def handle_arks(data, xml)
    return unless AppConfig[:arks_enabled] && data.ark_name

    # Writes <unitid localtype=...><ref href=...>link_text</ref></unitid>.
    write_ark = lambda do |localtype, href, link_text|
      xml.unitid({ "localtype" => localtype }) do
        xml.ref({ "href" => href, "actuate" => "onload", "show" => "new" }) do
          xml.text link_text
        end
      end
    end

    current_ark = data.ark_name.fetch('current', nil)
    write_ark.call("ark", current_ark, 'Archival Resource Key') if current_ark

    data.ark_name.fetch('previous', []).each do |old_ark_url|
      write_ark.call("ark-superseded", old_ark_url, 'Previous Archival Resource Key')
    end
  end


  # Writes a <controlaccess> element holding the record's subjects and
  # linked agents, each as a name/subject element wrapping one <part>.
  #
  # Attribute names are adapted before writing: 'authfilenumber' becomes
  # 'identifier', and for agents :role becomes :relator. Empty agent
  # entries are skipped. Nothing is written when there are no subjects and
  # no non-empty agent entries.
  #
  # @param data record wrapper providing the controlaccess and link lists
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_controlaccess(data, xml, fragments)
    subject_total = data.controlaccess_subjects.length
    agent_total = data.controlaccess_linked_agents(@include_unpublished).count { |entry| !entry.empty? }
    return unless (subject_total + agent_total) > 0

    # Writes one access point; +link+ is the record link whose resolved
    # record is handed to the serialize step.
    write_access_point = lambda do |node_data, link|
      element = node_data[:node_name]
      xml.send(element, node_data[:atts]) do
        xml.part do
          sanitize_mixed_content(node_data[:content], xml, fragments, ASpaceExport::Utils.include_p?(element))
          EAD3Serializer.run_serialize_step(link['_resolved'], xml, fragments, element.to_sym)
        end
      end
    end

    xml.controlaccess do
      data.controlaccess_subjects.zip(data.subjects).each do |node_data, subject|
        subject_atts = node_data[:atts]
        if subject_atts['authfilenumber']
          subject_atts['identifier'] = subject_atts['authfilenumber'].clone
          subject_atts.delete('authfilenumber')
        end

        write_access_point.call(node_data, subject)
      end

      data.controlaccess_linked_agents(@include_unpublished).zip(data.linked_agents).each do |node_data, agent|
        next if node_data.empty?

        agent_atts = node_data[:atts]
        if agent_atts[:role]
          agent_atts[:relator] = agent_atts[:role]
          agent_atts.delete(:role)
        end

        if agent_atts[:authfilenumber]
          agent_atts[:identifier] = agent_atts[:authfilenumber].clone
          agent_atts.delete(:authfilenumber)
        end

        write_access_point.call(node_data, agent)
      end
    end #</controlaccess>
  end


  # Writes the subnotes of a multipart note.
  #
  # Text subnotes are written as mixed content, chronologies as <chronlist>,
  # ordered lists as <list listtype="ordered"> and defined lists as
  # <list listtype="deflist">. Subnotes of any other jsonmodel_type write
  # nothing. Unpublished subnotes are skipped unless @include_unpublished is
  # set; the list elements written for them carry audience="internal".
  #
  # @param subnotes [Array<Hash>] the subnote records
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  # @param include_p passed on to sanitize_mixed_content for text subnotes
  def serialize_subnotes(subnotes, xml, fragments, include_p = true)
    subnotes.each do |sn|
      hidden = sn["publish"] === false
      next if hidden && !@include_unpublished

      audatt = hidden ? { :audience => 'internal' } : {}
      title = sn['title']
      # shared by all list-like subnotes: an optional <head> from the title
      write_head = lambda do
        xml.head { sanitize_mixed_content(title, xml, fragments) } if title
      end
      kind = sn['jsonmodel_type']

      if kind == 'note_text'
        sanitize_mixed_content(sn['content'], xml, fragments, include_p)
      elsif kind == 'note_chronology'
        xml.chronlist(audatt) do
          write_head.call

          sn['items'].each do |item|
            xml.chronitem do
              event_date = item['event_date']
              if event_date
                xml.datesingle { sanitize_mixed_content(event_date, xml, fragments) }
              end

              events = item['events']
              if events && !events.empty?
                xml.chronitemset do
                  events.each do |event|
                    xml.event { sanitize_mixed_content(event, xml, fragments) }
                  end
                end
              end
            end
          end
        end
      elsif kind == 'note_orderedlist'
        list_atts = { :listtype => 'ordered', :numeration => sn['enumeration'] }
        list_atts.delete_if { |_key, value| value.nil? || value.empty? || value == "null" }
        list_atts = list_atts.merge(audatt)
        list_atts[:numeration] = list_numeration_value(list_atts[:numeration])

        xml.list(list_atts) do
          write_head.call

          sn['items'].each do |item|
            xml.item { sanitize_mixed_content(item, xml, fragments) }
          end
        end
      elsif kind == 'note_definedlist'
        xml.list({ :listtype => 'deflist' }.merge(audatt)) do
          write_head.call

          sn['items'].each do |item|
            xml.defitem do
              if item['label']
                xml.label { sanitize_mixed_content(item['label'], xml, fragments) }
              end
              if item['value']
                xml.item { sanitize_mixed_content(item['value'], xml, fragments) }
              end
            end
          end
        end
      end
    end
  end


  # Writes the <container> elements for one instance: the top container
  # followed by any sub containers (type_2 / type_3).
  #
  # Every container gets a freshly generated id; each sub container points
  # at the previously written container through its parent attribute.
  #
  # @param inst [Hash] instance record with a resolved top container
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_container(inst, xml, fragments)
    sub = inst['sub_container']
    top = sub['top_container']['_resolved']

    # top container
    top_atts = { :id => prefix_id(SecureRandom.hex) }
    top_type = top['type']
    top_atts[:localtype] = top_type unless top_type.nil? || top_type.empty?
    top_atts[:label] = I18n.t("enumerations.instance_instance_type.#{inst['instance_type']}",
                              :default => inst['instance_type'])
    top_atts[:containerid] = top['barcode'].to_s if top['barcode']

    profile = top['container_profile']
    if profile
      top_atts[:altrender] = profile['_resolved']['url'] || profile['_resolved']['name']
    end

    xml.container(top_atts) do
      sanitize_mixed_content(top['indicator'], xml, fragments)
    end

    # sub containers, each chained to the container written just before it
    parent_id = top_atts[:id]
    (2..3).each do |n|
      sub_type = sub["type_#{n}"]
      next unless sub_type

      child_id = prefix_id(SecureRandom.hex)
      child_atts = { :id => child_id, :parent => parent_id, :localtype => sub_type }
      parent_id = child_id

      xml.container(child_atts) do
        sanitize_mixed_content(sub["indicator_#{n}"], xml, fragments)
      end
    end
  end


  # Writes the <dao> output for a digital object, wrapped in
  # <daoset linkrole="representative"> when the object has more than one
  # file version and is flagged '_is_in_representative_instance'.
  #
  # @param digital_object [Hash] resolved digital object record
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to the dao writer
  def serialize_digital_object(digital_object, xml, fragments)
    write_daos = lambda { serialize_digital_object_dao(digital_object, xml, fragments) }
    several_versions = digital_object['file_versions'].count > 1

    if several_versions && digital_object['_is_in_representative_instance']
      xml.daoset(linkrole: "representative") { write_daos.call }
    else
      write_daos.call
    end
  end


  # Writes the <dao> element(s) for a digital object.
  #
  # One <dao> is written per file version, or a single <dao> pointing at the
  # digital object identifier when there are no file versions. Every <dao>
  # carries a <descriptivenote> built from the object's title and first
  # date. Nothing is written for suppressed objects, or for unpublished
  # objects unless @include_unpublished is set.
  #
  # @param digital_object [Hash] resolved digital object record
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_digital_object_dao(digital_object, xml, fragments)
    return if digital_object["publish"] === false && !@include_unpublished
    return if digital_object["suppressed"] === true

    file_versions = digital_object['file_versions']
    title = digital_object['title']
    date = digital_object['dates'][0] || {}

    # Caption: "<title>: <expression>" or "<title>: <begin>[-<end>]"
    content = +""
    content << title if title
    content << ": " if date['expression'] || date['begin']
    if date['expression']
      content << date['expression']
    elsif date['begin']
      content << date['begin']
      # Only append the end of the range when there is one; without this
      # check a date with no end produced a dangling "-".
      content << "-#{date['end']}" if date['end'] && date['end'] != date['begin']
    end

    # Attributes shared by every <dao> written for this object.
    base_atts = {}
    base_atts['linktitle'] = title if title

    if digital_object['digital_object_type']
      base_atts['daotype'] = 'otherdaotype'
      base_atts['otherdaotype'] = digital_object['digital_object_type']
    else
      base_atts['daotype'] = 'unknown'
    end

    if file_versions.empty?
      atts = base_atts.dup
      atts['href'] = digital_object['digital_object_id']
      atts['actuate'] = 'onrequest'
      atts['show'] = 'new'
      atts['audience'] = 'internal' unless is_digital_object_published?(digital_object)
      xml.dao(atts) {
        xml.descriptivenote { sanitize_mixed_content(content, xml, fragments, true) }
      }
    else
      file_versions.each do |file_version|
        # Start from a fresh copy for each file version so that localtype,
        # audience and linkrole set for one version cannot carry over to
        # the next <dao>.
        atts = base_atts.dup
        actuate = file_version['xlink_actuate_attribute']
        show = file_version['xlink_show_attribute']

        atts['href'] = file_version['file_uri'] || digital_object['digital_object_id']
        atts['actuate'] = (actuate.respond_to?(:downcase) && actuate.downcase) || 'onrequest'
        atts['show'] = (show.respond_to?(:downcase) && show.downcase) || 'new'
        atts['localtype'] = file_version['use_statement'] if file_version['use_statement']
        atts['audience'] = 'internal' unless is_digital_object_published?(digital_object, file_version)
        if digital_object['_is_in_representative_instance']
          atts['linkrole'] = [file_version['use_statement'], 'representative'].compact.join(" ")
        end
        xml.dao(atts) {
          xml.descriptivenote { sanitize_mixed_content(content, xml, fragments, true) }
        }
      end
    end
    EAD3Serializer.run_serialize_step(digital_object, xml, fragments, :dao)
  end


  # Writes one <bibliography> element per entry of data.bibliographies.
  #
  # Each element holds a <head> (the note label, or the translated note
  # type), the note text, and one <bibref> per non-empty item. Unpublished
  # notes are skipped unless @include_unpublished is set, in which case
  # they carry audience="internal".
  #
  # @param data record wrapper providing #bibliographies
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_bibliographies(data, xml, fragments)
    data.bibliographies.each do |note|
      hidden = note["publish"] === false
      next if hidden && !@include_unpublished

      body_text = ASpaceExport::Utils.extract_note_text(note, @include_unpublished)
      note_type = note["type"] || "bibliography"
      head_text = note['label'] || I18n.t("enumerations._note_types.#{note_type}", :default => note_type)

      atts = {}
      atts[:audience] = 'internal' if hidden
      note_id = prefix_id(note['persistent_id'].gsub(/\s/, '_'))
      atts[:id] = note_id unless note_id.nil? || note_id.empty? || note_id == "null"

      xml.bibliography(atts) do
        xml.head { sanitize_mixed_content(head_text, xml, fragments) }
        sanitize_mixed_content(body_text, xml, fragments, true)

        note['items'].each do |item|
          next if item.empty?

          xml.bibref { sanitize_mixed_content(item, xml, fragments) }
        end
      end
    end
  end


  # Writes one <index> element per entry of data.indexes.
  #
  # Each element holds an optional <head>, the note text, and one
  # <indexentry> per item whose type is found in data.index_item_type_map.
  # The head text comes from a <head> inside the note text, falling back to
  # the note label or the translated note type. Unpublished notes are
  # skipped unless @include_unpublished is set, in which case they carry
  # audience="internal".
  #
  # @param data record wrapper providing #indexes and #index_item_type_map
  # @param xml builder the elements are written to
  # @param fragments fragment handler passed through to sanitize_mixed_content
  def serialize_indexes(data, xml, fragments)
    data.indexes.each do |note|
      hidden = note["publish"] === false
      next if hidden && !@include_unpublished

      body_text = ASpaceExport::Utils.extract_note_text(note, @include_unpublished)

      fallback_head =
        if note['label']
          note['label']
        elsif note['type']
          I18n.t("enumerations._note_types.#{note['type']}", :default => note['type'])
        end

      atts = {}
      atts[:audience] = 'internal' if hidden
      note_id = prefix_id(note['persistent_id'].gsub(/\s/, '_'))
      atts[:id] = note_id unless note_id.nil? || note_id.empty? || note_id == "null"

      body_text, head_text = extract_head_text(body_text, fallback_head)

      xml.index(atts) do
        unless head_text.nil?
          xml.head { sanitize_mixed_content(head_text, xml, fragments) }
        end

        sanitize_mixed_content(body_text, xml, fragments, true)

        note['items'].each do |item|
          node_name = data.index_item_type_map[item['type']]
          next unless node_name

          xml.indexentry do
            # only entries with a reference get a target on their <ref>
            ref_atts = item['reference'] ? { :target => prefix_id(item['reference']) } : {}

            entry_value = item['value']
            if entry_value
              xml.send(node_name) do
                xml.part { sanitize_mixed_content(entry_value, xml, fragments) }
              end
            end

            ref_text = item['reference_text']
            if ref_text
              xml.ref(ref_atts) { sanitize_mixed_content(ref_text, xml, fragments) }
            end
          end
        end
      end
    end
  end

end