datacite/lupo

View on GitHub
app/validators/xml_schema_validator.rb

Summary

Maintainability
A
3 hrs
Test Coverage
F
14%
# frozen_string_literal: true

class XmlSchemaValidator < ActiveModel::EachValidator
  # mapping of DataCite schema properties to database fields
  def schema_attributes(el)
    schema = {
      "date" => "dates",
      "publicationYear" => "publication_year",
      "alternateIdentifiers" => "identifiers",
      "relatedIdentifiers" => "related_identifiers",
      "relatedItems" => "related_items",
      "geoLocations" => "geo_locations",
      "rightsList" => "rights_list",
      "fundingReferences" => "funding_references",
      "version" => "version_info",
      "resource" => "xml",
    }

    schema[el] || el
  end

  def get_valid_kernel(sv)
    kernels = {
      "http://datacite.org/schema/kernel-2.1" => "kernel-2.1",
      "http://datacite.org/schema/kernel-2.2" => "kernel-2.2",
      "http://datacite.org/schema/kernel-3.0" => "kernel-3",
      "http://datacite.org/schema/kernel-3.1" => "kernel-3",
      "http://datacite.org/schema/kernel-3" => "kernel-3",
      "http://datacite.org/schema/kernel-4.0" => "kernel-4",
      "http://datacite.org/schema/kernel-4.1" => "kernel-4",
      "http://datacite.org/schema/kernel-4.2" => "kernel-4",
      "http://datacite.org/schema/kernel-4.3" => "kernel-4",
      "http://datacite.org/schema/kernel-4" => "kernel-4",
    }

    kernels[sv]
  end

  def validate_each(record, _attribute, value)
    kernel = get_valid_kernel(record.schema_version)
    return false if kernel.blank?

    invalid_schemas = %w[http://datacite.org/schema/kernel-2.1 http://datacite.org/schema/kernel-2.2]

    if record.new_record? && invalid_schemas.include?(record.schema_version)
      record.errors.add(:xml, "DOI #{record.uid}: Schema #{record.schema_version} is no longer supported")
      return false
    end

    filepath =
      Bundler.rubygems.find_name("bolognese").first.full_gem_path +
      "/resources/#{kernel}/metadata.xsd"
    schema = Nokogiri::XML.Schema(open(filepath))

    schema.validate(Nokogiri.XML(value, nil, "UTF-8")).reduce(
      {},
    ) do |_sum, error|
      location, _level, source, text = error.message.split(": ", 4)
      line, column = location.split(":", 2)
      title = "DOI " + record.uid
      if line.present?
        title += ": " + text.to_s.strip + " at line #{line}, column #{column}"
      end
      source = source.split("}").last[0..-2] if line.present?
      source = schema_attributes(source) if source.present?
      record.errors.add(source.to_sym, title)
    end
  rescue Nokogiri::XML::SyntaxError => e
    line, column, _level, text = e.message.split(":", 4)
    message = text.strip + " at line #{line}, column #{column}"
    record.errors.add(:xml, message)
  end
end