datacite/lupo

View on GitHub
app/lib/params_sanitizer.rb

Summary

Maintainability
C
1 day
Test Coverage
# frozen_string_literal: true

class ParamsSanitizer
  include Crosscitable
  include Helpable

  # Empty default values for the metadata attributes nested under :data.
  # :types and :container default to an empty Hash; every other attribute
  # defaults to an empty Array. Only the outer Hash is frozen.
  DEFAULTS_MAP = {
    data: {
      titles: [], descriptions: [], types: {}, container: {},
      dates: [], subjects: [], rightsList: [], creators: [],
      contributors: [], sizes: [], formats: [], contentUrl: [],
      identifiers: [], relatedIdentifiers: [], relatedItems: [],
      fundingReferences: [], geoLocations: []
    }
  }.freeze

  # Attribute names accepted by this sanitizer's callers. A bare symbol names
  # an attribute; a `{ key: [...] }` hash that follows lists the nested fields
  # accepted under that key (an empty list meaning an array of scalars).
  # NOTE(review): the shape matches a strong-parameters `permit` filter;
  # confirm against the controller that consumes this constant.
  ATTRIBUTES_MAP = [
    :doi,
    :confirmDoi,
    :url,
    :titles,
    { titles: %i[title titleType lang] },
    :publisher,
    {
      publisher: %i[
        name
        publisherIdentifier
        publisherIdentifierScheme
        schemeUri
        lang
      ],
    },
    :publicationYear,
    :created,
    :prefix,
    :suffix,
    :types,
    {
      types: %i[
        resourceTypeGeneral
        resourceType
        schemaOrg
        bibtex
        citeproc
        ris
      ],
    },
    :dates,
    { dates: %i[date dateType dateInformation] },
    :subjects,
    { subjects: %i[subject subjectScheme schemeUri valueUri lang classificationCode] },
    :landingPage,
    {
      landingPage: [
        :checked,
        :url,
        :status,
        :contentType,
        :error,
        :redirectCount,
        { redirectUrls: [] },
        :downloadLatency,
        :hasSchemaOrg,
        # schemaOrgId is listed both as a scalar and as an array
        :schemaOrgId,
        { schemaOrgId: [] },
        :dcIdentifier,
        :citationDoi,
        :bodyHasPid,
      ],
    },
    :contentUrl,
    { contentUrl: [] },
    :sizes,
    { sizes: [] },
    :formats,
    { formats: [] },
    :language,
    :descriptions,
    { descriptions: %i[description descriptionType lang] },
    :rightsList,
    {
      rightsList: %i[
        rights
        rightsUri
        rightsIdentifier
        rightsIdentifierScheme
        schemeUri
        lang
      ],
    },
    :xml,
    :regenerate,
    :source,
    :version,
    :metadataVersion,
    :schemaVersion,
    :state,
    :isActive,
    :reason,
    :registered,
    :updated,
    :mode,
    :event,
    :should_validate,
    :client,
    :creators,
    {
      creators: [
        :nameType,
        {
          nameIdentifiers: %i[nameIdentifier nameIdentifierScheme schemeUri],
        },
        :name,
        :givenName,
        :familyName,
        {
          affiliation: %i[
            name
            affiliationIdentifier
            affiliationIdentifierScheme
            schemeUri
          ],
        },
        :lang,
      ],
    },
    :contributors,
    {
      contributors: [
        :nameType,
        {
          nameIdentifiers: %i[nameIdentifier nameIdentifierScheme schemeUri],
        },
        :name,
        :givenName,
        :familyName,
        {
          affiliation: %i[
            name
            affiliationIdentifier
            affiliationIdentifierScheme
            schemeUri
          ],
        },
        :contributorType,
        :lang,
      ],
    },
    :identifiers,
    { identifiers: %i[identifier identifierType] },
    :alternateIdentifiers,
    { alternateIdentifiers: %i[alternateIdentifier alternateIdentifierType] },
    :relatedIdentifiers,
    {
      relatedIdentifiers: %i[
        relatedIdentifier
        relatedIdentifierType
        relationType
        relatedMetadataScheme
        schemeUri
        schemeType
        resourceTypeGeneral
      ],
    },
    :fundingReferences,
    {
      fundingReferences: %i[
        funderName
        funderIdentifier
        funderIdentifierType
        awardNumber
        awardUri
        awardTitle
        schemeUri
      ],
    },
    :geoLocations,
    {
      geoLocations: [
        { geoLocationPoint: %i[pointLongitude pointLatitude] },
        {
          geoLocationBox: %i[
            westBoundLongitude
            eastBoundLongitude
            southBoundLatitude
            northBoundLatitude
          ],
        },
        :geoLocationPlace,
      ],
    },
    :container,
    {
      container: %i[
        type
        identifier
        identifierType
        title
        volume
        issue
        firstPage
        lastPage
      ],
    },
    :relatedItems,
    {
      relatedItems: [
        :relationType,
        :relatedItemType,
        {
          relatedItemIdentifier: %i[relatedItemIdentifier relatedItemIdentifierType relatedMetadataScheme schemeURI schemeType],
        },
        {
          creators: %i[nameType name givenName familyName],
        },
        {
          titles: %i[title titleType],
        },
        :publicationYear,
        :volume,
        :issue,
        :number,
        :numberType,
        :firstPage,
        :lastPage,
        :publisher,
        :edition,
        {
          contributors: %i[contributorType name givenName familyName nameType],
        },
      ],
    },
    :published,
    :downloadsOverTime,
    { downloadsOverTime: %i[yearMonth total] },
    :viewsOverTime,
    { viewsOverTime: %i[yearMonth total] },
    :citationsOverTime,
    { citationsOverTime: %i[year total] },
    :citationCount,
    :downloadCount,
    :partCount,
    :partOfCount,
    :referenceCount,
    :versionCount,
    :versionOfCount,
    :viewCount,
  ].freeze

  # Accepted shape of the `client` relationship: a `data` object carrying
  # `type` and `id`.
  RELATIONSHIPS_MAP = [
    { client: [{ data: %i[type id] }] },
  ].freeze

  # Stores the params object that the other methods read and modify in place.
  #
  # @param params [Hash-like] request attributes; defaults to an empty Hash.
  #   The object is kept by reference, not copied.
  def initialize(params = {})
    @params = params
  end

  # Runs the full sanitizing pipeline over @params and returns the result.
  #
  # Steps, in order: decode the submitted XML, parse it into `meta`, set the
  # schema version, raise if the parsed state is "not_found", fill in a random
  # DOI when needed, decide between replacing the DOI in the XML and flagging
  # the record for regeneration, then copy XML-derived attributes, version
  # info and landing page data into @params.
  #
  # @return [Hash-like] @params merged with :regenerate, minus the camelCase
  #   and read-only keys listed below
  # @raise [ActiveRecord::RecordNotFound] when meta["state"] == "not_found"
  def cleanse
    meta = generate_meta(add_xml)
    add_schema_version(meta)
    xml = meta["string"]

    # if metadata for DOIs from other registration agencies are not found
    raise ActiveRecord::RecordNotFound if meta["state"] == "not_found"

    add_random_id

    # use Array.wrap(read_attrs.first) as read_attrs may also be [[]]
    first_attr = Array.wrap(read_attrs.first)
    regenerate = nil

    if meta["from"] == "datacite" && first_attr.blank?
      # replace DOI, but otherwise don't touch the XML
      xml = replace_doi(xml, doi: @params[:doi] || meta["doi"])
    elsif xml.present? || first_attr.present?
      regenerate = true
    end

    @params[:xml] = xml if xml.present?

    add_xml_attributes(meta)
    add_metadata_version(meta)
    add_landingpage

    # ignore camelCase keys, and read-only keys
    dropped_keys = %i[
      confirmDoi
      prefix
      suffix
      publicationYear
      alternateIdentifiers
      rightsList
      relatedIdentifiers
      relatedItems
      fundingReferences
      geoLocations
      metadataVersion
      schemaVersion
      state
      mode
      isActive
      landingPage
      created
      registered
      updated
      published
      lastLandingPage
      version
      lastLandingPageStatus
      lastLandingPageStatusCheck
      lastLandingPageStatusResult
      lastLandingPageContentType
      contentUrl
      viewsOverTime
      downloadsOverTime
      citationsOverTime
      citationCount
      downloadCount
      partCount
      partOfCount
      referenceCount
      versionCount
      versionOfCount
      viewCount
    ]

    @params.
      merge(regenerate: @params[:regenerate] || regenerate).
      except(*dropped_keys)
  end

  # Returns the values stored in @params for the metadata attributes listed
  # below, in that order, with nil entries removed (false is kept).
  #
  # @return [Array]
  def read_attrs
    attr_keys = %i[
      creators
      contributors
      titles
      publisher
      publicationYear
      types
      descriptions
      container
      sizes
      formats
      version
      language
      dates
      identifiers
      relatedIdentifiers
      relatedItems
      fundingReferences
      geoLocations
      rightsList
      subjects
      contentUrl
      schemaVersion
    ]

    attr_keys.map { |key| @params[key] }.compact
  end

  # Returns the params object held by this sanitizer (the same object, not a
  # copy), including any changes made by the other methods.
  def get_params
    @params
  end

  # Decodes the Base64-encoded XML found in @params[:xml].
  #
  # Does not write to @params; the caller decides what to do with the result.
  #
  # @return [String, nil] the decoded XML, tagged as UTF-8, with every UTF-8
  #   BOM sequence removed and surrounding whitespace stripped; nil when no
  #   XML was submitted or the decoded text is blank
  def add_xml
    encoded = @params[:xml]
    xml = encoded.present? ? Base64.decode64(encoded).force_encoding("UTF-8") : nil
    return unless xml.present?

    # remove optional utf-8 bom
    xml.gsub!("\xEF\xBB\xBF", "")

    # remove leading and trailing whitespace
    xml.strip
  end

  # Copies metadata attributes into @params under their underscored names.
  #
  # For each attribute listed below, when @params has the camelCase key or
  # `meta` has the underscored key, the underscored key is set in @params to
  # the request value if truthy, otherwise to the value parsed from the XML.
  #
  # @param meta [Hash] metadata parsed from the XML, keyed by underscored strings
  # @return [Array<Symbol>] the list of attribute names that was iterated
  def add_xml_attributes(meta)
    mergeable = %i[
      url creators contributors titles publisher publicationYear types
      descriptions container sizes formats language dates identifiers
      relatedIdentifiers relatedItems fundingReferences geoLocations
      rightsList agency subjects contentUrl schemaVersion
    ]

    # merge attributes from xml into regular attributes
    mergeable.each do |attr|
      target = attr.to_s.underscore

      # skip attributes known to neither side, so that we don't accidentally
      # set any attributes to nil
      next unless @params.key?(attr) || meta.key?(target)

      # the trailing fallback keeps an explicit `false` from the request
      # instead of collapsing it to nil
      @params.merge!(target => @params[attr] || meta[target] || @params[attr])
    end
  end

  # Copies @params[:landingPage] to @params[:landing_page].
  #
  # Only update landing_page info if something is received via API, to not
  # overwrite existing data.
  #
  # @return [Object, nil] the copied value, or nil when nothing was copied
  def add_landingpage
    landing_page = @params[:landingPage]
    return unless landing_page.present?

    @params[:landing_page] = landing_page
  end

  # Handles version metadata: sets @params[:version_info] from the request's
  # :version, falling back to the "version_info" value parsed from the XML.
  #
  # Nothing is written unless @params has a :version key or the parsed
  # metadata carries a non-blank "version_info".
  #
  # @param hash [Hash] metadata parsed from the XML
  # @return [Object, nil] the value written, or nil when nothing was written
  def add_metadata_version(hash)
    return unless @params.key?(:version) || hash["version_info"].present?

    @params[:version_info] = @params[:version] || hash["version_info"]
  end

  # Generates a random DOI if no DOI is provided but a prefix is.
  #
  # In the Rails test environment a fixed `number: 123_456` is passed to
  # `generate_random_dois` to make the random DOI predictable.
  #
  # @return [Object, nil] the DOI written to @params[:doi], or nil when
  #   @params was left unchanged
  def add_random_id
    return unless @params[:doi].blank? && @params[:prefix].present?

    prefix = @params[:prefix]
    candidates =
      if Rails.env.test?
        generate_random_dois(prefix, number: 123_456)
      else
        generate_random_dois(prefix)
      end

    @params[:doi] = candidates.first
  end

  # Parses the given XML into a metadata hash via `parse_xml`, passing the
  # DOI from @params along.
  #
  # @param xml [String, nil] decoded XML
  # @return [Hash] the result of `parse_xml`, or an empty Hash when xml is blank
  def generate_meta(xml)
    return {} unless xml.present?

    parse_xml(xml, doi: @params[:doi])
  end

  # Sets @params[:schemaVersion] to LAST_SCHEMA_VERSION when the parsed
  # metadata's "from" value is one of METADATA_FORMATS.
  #
  # Otherwise the existing value is written back unchanged, which means the
  # :schemaVersion key exists in @params afterwards even if it was absent
  # before (with a nil value).
  #
  # @param hash [Hash] metadata parsed from the XML
  # @return [Object, nil] the value written to @params[:schemaVersion]
  def add_schema_version(hash)
    known_format = METADATA_FORMATS.include?(hash["from"])
    schema_version = known_format ? LAST_SCHEMA_VERSION : @params[:schemaVersion]

    @params[:schemaVersion] = schema_version
  end

  # Rejects entries whose :nameIdentifiers value is hash-like (responds to
  # `keys`) rather than a list.
  #
  # @param array [Array, Object, nil] one entry or a list of entries; each
  #   entry is indexed with :nameIdentifiers
  # @return [Array] the input wrapped with Array.wrap when every entry passes
  # @raise [ActionController::UnpermittedParameters] on the first entry whose
  #   :nameIdentifiers responds to `keys`
  def self.sanitize_nameIdentifiers(array)
    Array.wrap(array).each do |entry|
      name_identifiers = entry[:nameIdentifiers]
      next unless name_identifiers.respond_to?(:keys)

      raise(
        ActionController::UnpermittedParameters,
        ["nameIdentifiers must be an Array"],
      )
    end
  end
end