sanger/sequencescape

View on GitHub
app/models/accession_service.rb

Summary

Maintainability
C
1 day
Test Coverage
C
73%
# frozen_string_literal: true
# The EBI operates two key AccessionServices
# {EnaAccessionService ENA}: Mostly non-human data, provides open access to uploaded data
# {EgaAccessionService EGA}: Mostly for human data, provides managed access to uploaded data
# We also submit information to ArrayExpress, but this happens indirectly via the accession services above.
# @see https://www.ebi.ac.uk/ega/submission#which_archive
#
# Accessioning involves submitting metadata to an external database as XML files.
# This data receives a unique 'accession number' which we store in the database.
# These accession numbers can then be used in publications to allow external researchers
# access to the metadata.
#
# Accessionables
# --------------
# {Accessionable::Sample}     Represents information about the sample, maps to a Sequencescape {Sample}.
# {Accessionable::Study}      Represents information about the study. Indicates WHY the samples have been sequenced.
#                             Maps to a Sequencescape {Study}.
# {Accessionable::Submission} Wrapper object required by the submission service. We use one per accessionable.
# The following are associated with EGA studies.
# {Accessionable::Dac}    Data access committee. Information about who to contact to gain access to the data. (EGA)
# {Accessionable::Policy} Details about how the data may be used. (EGA)
#
# Accessioning of samples has been partially migrated to {Accession 'a separate accession library'}
class AccessionService # rubocop:todo Metrics/ClassLength
  # Class used to build the HTTP resource that accession files are posted to.
  # We override this in testing to do a bit of eavesdropping
  class_attribute :rest_client_class
  self.rest_client_class = RestClient::Resource

  # Base class for every error raised by the accessioning process
  AccessionServiceError = Class.new(StandardError)

  # Raised when an accession number is requested for something that doesn't need one
  NumberNotRequired = Class.new(AccessionServiceError)

  # Raised when the service reports success but gives no number back for a requested accessionable
  NumberNotGenerated = Class.new(AccessionServiceError)

  CenterName = 'SC' # TODO: [xxx] use config file
  Protect = 'protect'
  Hold = 'hold'

  # Returns nil in this base class.
  # NOTE(review): presumably overridden by concrete services to identify the archive - confirm in subclasses.
  def provider; end

  class AccessionedFile < File
    # This class provides an original_filename method
    # which RestClient can use to define the remote filename
    attr_accessor :original_filename
  end

  # When samples belong to multiple studies, the submission service with the highest priority will be selected
  class_attribute :priority, instance_writer: false

  # When true, allows the accessioning of samples prior to accessioning of the study
  class_attribute :no_study_accession_needed, instance_writer: false

  # Indicates that the class reflects a real accessioning service. Set to false for dummy services. This allow
  # scripts like the accessioning cron to break out prematurely for dummy services
  class_attribute :operational, instance_writer: false

  self.priority = 0
  self.no_study_accession_needed = false
  self.operational = false

  # Submits the supplied accessionables to the accession service and stores the accession numbers
  # it gives back. Everything happens inside a single database transaction.
  #
  # @param user passed to {Accessionable::Submission} and to update_accession_number! on each accessionable
  # @param accessionables [Array] the accessionables that must receive an accession number
  # @return [Array] the accession_number of each supplied accessionable, in the order given
  # @raise [AccessionServiceError] if any accessionable reports errors, or the service rejects the submission
  # @raise [NumberNotGenerated] if the service reports success but a supplied accessionable received no number
  def submit(user, *accessionables)
    ActiveRecord::Base.transaction do
      submission = Accessionable::Submission.new(self, user, *accessionables)

      errors = submission.all_accessionables.map(&:errors).flatten
      raise AccessionServiceError, errors.join("\n") unless errors.empty?

      xmldoc = REXML::Document.new(post_accessionable_files(submission))
      apply_accession_response(xmldoc, submission, user, accessionables)

      # Deliberately the last expression of the block rather than an explicit `return`:
      # exiting a transaction block via `return` is treated as a rollback by some Rails versions.
      accessionables.map(&:accession_number)
    end
  end

  # Submits a single sample.
  #
  # @param sample wrapped in an {Accessionable::Sample}
  # @param user passed through to {#submit}
  # @return [Array] see {#submit}
  def submit_sample_for_user(sample, user)
    submit(user, Accessionable::Sample.new(sample))
  end

  # Submits a single study.
  #
  # @param study wrapped in an {Accessionable::Study}
  # @param user passed through to {#submit}
  # @return [Array] see {#submit}
  # @raise [NumberNotRequired] unless study.accession_required? is true
  def submit_study_for_user(study, user)
    raise NumberNotRequired, 'An accession number is not required for this study' unless study.accession_required?

    # TODO: check error
    # raise AccessionServiceError, "Cannot generate accession number: #{ sampledata[:error] }" if sampledata[:error]

    # raise NumberNotGenerated, 'No need to' if not ebi_accession_number.blank? and not /ER/.match(ebi_accession_number)

    submit(user, Accessionable::Study.new(study))
  end

  # This base implementation never submits a DAC.
  #
  # @raise [NumberNotRequired] always
  def submit_dac_for_user(_study, _user)
    raise NumberNotRequired, 'No need to'
  end

  # @return the xml of an {Accessionable::Study} built from the study
  def accession_study_xml(study)
    Accessionable::Study.new(study).xml
  end

  # @return the xml of an {Accessionable::Sample} built from the sample
  def accession_sample_xml(sample)
    Accessionable::Sample.new(sample).xml
  end

  # @return the xml of an {Accessionable::Policy} built from the study
  def accession_policy_xml(study)
    policy = Accessionable::Policy.new(study)
    policy.xml
  end

  # @return the xml of an {Accessionable::Dac} built from the study
  def accession_dac_xml(study)
    Accessionable::Dac.new(study).xml
  end

  # The four visibility methods below all return {Protect} in this base class, ignoring their argument.

  def sample_visibility(_sample)
    Protect
  end

  def study_visibility(_study)
    Protect
  end

  def policy_visibility(_study)
    Protect
  end

  def dac_visibility(_study)
    Protect
  end

  # @return [Boolean] always false in this base class
  def private?
    false
  end

  private

  # Writes the xml of every accessionable in the submission to a temporary file, posts the
  # files to the service, and returns the raw response body.
  #
  # @param submission [Accessionable::Submission]
  # @return [String] the response body
  # @raise [AccessionServiceError] if the body contains one of the known service failure messages
  def post_accessionable_files(submission)
    files = [] # hold a reference to each tempfile so it still exists when the files are sent
    file_params =
      submission.all_accessionables.map do |acc|
        file = Tempfile.open("#{acc.schema_type}_file")
        files << file
        file.puts(acc.xml)
        file.open # reopen for read

        Rails.logger.debug { file.each_line.to_a.join("\n") }

        { name: acc.schema_type.upcase, local_name: file.path, remote_name: acc.file_name }
      end

    xml_result = post_files(file_params)
    Rails.logger.debug { xml_result }
    if xml_result.match?(/(Server error|Auth required|Login failed)/)
      raise AccessionServiceError, "EBI Server Error. Couldnt get accession number: #{xml_result}"
    end

    xml_result
  ensure
    # close! also unlinks the temporary file instead of leaving it for the finalizer
    files.each(&:close!)
  end

  # Interprets the parsed service response, dispatching on the 'success' attribute of the root element.
  #
  # @param xmldoc [REXML::Document] the parsed response
  # @param submission [Accessionable::Submission]
  # @param user passed to update_accession_number!
  # @param accessionables [Array] the accessionables for which a number is mandatory
  # @raise [AccessionServiceError] if success is 'false', missing, or unrecognised
  # @raise [NumberNotGenerated] see {#update_accession_numbers}
  def apply_accession_response(xmldoc, submission, user, accessionables)
    root = xmldoc.root # nil when the response body was empty
    success = root.attributes['success'] if root

    case success
    when 'true'
      update_accession_numbers(xmldoc, submission, user, accessionables)
    when 'false'
      details = root.elements.to_a('//ERROR').map { |error| "\n  - #{error.text}" }.join
      raise AccessionServiceError, "Could not get accession number. Error in submitted data:#{details}"
    else
      raise AccessionServiceError,
            "Could not get accession number. Error in submitted data: unrecognised success value #{success.inspect}"
    end
  end

  # Extracts the accession numbers (and ArrayExpress accession numbers) from the response
  # and stores them on the accessionables of the submission.
  #
  # for some reasons, ebi doesn't give us back a accession number for the submission if it's a MODIFY action
  # therefore, we should be ready to get one or not
  #
  # @raise [NumberNotGenerated] if one of the expected accessionables didn't get a number
  def update_accession_numbers(xmldoc, submission, user, accessionables)
    number_generated = true

    submission.all_accessionables.each do |acc|
      accession_number = acc.extract_accession_number(xmldoc)
      if accession_number
        acc.update_accession_number!(user, accession_number)
      elsif accessionables.include?(acc)
        # error only, if one of the expected accessionable didn't get a AN
        # We don't care about the submission
        number_generated = false
      end
      ae_an = acc.extract_array_express_accession_number(xmldoc)
      acc.update_array_express_accession_number!(ae_an) if ae_an
    end

    raise NumberNotGenerated, 'Service gave no numbers back' unless number_generated
  end

  # Builds the SUBMISSION xml document.
  #
  # @param submission [Hash] read for the keys :center_name, :broker, :submission_id, :submission_date,
  #   :contact_inform_on_error, :contact_inform_on_status, :name, :source, :schema and :hold
  # @param accession_number when blank an ADD action is generated, otherwise a MODIFY action
  # @return [String] the generated xml
  # rubocop:todo Metrics/MethodLength
  def accession_submission_xml(submission, accession_number) # rubocop:todo Metrics/AbcSize
    xml = Builder::XmlMarkup.new
    xml.instruct!
    xml.SUBMISSION(
      'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
      :center_name => submission[:center_name],
      :broker_name => submission[:broker],
      :alias => submission[:submission_id],
      :submission_date => submission[:submission_date]
    ) do
      xml.CONTACTS do
        xml.CONTACT(
          inform_on_error: submission[:contact_inform_on_error],
          inform_on_status: submission[:contact_inform_on_status],
          name: submission[:name]
        )
      end
      xml.ACTIONS do
        xml.ACTION do
          if accession_number.blank?
            xml.ADD(source: submission[:source], schema: submission[:schema])
          else
            xml.MODIFY(source: submission[:source], target: '')
          end
        end

        # Second action: PROTECT when :hold equals Protect, HOLD for any other value
        xml.ACTION { submission[:hold] == AccessionService::Protect ? xml.PROTECT : xml.HOLD }
      end
    end
    xml.target!
  end

  # rubocop:enable Metrics/MethodLength

  require 'rexml/document'

  # require 'curb'
  include REXML

  # Options handed to the rest client when building the resource. Abstract: this base
  # implementation always raises.
  #
  # @raise [NotImplementedError] always. Note this is not a StandardError, so it is
  #   not converted into an AccessionServiceError by {#post_files}
  def accession_options
    raise NotImplementedError, 'abstract method'
  end

  # @return a new rest_client_class instance for the configured accession url
  def rest_client_resource
    rest_client_class.new(configatron.accession.url!, accession_options)
  end

  # Sets the global RestClient proxy according to configuration, in order of precedence:
  # proxy explicitly disabled, configatron proxy, then the http_proxy environment variable.
  #
  # @param resource the rest client resource; receives a user agent header when the configatron proxy is used
  def configure_proxy(resource)
    # Explicit comparison with true is intentional and kept from the original
    # NOTE(review): presumably because an unset configatron key is not nil - confirm
    if configatron.disable_web_proxy == true
      RestClient.proxy = nil
    elsif configatron.fetch(:proxy).present?
      RestClient.proxy = configatron.proxy

      # UA required to get through Sanger proxy
      # Although currently this UA is actually being set elsewhere in the
      # code as RestClient doesn't pass this header to the proxy.
      resource.options[:headers] = { user_agent: "Sequencescape Accession Client (#{Rails.env})" }
    elsif ENV['http_proxy'].present?
      RestClient.proxy = ENV['http_proxy']
    end
  end

  # Posts the described files to the accession service.
  #
  # @param file_params [Array<Hash>] one hash per file with the keys :name (the form field),
  #   :local_name (path of the file to send) and :remote_name (filename reported to the service)
  # @return [String] the response body for a 2xx response; an empty string for a response
  #   code outside both the 2xx and the 4xx-5xx ranges
  # @raise [AccessionServiceError] for a 4xx/5xx response, or if any StandardError is raised on the way
  # rubocop:todo Metrics/MethodLength, Metrics/AbcSize
  def post_files(file_params)
    rc = rest_client_resource
    configure_proxy(rc)

    # Built incrementally so that handles which were opened before a failure can still be closed below
    payload = {}
    file_params.each do |param|
      payload[param[:name]] =
        AccessionedFile.open(param[:local_name]).tap { |f| f.original_filename = param[:remote_name] }
    end

    response = rc.post(payload)
    case response.code
    when (200...300)
      # success
      response.body.to_s
    when (400...600)
      Rails.logger.warn("Accession service responded with HTTP status #{response.code}")
      raise AccessionServiceError, "accession service responded with HTTP status #{response.code}"
    else
      ''
    end
  rescue StandardError => e
    raise AccessionServiceError,
          "Could not get accession number. EBI may be down or invalid data submitted: #{e.message}"
  ensure
    # payload is nil if we failed before any file was opened
    payload&.each_value(&:close)
  end
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
end