SpeciesFileGroup/taxonworks

View on GitHub
app/models/otu.rb

Summary

Maintainability
A
0 mins
Test Coverage
# An Otu (loosely, operational taxonomic unit) can be thought of as a unit of study.  In most cases an otu is a taxon.
#
# An Otu is defined by its underlying data and may be labeled with a name (TaxonName). Otus are used to represent rows
# in matrices, taxon pages, individuals or populations, or arbitrary clusters of organisms (e.g. 'unsorted specimens in this container').
# Otus are a primary unit of work in TaxonWorks.
#
# OTU is labeled with a name, either arbitrarily given or specifically linked to a taxon_name_id.
#
# @!attribute name
#   @return [String]
#   A label for the OTU.
#
# @!attribute project_id
#   @return [Integer]
#   the project ID
#
# @!attribute taxon_name_id
#   @return [Integer]
#   The id of the nomenclatural name for this OTU.  The presence of a nomenclatural name carries no biological meaning, it is
#   simply a means to organize concepts within a nomenclatural system.
#
# TODO Semantics vs. taxon_name_id
#
class Otu < ApplicationRecord
  include Housekeeping
  include SoftValidation
  # include Shared::AlternateValues # No alternate values on Name!! Consequences - search cumbersome, names not unified and controllable ... others?
  include Shared::Citations
  include Shared::DataAttributes
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::Depictions
  include Shared::Loanable
  include Shared::Confidences
  include Shared::Observations
  include Shared::BiologicalAssociations
  include Shared::HasPapertrail
  include Shared::OriginRelationship

  include Shared::AutoUuid
  include Shared::Taxonomy
  include Otu::DwcExtensions

  include Shared::MatrixHooks::Member
  include Otu::MatrixHooks
  include Otu::Maps

  include Shared::IsData

  include Shared::QueryBatchUpdate

  is_origin_for 'Sequence', 'Extract'

  GRAPH_ENTRY_POINTS = [:asserted_distributions, :biological_associations, :common_names, :contents, :data_attributes, :observation_matrices].freeze

  belongs_to :taxon_name, inverse_of: :otus

  # Why?  Could be combination too.
  belongs_to :protonym, -> { where(type: 'Protonym') }, foreign_key: :taxon_name_id

  has_many :asserted_distributions, inverse_of: :otu, dependent: :restrict_with_error

  has_many :taxon_determinations, inverse_of: :otu, dependent: :destroy # TODO: change

  # TODO, move to infer BiologicalCollectionObject
  has_many :collection_objects, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'CollectionObject'
  has_many :field_occurrences, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'FieldOccurrence'

  has_many :type_materials, through: :protonym

  # TODO: no longer true since they can come through Otu as well
  has_many :extracts, through: :collection_objects, source: :extracts
  has_many :sequences, through: :extracts, source: :derived_sequences

  has_many :collecting_events, -> { distinct }, through: :collection_objects
  has_many :common_names, dependent: :destroy
  has_many :collection_profiles, dependent: :restrict_with_error # Do not destroy old profiles

  has_many :contents, inverse_of: :otu, dependent: :destroy
  has_many :public_contents, inverse_of: :otu, dependent: :destroy

  has_many :geographic_areas_from_asserted_distributions, through: :asserted_distributions, source: :geographic_area
  has_many :geographic_areas_from_collecting_events, through: :collecting_events, source: :geographic_area
  has_many :georeferences, through: :collecting_events

  has_many :content_topics, through: :contents, source: :topic

  has_many :otu_relationships, foreign_key: :subject_otu_id
  has_many :related_otu_relationships, class_name: 'OtuRelationship', foreign_key: :object_otu_id

  has_many :leads, inverse_of: :otu, dependent: :restrict_with_error

  scope :with_taxon_name_id, -> (taxon_name_id) { where(taxon_name_id:) }
  scope :with_name, -> (name) { where(name:) }

  validate :check_required_fields

  soft_validate(:sv_taxon_name, set: :taxon_name)
  soft_validate(:sv_duplicate_otu, set: :duplicate_otu)

  accepts_nested_attributes_for :common_names, allow_destroy: true

  # @return Scope
  def self.alphabetically
    includes(:taxon_name).select('otus.*, taxon_names.cached').references(:taxon_names).order('taxon_names.cached ASC')
  end

  # @param [Integer] otu_id
  # @param [String] rank_class
  # @return [Scope]
  #    Otu.joins(:taxon_name).where(taxon_name: q).to_sql
  def self.self_and_descendants_of(otu_id, rank_class = nil)
    if o = Otu.joins(:taxon_name).find(otu_id)
      if rank_class.nil?
        joins(:taxon_name).
          where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id)) #this also covers synonyms of self
      else
        joins(:taxon_name).
          where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id)).
          where( 'taxon_names.rank_class = ?', rank_class)
      end
    else # no taxon name just return self in scope
      Otu.where(id: otu_id)
    end
  end

  # @return [Otu::ActiveRecordRelation]
  #
  # All OTUs that are synonymous/same/matching target, for either
  #    historical and pragmatic (i.e. share the same `taxon_name_id`), or
  #    nomenclatural reasons (are synonyms of the taxon name). Includes self.
  #
  # TODO: Replace with Queries::Otu::Filter
  #
  def self.coordinate_otus(otu_id)
    begin
      i = Otu.joins(:taxon_name).find(otu_id)
      j = i.taxon_name.cached_valid_taxon_name_id
      o = Otu.arel_table
      t = TaxonName.arel_table

      q = o.join(t, Arel::Nodes::InnerJoin).on(
        o[:taxon_name_id].eq( t[:id] ).and(t[:cached_valid_taxon_name_id].eq(j))
      )

      Otu.joins(q.join_sources)
    rescue ActiveRecord::RecordNotFound
      Otu.where(id: otu_id)
    end
  end

  # TODO: REplace with Queries::Otu::Filter
  # TODO: This is coordinate_otus with children,
  #       it should probably be renamed coordinate.
  # @return [Otu::ActiveRecordRelation]
  #   all OTUs linked to the taxon_name_id, it descendants, and
  #   any synonym of any of the previous
  #   linked directly to the taxon name
  #   !! Invalid taxon_name_ids return nothing
  #   !! Taxon names with synonyms return the OTUs of their synonyms
  # @param taxon_name_id [The id of a valid TaxonName]
  def self.descendant_of_taxon_name(taxon_name_id = [])
    ids = [taxon_name_id].flatten.compact.uniq

    o = Otu.arel_table
    t = TaxonName.arel_table
    h = TaxonNameHierarchy.arel_table

    q = o.join(t, Arel::Nodes::InnerJoin).on(
      o[:taxon_name_id].eq( t[:id]))
      .join(h, Arel::Nodes::InnerJoin).on(
        t[:cached_valid_taxon_name_id].eq(h[:descendant_id]))

    Otu.joins(q.join_sources).where(h[:ancestor_id].in(ids).to_sql)
  end

  # TODO: replace with filter
  # return [Scope] the Otus bound to that taxon name and its descendants
  def self.for_taxon_name(taxon_name)
    if taxon_name.kind_of?(String) || taxon_name.kind_of?(Integer)
      tn = TaxonName.find(taxon_name)
    else
      tn = taxon_name
    end
    Otu.joins(taxon_name: [:ancestor_hierarchies]).where(taxon_name_hierarchies: {ancestor_id: tn.id})
  end

  # TODO: This need to be renamed to reflect "simple" association
  def self.batch_preview(file: nil, ** args)
    # f     = CSV.read(file, headers: true, col_sep: "\t", skip_blanks: true, header_converters: :symbol)
    @otus = []
    File.open(file).each do |row|
      name = row.strip
      next if name.blank?
      @otus.push(Otu.new(name: row.strip))
    end
    @otus
  end

  def self.batch_create(otus: {}, ** args)
    new_otus = []
    begin
      Otu.transaction do
        otus.each_key do |k|
          o = Otu.new(otus[k])
          o.save!
          new_otus.push(o)
        end
      end
    rescue
      return false
    end
    new_otus
  end

  # Batch update

  # @params params [Hash]
  #   { otu_query: {},
  #     otu_filter_query: {},
  #     async_cutoff: 1
  #   }
  def self.batch_update(params)
    request = QueryBatchRequest.new(
      async_cutoff: params[:async_cutoff] || 26,
      klass: 'Otu',
      object_filter_params: params[:otu_query],
      object_params: params[:otu],
      preview: params[:preview],
    )

    a = request.filter

    v = a.all.select(:taxon_name_id).distinct.limit(2).pluck(:taxon_name_id)

    cap = 0

    case v.size
    when 1
      if v.first.nil?
        cap = 10000
        request.cap_reason = 'Maximum allowed for empty records.'
      else
        cap = 2000
        request.cap_reason = 'Maximum allowed for 1 unique taxon name id.'
      end
    when 2
      if v.include?(nil)
        cap = 2000
        request.cap_reason = 'Maximum allowed for 1 unique taxon name id.'
      else
        cap = 25
        request.cap_reason = '> 1 taxon name id'
      end
    else
      cap = 25
      request.cap_reason = '> 1 taxon name id'
    end

    request.cap = cap

    query_batch_update(request)
  end

  # @param used_on [String] required, one of `AssertedDistribution`, `Content`, `BiologicalAssociation`, `TaxonDetermination`
  # @return [Scope]
  #   the max 10 most recently used otus, as `used_on`
  def self.used_recently(user_id, project_id, used_on = '')
    t = case used_on
        when 'AssertedDistribution'
          AssertedDistribution.arel_table
        when 'Content'
          ::Content.arel_table
        when 'BiologicalAssociation'
          BiologicalAssociation.arel_table
        when 'TaxonDetermination'
          TaxonDetermination.arel_table
        else
          return Otu.none
        end

    p = Otu.arel_table

    # i is a select manager
    i = case used_on
        when 'BiologicalAssociation'
          t.project(t['biological_association_object_id'], t['updated_at']).from(t)
            .where(
              t['updated_at'].gt(1.week.ago).and(
                t['biological_association_object_type'].eq('Otu')
              )
            )
              .where(t['updated_by_id'].eq(user_id))
              .where(t['project_id'].eq(project_id))
              .order(t['updated_at'].desc)
        else
          t.project(t['otu_id'], t['updated_at']).from(t)
            .where(t['updated_at'].gt( 1.week.ago ))
            .where(t['updated_by_id'].eq(user_id))
            .where(t['project_id'].eq(project_id))
            .order(t['updated_at'].desc)
        end

    z = i.as('recent_t')

    case used_on
    when 'BiologicalAssociation'
      Otu.joins(
        Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z['biological_association_object_id'].eq(p['id'])))
      ).pluck(:id).uniq
    else
      Otu.joins(
        Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z['otu_id'].eq(p['id'])))
      ).pluck(:id).uniq
    end
  end

  # @params target [String] required, one of nil, `AssertedDistribution`, `Content`, `BiologicalAssociation`, 'TaxonDetermination'
  # @return [Hash] otus optimized for user selection
  def self.select_optimized(user_id, project_id, target = nil)
    r = used_recently(user_id, project_id, target)
    h = {
      quick: [],
      pinboard: Otu.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
      recent: []
    }

    if target && !r.empty?
      h[:recent] = (
        Otu.where('"otus"."id" IN (?)', r.first(10) ).to_a +
        Otu.where(project_id:, created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC').limit(3).to_a
      ).uniq.sort{|a,b| a.otu_name <=> b.otu_name}
      h[:quick] = (
        Otu.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a +
        Otu.where(project_id:, created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC').limit(1).to_a +
        Otu.where('"otus"."id" IN (?)', r.first(4) ).to_a
      ).uniq.sort{|a,b| a.otu_name <=> b.otu_name}
    else
      h[:recent] = Otu.where(project_id:).order('updated_at DESC').limit(10).to_a.sort{|a,b| a.otu_name <=> b.otu_name}
      h[:quick] = Otu.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a.sort{|a,b| a.otu_name <=> b.otu_name}
    end

    h
  end

  def current_collection_objects
    collection_objects.where(taxon_determinations: {position: 1})
  end

  # @return [Boolean]
  #   whether or not this otu is coordinate (see coordinate_otus) with this otu
  def coordinate_with?(otu_id)
    Otu.coordinate_otus(otu_id).where(otus: {id:}).any?
  end

  # TODO: Deprecate for helper method, HTML does not belong here
  def otu_name
    if name.present?
      name
    elsif !taxon_name_id.nil?
      taxon_name.cached_html_name_and_author_year
    else
      nil
    end
  end

  # TODO: move to helper method likely
  def distribution_geoJSON
    a_ds = Gis::GeoJSON.feature_collection(geographic_areas_from_asserted_distributions, :asserted_distributions)
    c_os = Gis::GeoJSON.feature_collection(collecting_events, :collecting_events_georeferences)
    c_es = Gis::GeoJSON.feature_collection(geographic_areas_from_collecting_events, :collecting_events_geographic_area)
    Gis::GeoJSON.aggregation([a_ds, c_os, c_es], :distribution)
  end

  # TODO: need's spec
  # A convienence method to wrap coordinate_otus and descendant_of_taxon_name
  # @return Scope
  def coordinate_otus_with_children
    if taxon_name_id.nil?
      Otu.coordinate_otus(id)
    else
      Otu.descendant_of_taxon_name(taxon_name.valid_taxon_name.id)
    end
  end

  # @return [Array]
  #   of ancestral otu_ids
  # !! This method does not fork, as soon as 2 ancestors are
  # !! hit the list terminates.
  def ancestor_otu_ids(prefer_unlabelled_otus: true)
    ids =  []
    a = parent_otu_id(prefer_unlabelled_otus: true)
    while a
      ids.push a
      b = Otu.find(a)
      a = b.parent_otu_id(prefer_unlabelled_otus: true)
    end
    ids
  end

  # @return [Array]
  #   all bilogical associations this Otu is part of
  def all_biological_associations
    # !! If self relationships are ever made possible this needs a DISTINCT clause
    BiologicalAssociation.find_by_sql(
      "SELECT biological_associations.*
         FROM biological_associations
         WHERE biological_associations.biological_association_subject_id = #{self.id}
           AND biological_associations.biological_association_subject_type = 'Otu'
       UNION
       SELECT biological_associations.*
         FROM biological_associations
         WHERE biological_associations.biological_association_object_id = #{self.id}
           AND biological_associations.biological_association_object_type = 'Otu' ")
  end

  # @return [Otu#id, nil, false]
  #  nil - there is no OTU parent with a valid taxon name possible
  #  id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name
  #
  #  Note this is used CoLDP export. Do not change without considerations there.
  def parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false)
    return nil if taxon_name_id.nil?

    # TODO: Unify to a single query

    candidates = TaxonName.joins(:otus, :descendant_hierarchies)
      .that_is_valid
      .where.not(id: taxon_name_id)
      .where(taxon_name_hierarchies: {descendant_id: taxon_name_id})
      .where.not(rank_class: skip_ranks)
      .order('taxon_name_hierarchies.generations')
      .limit(1)
      .pluck(:id)

    if candidates.size == 1
      otus = Otu.where(taxon_name_id: candidates.first).to_a
      otus.select! { |o| o.name.nil? } if prefer_unlabelled_otus && otus.size > 1

      if otus.size > 0
        return otus.first.id
      else
        return nil
      end
    else
      return nil
    end
  end

  # TODO: Re/move
  # temporary method to gent list of taxa from a geographic area and save it to csv file
  def taxa_by_geographic_area
    area = 'China'
    file_name1 = '/tmp/' + area + '_geographic_area_' + Time.now.to_i.to_s + '.csv'
    file_name2 = '/tmp/' + area + '_collection_object_' + Time.now.to_i.to_s + '.csv'
    c1 = GeographicArea.where(name: area).pluck(:id)
    c2 = GeographicArea.where('parent_id in (?)', c1).pluck(:id)
    c3 = GeographicArea.where('parent_id in (?)', c2).pluck(:id)
    c = c1 + c2 + c3
    ad = AssertedDistribution.where('geographic_area_id in (?)', c)

    CSV.open(file_name1, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area']
      ad.find_each do |z|
        tn = z.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp = nil, nil, nil
          if z.geographic_area.name == area
            ga = area
          elsif z.geographic_area.parent.name == area
            ga = area + ', ' + z.geographic_area.name
          elsif z.geographic_area.parent.parent.name == area
            ga = area + ', ' + z.geographic_area.parent.name + ', ' + z.geographic_area.name
          end
          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga]
          end
        end
      end
    end

    co = CollectionObject.joins(:collecting_event).where('collecting_events.geographic_area_id in (?)', c)

    CSV.open(file_name2, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area', 'lat', 'long']
      co.find_each do |z|
        tn = z.taxon_determinations.last&.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp, lat, long = nil, nil, nil, nil, nil
          ce = z.collecting_event.geographic_area
          if ce.name == area
            ga = area
          elsif ce.parent.name == area
            ga = area + ', ' + ce.name
          elsif ce.parent.parent.name == area
            ga = area + ', ' + ce.parent.name + ', ' + ce.name
          end
          lat_long = z.collecting_event&.georeferences&.last&.geographic_item&.to_a
          if !lat_long.nil? && lat_long.length == 2
            lat = lat_long[1]
            long = lat_long[0]
          end

          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga, lat, long]
          end
        end
      end
    end
  end

  protected

  def check_required_fields
    if taxon_name_id.blank? && name.blank? && !(taxon_name && taxon_name.persisted?) # true, true, nil is not true
      errors.add(:taxon_name_id, 'and/or name should be selected')
      errors.add(:name, 'and/or taxon name should be selected')
    end
  end

  def sv_taxon_name
    soft_validations.add(:taxon_name_id, 'Nomenclature (taxon name) is not assigned') if taxon_name_id.nil?
  end

  def sv_duplicate_otu
    unless Otu.with_taxon_name_id(taxon_name_id).with_name(name).not_self(self).with_project_id(project_id).empty?
      m = 'Another OTU with an identical nomenclature (taxon name) and name exists in this project'
      soft_validations.add(:base, m )
    end
  end

end