SpeciesFileGroup/taxonworks

View on GitHub
lib/vendor/biodiversity.rb

Summary

Maintainability
C
1 day
Test Coverage
module Vendor

  # Wraps the biodiversity gem (https://github.com/GlobalNamesArchitecture/biodiversity)
  # Links parsed string results to Protonyms/Combinations in TaxonWorks.
  #   a = Vendor::Biodiversity::Result.new 
  #   a.name = 'Aus bus'
  #   a.parse
  #
  module Biodiversity
    
    # Ranks that can be read from a parsed name and matched to Protonyms,
    # listed from coarsest (genus) to finest (form).
    RANKS = %i[genus subgenus species subspecies variety form].freeze

    class Result
      # the query string to parse; anything after ` in ` is treated as the citation
      attr_accessor :name

      # how to match
      #   `ranked`: return names at the queried rank only (e.g. only match a subgenus to rank subgenus)
      #   `groups`: return names at Group level (species or genus), i.e. a subgenus name in query will match genus OR subgenus in database
      attr_accessor :mode

      # project to query against
      attr_accessor :project_id

      # one of :iczn, :icn, :icnp
      attr_accessor :nomenclature_code

      # the result of a ScientificNameParser parse
      attr_accessor :parse_result

      # a summarized result, used to render JSON
      #   {
      #     protonyms: { genus: [ @protonym1, ...], ... }
      #     parse: { genus:  'Aus', species: 'bus', ...}
      #   }
      # Hash of rank => [Protonyms] like { genus: [<#>, <#>] }
      # NOTE: an explicit `result` method defined later in this class replaces this reader.
      attr_reader :result

      # @return [String] the bit after ` in `, set by `parse`
      attr_reader :citation

      # @return [Boolean]
      #   whether or not the query string was successfully parsed
      # NOTE: an explicit `parseable` method defined later in this class replaces this reader.
      attr_reader :parseable

      # @return [Hash]
      #   a memoized result of the matching TW protonyms per rank
      # NOTE: an explicit `protonym_result` method defined later in this class replaces this reader.
      attr_reader :protonym_result

      # @return [Combination]
      #   the value last assigned by `disambiguate_combination`
      attr_reader :disambiguated_combination

      # @return [Combination]
      #   a memoized combination with only unambiguous elements
      # NOTE: an explicit `combination` method defined later in this class replaces this reader.
      attr_reader :combination

      # Stores the query settings and parses straight away when a query string is given.
      #
      # @param query_string [String, nil] the name to parse, stored as `name`
      # @param project_id [Object, nil] the project to query against
      # @param code [Symbol] nomenclature code, one of :iczn, :icn, :icnp
      # @param match_mode [Symbol] matching mode, `:groups` or `:ranked`
      def initialize(query_string: nil, project_id: nil, code: :iczn, match_mode: :groups)
        @name = query_string
        @project_id = project_id
        @mode = match_mode
        @nomenclature_code = code

        parse unless query_string.blank?
      end

      # Parses `name` (the part before ` in `) with the Biodiversity parser.
      #
      # Clears the memoized combinations and records the citation. The parser
      # result is memoized: an existing `parse_result` is reused, not re-parsed.
      #
      # @return [Hash]
      #   the Biodiversity name parser result with `:scientificName` set to the
      #   normalized name. When the parser failure below is rescued the Hash
      #   holds only a nil `:scientificName` and `@parseable` is false.
      # @raise [NoMethodError] any parser NoMethodError not mentioning `canonical`
      def parse
        @combination = nil
        @disambiguated_combination = nil

        n, @citation = preparse

        begin
          @parse_result ||= ::Biodiversity::Parser.parse(n)
        rescue NoMethodError => e
          # Only the `canonical` failure is treated as "unparseable"
          # (presumably a parser quirk on some inputs); everything else propagates.
          raise unless e.message.match?(/canonical/)

          @parseable = false
        end

        # After a rescued failure nothing was assigned; guarantee a Hash so the
        # line below (and every accessor reading the result) does not hit nil.
        @parse_result ||= {}
        @parse_result[:scientificName] = @parse_result[:normalized]
        @parse_result
      end

      # Whether the parser reported success and left no unparsed tail.
      # Computed from `parse_result` on first use, then reused while non-nil.
      #
      # @return [Boolean]
      def parseable
        return @parseable unless @parseable.nil?

        @parseable = parse_result[:parsed] && parse_result[:tail].blank?
      end

      # Splits the query string on the literal ` in ` separator.
      #
      # @return [Array<String>]
      #   the name portion first, then the text following each ` in `
      #  TODO: deprecate
      def preparse
        separator = ' in '
        name.split(separator)
      end

      # @return [Hash]
      #   the `:details` entry of the parse result, or an empty Hash when absent
      def detail
        details = parse_result[:details]
        details || {}
      end

      # @return [String, nil]
      #   normalized text of the first parsed word typed UNINOMIAL or GENUS
      def genus
        words = parse_result[:words]
        return nil if words.nil?

        hit = words.find { |word| %w[UNINOMIAL GENUS].include?(word[:wordType]) }
        hit&.dig(:normalized)
      end

      # @return [String, nil]
      #   normalized text of the first word after the leading one that is typed
      #   UNINOMIAL or INFRA_GENUS
      def subgenus
        words = parse_result[:words] || []
        # The first word is skipped; on an empty list the slice is nil.
        trailing = words[1..]
        return nil if trailing.nil?

        hit = trailing.find { |word| %w[UNINOMIAL INFRA_GENUS].include?(word[:wordType]) }
        hit&.dig(:normalized)
      end

      # @return [String, nil]
      #   normalized text of the first parsed word typed SPECIES
      def species
        words = parse_result[:words]
        return nil if words.nil?

        hit = words.find { |word| 'SPECIES' == word[:wordType] }
        hit&.dig(:normalized)
      end

      # @return [String, nil]
      #   the infraspecific epithet that carries no rank marker
      def subspecies
        unmarked_rank = nil
        infraspecies(unmarked_rank)
      end

      # @return [String, nil]
      #   the infraspecific epithet marked with rank `var.`
      def variety
        rank_marker = 'var.'
        infraspecies(rank_marker)
      end

      # @return [String, nil]
      #   the infraspecific epithet marked with rank `f.`
      def form
        rank_marker = 'f.'
        infraspecies(rank_marker)
      end

      # @param biodiversity_rank [String, nil] rank marker to look for (nil for an unmarked epithet)
      # @return [String, nil]
      #   `:value` of the first infraspecies entry whose `:rank` equals the marker
      def infraspecies(biodiversity_rank)
        entries = detail.dig(:infraspecies, :infraspecies)
        return nil if entries.nil?

        entry = entries.find { |candidate| candidate[:rank] == biodiversity_rank }
        entry&.dig(:value)
      end

      # @return [Integer]
      #   the total monominals in the epithet: the number of keys other than
      #   `:authorship` under the first entry of `detail`; 0 when `detail` is
      #   empty (nothing was parsed)
      def name_count
        parts = detail.values.first
        # `detail` falls back to {} for unparsed names; avoid calling `keys` on nil.
        return 0 unless parts.respond_to?(:keys)

        (parts.keys - [:authorship]).count
      end

      # @return [Symbol, nil] like `:genus`
      #   the last rank in RANKS for which the parse produced a value
      def finest_rank
        RANKS.reverse_each.find { |rank| send(rank) }
      end

      # @return [Hash, nil]
      #   the Biodiversity authorship hash found at `:authorship` -> `:originalAuth`
      def authorship
        path = %i[authorship originalAuth]
        parse_result.dig(*path)
      end

      # @return [String, nil]
      #   the `:authors` of the authorship hash rendered via
      #   Utilities::Strings.authorship_sentence; nil when there is no authorship
      def author
        found = authorship
        return nil unless found

        Utilities::Strings.authorship_sentence(found[:authors])
      end

      # @return [String]
      #   author and year joined with ', ', skipping whichever is nil;
      #   an empty String when both are nil
      def author_year
        pieces = [author, year].compact
        pieces.join(', ')
      end

      # @return [String, nil]
      #   the value at `:year` -> `:year` of the authorship hash, nil without authorship
      def year
        found = authorship
        found ? found.dig(:year, :year) : nil
      end

      # return only references to ambiguous protonyms
      #
      # Parse 'form' 
      # Parse 'parse 'Var" 

      # @return [Boolean]
      #   true if for each parsed piece of the name there is one and only one
      #   result (as judged by `unambiguous_at?`); ranks with no parsed value
      #   are ignored
      def is_unambiguous?
        # Matches are only looked up for ranks the parse actually produced.
        RANKS.all? { |rank| send(rank).nil? || !unambiguous_at?(rank).nil? }
      end

      # @return [Boolean]
      #   true when `author_year` is a non-empty String
      def is_authored?
        author_year.size.positive?
      end

      # @return [Protonym, nil]
      #   the single match at the rank; or, when there are exactly two matches
      #   and one is the nominotypical sub of the other, that sub; otherwise nil
      # @param rank [Symbol] like `:genus` or `:species`
      def unambiguous_at?(rank)
        candidates = protonym_result[rank]

        case candidates.size
        when 1
          candidates.first
        when 2
          first, second = candidates
          if second.nominotypical_sub_of?(first)
            second
          elsif first.nominotypical_sub_of?(second)
            first
          end
        end
      end

      # @return [Array]
      #   the ranks, as symbols, at which there are multiple (>1) Protonym matches
      #   !! subtly different than unambiguous_at?, which also accepts a
      #   nominotypical pair; probably should use that?!
      def ambiguous_ranks
        protonym_result.select { |_rank, matches| matches.count > 1 }.keys
      end

      # @return [Combination, nil]
      #   nil when no target ids are given
      # @param target_protonym_ids [Hash] like `{genus: 123, species: 345}`
      #   For each rank/id pair, checks whether
      #      a) the result at that rank is *ambiguous* AND
      #      b) a Protonym with the given id is among the matches at that rank.
      #   When both hold, the combination's `<rank>_id` is set to the given id.
      #   NOTE: the object updated is the memoized `combination` itself.
      def disambiguate_combination(target_protonym_ids = {})
        return nil unless target_protonym_ids.any?

        target = combination
        open_ranks = ambiguous_ranks

        target_protonym_ids.each do |rank, id|
          next unless open_ranks.include?(rank)
          next unless protonym_result[rank].map(&:id).include?(id)

          target.send("#{rank}_id=", id)
        end

        @disambiguated_combination = target
      end

      # A wrapper on the string returning rank methods.
      #
      # @return [String, nil]
      #   the parsed value at the rank; nil when no rank is given or nothing
      #   was parsed at that rank
      # @param rank [Symbol, String]
      #   rank is one of `genus`, `subgenus`, `species`, `subspecies`, `variety`, `form`
      def string(rank = nil)
        # `send(nil)` raises a TypeError, so the optional argument is handled explicitly.
        return nil if rank.nil?

        send(rank)
      end

      # @return [Scope]
      #   Protonyms in the project whose name equals the parsed string at the rank
      # @param rank [Symbol] like `:genus` or `:species`
      def basic_scope(rank)
        project = project_id
        rank_name = string(rank)

        Protonym.where(project_id: project, name: rank_name)
      end

      # @return [Scope]
      #   matches for the rank according to `mode`: `:ranked` or `:groups`;
      #   any other mode yields `Protonym.none`
      # @param rank [Symbol] like `:genus` or `:species`
      def protonyms(rank)
        current_mode = mode
        return ranked_protonyms(rank) if :ranked == current_mode
        return grouped_protonyms(rank) if :groups == current_mode

        Protonym.none
      end

      # @return [Scope]
      #    Protonyms at a given rank: the basic scope restricted to the rank
      #    class that `Ranks.lookup` returns for the nomenclature code
      # @param rank [Symbol] like `:genus` or `:species`
      def ranked_protonyms(rank)
        scope = basic_scope(rank)
        scope.where(rank_class: Ranks.lookup(nomenclature_code, rank))
      end

      # @return [Scope]
      #   Protonyms grouped by nomenclatural group, for a rank. When the query
      #   is authored and the rank is the finest parsed rank, the scope is
      #   further passed through `scope_to_author_year`.
      # @param rank [Symbol] like `:genus` or `:species`
      def grouped_protonyms(rank)
        scope =
          if %i[genus subgenus].include?(rank)
            basic_scope(rank).is_genus_group
          elsif %i[species subspecies variety form].include?(rank)
            basic_scope(rank).is_species_group
          else
            Protonym.none
          end

        return scope unless is_authored? && finest_rank == rank

        scope_to_author_year(scope)
      end

      # @return [Scope]
      #  the scope narrowed to rows whose cached_author_year equals the query's
      #  author year (as given, or with ' & ' written as ' and ') when at least
      #  one such row exists; otherwise the scope unchanged
      def scope_to_author_year(scope)
        narrowed = scope.where(
          '(cached_author_year = ? OR cached_author_year = ?)',
          author_year,
          author_year.gsub(' & ', ' and ')
        )
        return scope unless narrowed.count > 0

        narrowed
      end

      # @return [Hash]
      #   rank => Array of matching Protonyms, for every rank in RANKS; memoized.
      #   we inspect this internally, so it has to be decoupled
      def protonym_result
        @protonym_result ||= RANKS.each_with_object({}) do |rank, matches|
          matches[rank] = protonyms(rank).to_a
        end
      end

      # @return [Hash]
      #   `:author` and `:year` followed by the parsed string for each rank in RANKS
      def parse_values
        values = { author: author, year: year }
        RANKS.each_with_object(values) { |rank, acc| acc[rank] = send(rank) }
      end

      # @return [Hash]
      #   summary for rendering purposes, built once via `build_result` and reused
      def result
        return @result if @result

        @result = build_result
      end

      # Assembles the summary Hash and stores it in `@result`.
      #
      # @return [Hash] with keys
      #   `:protonyms` - matches per rank (`protonym_result`)
      #   `:parse` - parsed strings (`parse_values`)
      #   `:unambiguous` - `is_unambiguous?`
      #   `:existing_combination_id` - id of `combination_exists?`, or nil
      #   `:other_matches` - `other_matches`
      def build_result
        # Build into a local first: if any step raises, `@result` stays
        # untouched and `result` cannot later hand back a half-built Hash.
        summary = {
          protonyms: protonym_result,
          parse: parse_values,
          unambiguous: is_unambiguous?,
          existing_combination_id: combination_exists?.try(:id),
          other_matches: other_matches
        }

        @result = summary
      end

      # @return [Combination]
      #   ranks that are unambiguous have their Protonym set; built once via
      #   `set_combination` and reused
      def combination
        return @combination if @combination

        @combination = set_combination
      end

      # @return [Combination]
      #   a new, unsaved Combination whose rank attributes are each assigned the
      #   value of `unambiguous_at?` for that rank (nil where ambiguous or unmatched)
      def set_combination
        RANKS.each_with_object(Combination.new) do |rank, built|
          built.send("#{rank}=", unambiguous_at?(rank))
        end
      end

      # @return [Combination, false]
      #    what `Combination.match_exists?` returns for the combination's
      #    protonym ids; false without querying when the parse is ambiguous
      def combination_exists?
        return false unless is_unambiguous?

        Combination.match_exists?(**combination.protonym_ids_params) # TODO: pass name?
      end

      # @return [Integer, nil]
      #   the `:start` of the first parsed word typed AUTHOR_WORD, capped at the
      #   length of `name`; the length of `name` when there is no such word;
      #   nil when the parse result has no `:words`
      def author_word_position
        words = parse_result[:words]
        return nil unless words

        author_start = words.find { |word| word[:wordType] == 'AUTHOR_WORD' }&.dig(:start)
        [name.length, author_start].compact.min
      end

      # @return [String]
      #   `name` cut off before the first author word (and before an opening
      #   parenthesis directly preceding it), with surrounding whitespace removed
      def name_without_author_year
        pos = author_word_position
        # No parsed words means no author segment was located: nothing to trim.
        return name.strip if pos.nil?

        # author_word doesn't point to parens if any, so drop a directly preceding '(' too
        offset = pos > 0 && name[pos - 1] == '(' ? 2 : 1

        name[0..pos - offset].strip
      end

      # @return [Hash] every key is always present and maps to an Array
      #   `:verbatim` - names that have verbatim supplied, these should be the only names NOT parsed that user is interested in
      #   `:subgenus` - names that exactly match a subgenus, these are potential new combinations as Genus alone
      #   `:original_combination` - names that exactly match the original combination
      #   `:original` - never populated; retained only so existing consumers see the same keys
      #   `:verbatim` and `:original_combination` are only queried when the name is parseable.
      def other_matches
        h = {
          verbatim: [],
          subgenus: [],
          original: [],
          # Seeded here so the key exists even when the name is not parseable.
          original_combination: []
        }

        h[:subgenus] = Protonym.where(
          project_id: project_id,
          name: genus,
          rank_class: Ranks.lookup(nomenclature_code, :subgenus)
        ).all.to_a

        if parseable
          stripped_name = name_without_author_year

          h[:verbatim] = TaxonName.where(project_id: project_id, cached: stripped_name).
            where('verbatim_name is not null').order(:cached).all.to_a

          h[:original_combination] = Protonym.where(project_id: project_id).
            where(cached_original_combination: stripped_name).all.to_a
        end

        h
      end

    end
  end
end