MushroomObserver/mushroom-observer

View on GitHub
app/models/name_sorter.rb

Summary

Maintainability
A
0 mins
Test Coverage
# frozen_string_literal: true

#
#  = Name Sorter
#
#  This class is used by a few controllers (e.g.  NameController,
#  SpeciesListController) to help parse lists of names, such as in
#  create/edit_species_list, and change_synonyms.  It uses
#  NameParse to parse individual lines, then gathers unrecognized names,
#  deprecated names, accepted names, etc.
#
#  This is extraordinarily complicated-looking, but once you've worked out the
#  execution flow and what the method and attribute names mean, you'll see that
#  it's not so bad.  Still, I'd caution against making any changes here unless
#  you really know what you're doing.
#
#  1. First create and populate object:
#    sorter = NameSorter.new
#
#    # Pass in a list of name strings, or one string at a time.
#    sorter.sort_names(list_of_name_strs)
#    sorter.add_name(name_str)
#
#    # After the first pass, if there were names with multiple matches, the user
#    # may choose which ones they mean.
#    sorter.add_chosen_names(hash_mapping_name_ids_to_name_ids)
#
#    # Likewise, after the first pass, the user can tell NameSorter that it's
#    # okay to use certain deprecated names.
#    sorter.add_approved_deprecated_names(list_of_name_strings)
#
#    # Likewise, after the first pass, if some names are deprecated, the user
#    # may choose which approved synonyms they want to use.
#    sorter.append_approved_synonyms(list_of_name_ids)
#    sorter.push_synonym(id_or_name)
#
#  2. Now query object:
#
#    # Check an external list of Names for unapproved deprecated names.  (The
#    # second version takes a hash that you get back from a list of checkboxes.)
#    # These set the internal flag below, has_unapproved_deprecated_names.
#    check_for_deprecated_names(list_of_names)
#    check_for_deprecated_checklist(checkbox_list)
#
#    # Have all deprecated names that are left been approved by the user?
#    has_unapproved_deprecated_names
#
#    # Are all names recognized and unambiguously match a single Name?
#    only_single_names
#
#    # Has the user entered any "Species one = Species two" lines?
#    # (This syntax is not allowed while populating species lists, for example.)
#    has_new_synonyms
#
#    # Checks to make sure user had a chance to choose from among the synonyms
#    # of any name they've listed that has synonyms.  This is a bit misnamed.
#    only_approved_synonyms
#
#  == Note on Timestamps
#
#  When adding names to the sorter (e.g., via +add_name+ or +sort_names+), you
#  can include timestamps.  This can be used, for example, by the SpeciesList
#  constructor to specify the date each species was observed, overriding the
#  default date that is implicitly given each Observation.  This was originally
#  used only by some external script Nathan wrote for Darvin.  I've now hooked
#  up the comment mechanism to give web users access to this feature, too.  The
#  syntax would look like this:
#
#    Abrothallus hypotrachynae [20100320]
#    Paraparmelia alabamensis [2010-03-21]
#    Cladonia strepsilis [2010/3/22 2:30pm]
#
#  (Basically, include anything that +Time.zone.parse+ would recognize inside
#  square brackets after the name.  It will be interpreted in the browser's
#  local time zone.)
#
################################################################################
#
class NameSorter
  # State the caller may supply or overwrite between passes.
  attr_accessor :approved_deprecated_names, :approved_synonyms, :chosen_names

  # Results accumulated while names are added.
  attr_reader :has_new_synonyms, :has_unapproved_deprecated_names,
              :synonym_data, :all_names,
              :deprecated_name_strs, :deprecated_names,
              :multiple_line_strs, :multiple_names,
              :single_line_strs, :single_names,
              :new_line_strs, :new_name_strs

  def initialize
    @approved_deprecated_names = [] # Array of String's
    @approved_synonyms         = [] # Array of Name's
    @chosen_names              = {} # Hash mapping Name id to Name id

    @has_new_synonyms                = false
    @has_unapproved_deprecated_names = false

    @synonym_data              = [] # Array of [NameParse, [Name, Name, ...]]
    @all_names                 = [] # Array of Name's
    @deprecated_name_strs      = [] # Array of String's
    @deprecated_names          = [] # Array of Name's
    @multiple_line_strs        = [] # Array of String's
    @multiple_names            = [] # Array of Name's
    @single_line_strs          = [] # Array of String's
    @single_names              = [] # Array of [Name, Time]
    @new_line_strs             = [] # Array of String's      # whole line
    @new_name_strs             = [] # Array of String's      # just parsed name
  end

  # Every input line seen so far: unrecognized lines first, then ambiguous
  # lines, then uniquely matched lines.
  def all_line_strs
    @new_line_strs + @multiple_line_strs + @single_line_strs
  end

  # Forget all unrecognized lines / names gathered so far.
  def reset_new_names
    @new_line_strs = []
    @new_name_strs = []
  end

  # True when there are no unrecognized names and no ambiguous lines.
  def only_single_names
    @new_name_strs.empty? && @multiple_line_strs.empty?
  end

  # Add one approved synonym.  An Integer is looked up with +Name.find+; an
  # ActiveRecord instance is stored as is.  Raises TypeError for anything else.
  def push_synonym(arg)
    return @approved_synonyms.push(Name.find(arg)) if arg.is_a?(Integer)
    return @approved_synonyms.push(arg) if arg.is_a?(ActiveRecord::Base)

    raise(
      TypeError,
      "NameSorter synonyms must be Integer or ActiveRecord::Base, " \
      "not #{arg.class}."
    )
  end

  # Add several approved synonyms.  Accepts nil (ignored), an Array of ids, or
  # a String of ids separated by "/".  Each id is converted with +to_i+ and
  # handed to +push_synonym+.  Raises TypeError for any other type.
  def append_approved_synonyms(synonyms)
    return unless synonyms # Allow for nil

    synonyms = synonyms.split("/") if synonyms.instance_of?(String)
    unless synonyms.instance_of?(Array)
      raise(
        TypeError,
        "Only Arrays can be appended to a NameSorter synonym list " \
        "not #{synonyms.class}"
      )
    end

    synonyms.each { |id| push_synonym(id.to_i) }
  end

  # Merge the given key/value pairs into +chosen_names+.  nil is ignored.
  def add_chosen_names(new_names)
    return unless new_names

    new_names.each_pair { |key, val| @chosen_names[key] = val }
  end

  # append the input to the list of approved deprecated names
  # input:  array of string ids, e.g., ["4", "27", ...]
  #      or a string of name ids, each on its own line, e.g. "16\r\n14"
  # Any other type (including nil) is silently ignored.
  def add_approved_deprecated_names(new_names)
    return unless new_names

    if new_names.instance_of?(String)
      # Whitespace split handles "\n", "\r\n" and blanks in a single pass.
      @approved_deprecated_names += new_names.split
    elsif new_names.instance_of?(Array)
      @approved_deprecated_names += new_names
    end
  end

  # Record +name+ if it is deprecated, and raise the
  # +has_unapproved_deprecated_names+ flag unless either its string
  # (+name_str+, defaulting to +name.real_search_name+) or its id (as a
  # String) is in +approved_deprecated_names+.
  def check_for_deprecated_name(name, name_str = nil)
    return unless name.deprecated

    str = name_str || name.real_search_name
    @deprecated_name_strs.push(str)
    @deprecated_names.push(name)
    @has_unapproved_deprecated_names = true unless
      deprecation_approved?(str, name)
  end

  # Run +check_for_deprecated_name+ on each of +names+.
  def check_for_deprecated_names(names, name_str = nil)
    names.each { |n| check_for_deprecated_name(n, name_str) }
  end

  # +checklist+ maps name ids to checkbox values; every entry whose value is
  # "1" is looked up with +Name.find+ and checked.  nil is ignored.
  def check_for_deprecated_checklist(checklist)
    return unless checklist

    checklist.each do |key, value|
      check_for_deprecated_name(Name.find(key.to_i)) if value == "1"
    end
  end

  # Add a single name to the list.  It calculates "stats" as it goes, such as
  # whether the name has multiple synonyms, whether it is deprecated, whether
  # it is unrecognized, etc.
  #
  # +timestamp+ is the fallback time stored with the name when the line's
  # comment does not yield a time.
  def add_name(spl_line, timestamp = nil)
    name_parse = NameParse.new(spl_line)
    line_str = name_parse.line_str
    name_str = name_parse.name
    comment_time = comment_timestamp(name_parse, timestamp)

    # Need all deprecated names even when another name is chosen
    # in case something else forces a redisplay
    names = name_parse.find_names
    check_for_deprecated_names(names, name_str)

    # Radio-box choices take priority over all else.
    chosen_id = chosen_id_for(names)
    if chosen_id
      @single_line_strs.push(line_str)
      chosen_name = Name.find(chosen_id)
      names = [chosen_name]
      @single_names.push([chosen_name, comment_time])
      @all_names.push(chosen_name)
    else
      record_unchosen_names(name_parse, names, comment_time)
    end

    # Did user specify a synonym via the "Name = Synonym" syntax?
    return unless name_parse.has_synonym

    @has_new_synonyms = true
    if name_parse.find_synonym_names.empty?
      @new_name_strs.push(name_parse.synonym_search_name)
    end
    # Keep names in addition to parse for the chosen filter
    @synonym_data.push([name_parse, names])
  end

  # Deprecate all the "Species = Synonym" synonyms, and synonymize them.
  # This relies on both the species and the synonym already existing and being
  # unambiguous.  That is, only_single_names must be true.
  # Raises TypeError when an entry does not have exactly one matching name.
  def create_new_synonyms
    @synonym_data.each do |parse, names|
      unless names.length == 1
        ambiguous = names.map(&:real_search_name).join(", ")
        raise(TypeError, "Unexpected ambiguity: #{ambiguous}")
      end

      # Merging earlier in this loop may have affected this name implicitly;
      # reload to pick up potential changes.
      name = names.first.reload
      parse.find_synonym_names.each do |synonym|
        synonym.change_deprecated(true)
        synonym.save
        name.merge_synonyms(synonym)
      end
      name.change_deprecated(false)
      name.save
    end
  end

  # Get a (mostly) full list of all the synonyms of the listed names, including
  # the names themselves... except for the names that have no synonyms.
  # Returns a list of name strings (display_name in particular), not objects.
  def synonym_name_strs
    @all_names.select(&:synonym_id).flat_map do |name|
      name.synonyms.map(&:display_name)
    end
  end

  # This gathers a full list of all the names in the list passed in and all
  # their synonyms.  This is thus the set of all possible synonyms.  It adds to
  # this the synonyms from the previous pass, just in case.  It returns a
  # de-duplicated list of Name objects (not ids).
  def all_synonyms
    gathered = @approved_synonyms.dup
    @all_names.each { |name| gathered.concat(name.synonyms) }
    gathered.uniq
  end

  # This takes all the names in the list, gathers all the possible synonyms for
  # them, then makes sure the user has had a chance to choose from among them
  # all (via @approved_synonyms).  This will fail if the user enters a name
  # with a synonym on the first pass; and it can also fail on subsequent passes
  # if they change the list of names and add a new name with another synonym.
  # The idea is not to force the user to choose any particular synonyms, but
  # instead just to make sure they have a chance to *see* all the synonyms.
  def only_approved_synonyms
    ok_name_ids = (@approved_synonyms + @all_names).map(&:id)
    all_synonyms.all? { |name| ok_name_ids.member?(name.id) }
  end

  # Add a list of name strings.  *NOTE*: +name_list+ can be a String separated
  # by newlines or an Array of String's.  Each String must contain a single
  # name.  Entries without any non-whitespace character are skipped.
  def sort_names(name_list)
    lines = name_list.is_a?(Array) ? name_list : name_list.split("\n")
    lines.each do |line|
      add_name(line) if /\S/.match?(line)
    end
  end

  private

  # Has the user approved this deprecated name, by string or by id?
  def deprecation_approved?(str, name)
    approved = @approved_deprecated_names
    return false if approved.nil?

    approved.member?(str) || approved.member?(name.id.to_s)
  end

  # Time given in the line's comment, or +fallback+ when the comment yields
  # no time or parsing raises.
  def comment_timestamp(name_parse, fallback)
    Time.zone.parse(name_parse.comment) || fallback
  rescue StandardError
    fallback
  end

  # First +chosen_names+ entry keyed by the id (as a String) of one of
  # +names+, or nil when there is none.
  def chosen_id_for(names)
    return nil unless @chosen_names

    names.each do |name|
      chosen_id = @chosen_names[name.id.to_s]
      return chosen_id if chosen_id
    end
    nil
  end

  # No radio box was checked, so the line is filed by how many names matched:
  #   1) new names -- no matches
  #   2) good names -- exactly one match
  #   3) ambiguous names -- multiple matches
  def record_unchosen_names(name_parse, names, comment_time)
    @all_names += names
    line_str = name_parse.line_str
    case names.length
    when 0
      @new_line_strs.push(line_str)
      @new_name_strs.push(name_parse.search_name)
    when 1
      @single_line_strs.push(line_str)
      # Keep the timestamp here too, not only for chosen names.
      @single_names.push([names.first, comment_time])
    else
      @multiple_line_strs.push(line_str)
      # Add a representative to @multiple_names -- doesn't matter which.
      @multiple_names.push(names.first)
    end
  end
end