lib/tasks/import/sf/sf_specimens.rake from SpeciesFileGroup/taxonworks

lib/tasks/import/sf/sf_specimens.rake
Summary

Maintainability

Test Coverage

Issues
namespace :tw do
  namespace :project_import do
    namespace :sf_import do
      require 'fileutils'
      require 'logged_task'
      namespace :specimens do

        desc 'time rake tw:project_import:sf_import:specimens:collection_objects user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define collection_objects: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Importing specimen records as collection objects...'

          # total (see below)
          # type (Specimen, Lot, RangedLot --  Dmitry uses lot, not ranged lot)
          # preparation_type_id (TW integer, include SF text as data attribute?)
          # respository_id (Dmitry manually reconciled these); manually reconciled, not all will be found, add sf_depo_id and sf_depo_string as attribute
          # buffered_collecting_event (no SF data)
          # buffered_determinations (no SF data)
          # buffered_other_labels (no SF data)
          # ranged_lot_category_id (leave nil)
          # collecting_event_id
          # accessioned_at (no SF data)
          # deaccession_reason (no SF data)
          # deaccessioned_at (no SF data)
          # housekeeping

          # add specimen note
          # add specimen status note (identifier?): 0 = presumed Ok, 1 = missing, 2 = destroyed, 3 = lost, 4 = unknown, 5 = missing?, 6 = destroyed?, 7 = lost?, 8 = damaged, 9 = damaged?, 10 = no data entered
          # specimen dataflags: 1 = ecological relationship, 2 = character data not yet implemented, 4 = image, 8 = sound, 16 = include specimen locality in maps, 32 = image of specimen label

          # About total:
          # @!attribute total
          #   @return [Integer]
          #   The enumerated number of things, as asserted by the person managing the record.  Different totals will default to different subclasses.  How you enumerate your collection objects is up to you.  If you want to call one chunk of coral 50 things, that's fine (total = 50), if you want to call one coral one thing (total = 1) that's fine too.  If not nil then ranged_lot_category_id must be nil.  When =1 the subclass is Specimen, when > 1 the subclass is Lot.

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          skipped_file_ids = import.get('SkippedFileIDs')
          excluded_taxa = import.get('ExcludedTaxa')
          get_tw_user_id = import.get('SFFileUserIDToTWUserID') # for housekeeping
          get_tw_project_id = import.get('SFFileIDToTWProjectID')
          get_sf_unique_id = import.get('SFSpecimenToUniqueIDs') # get the unique_id for given SF specimen_id
          get_tw_collecting_event_id = import.get('SFUniqueIDToTWCollectingEventID') # use unique_id as key to collecting_event_id
          get_tw_repo_id = import.get('SFDepoIDToTWRepoID')
          get_sf_depo_string = import.get('SFDepoIDToSFDepoString')
          get_biocuration_class_id = import.get('SpmnCategoryIDToBiocurationClassID')
          get_specimen_category_counts = import.get('SFSpecimenIDCategoryIDCount')
          get_sf_source_metadata = import.get('SFSourceMetadata')
          get_sf_identification_metadata = import.get('SFIdentificationMetadata')
          get_tw_otu_id = import.get('SFTaxonNameIDToTWOtuID')
          get_nomenclator_metadata = import.get('SFNomenclatorIDToSFNomenclatorMetadata')
          get_sf_ident_qualifier = import.get('SFIdentQualifier') # key = nomenclator_id, value = ?, aff., cf., nr. ph.
          get_tw_source_id = import.get('SFRefIDToTWSourceID')
          get_sf_verbatim_ref = import.get('RefIDToVerbatimRef')
          get_sf_locality_metadata = import.get('SFLocalityMetadata')

          # to get associated OTU, get TW taxon id, then get OTU from TW taxon id
          get_tw_taxon_name_id = import.get('SFTaxonNameIDToTWTaxonNameID')
          get_otu_from_tw_taxon_id = import.get('TWTaxonNameIDToOtuID')

          #   Following hash currently not used (was going to provide metadata for zero-count specimens not otherwise handled)
          # get_sf_collect_event_metadata = import.get('SFCollectEventMetadata')

          get_tw_collection_object_id = {} # key = SF.SpecimenID, value = TW.collection_object.id OR TW.container.id

          depo_namespace = Namespace.find_or_create_by(institution: 'Species File', name: 'SpecimenDepository', short_name: 'Depo')

          syntypes_range = {} # use ranged_lot_category for syntypes, paratypes and paralectotypes without individual counts
          paratypes_range = {}
          paralectotypes_range = {}
          get_tw_project_id.each_value do |project_id|
            syntypes_range[project_id] = RangedLotCategory.find_or_create_by(
                name: 'syntypes',
                minimum_value: 2,
                maximum_value: 100,
                project_id: project_id).id
            paratypes_range[project_id] = RangedLotCategory.find_or_create_by(
                name: 'paratypes',
                minimum_value: 2,
                maximum_value: 100,
                project_id: project_id).id
            paralectotypes_range[project_id] = RangedLotCategory.find_or_create_by(
                name: 'paralectotypes',
                minimum_value: 2,
                maximum_value: 100,
                project_id: project_id).id
          end

          path = @args[:data_directory] + 'tblSpecimens.txt'
          file = CSV.foreach(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          error_counter = 0
          saved_counter = 0
          zero_counter = 0 # Specimen_ids with no count
          no_ce_counter = 0 # No collecting_event_id
          processing_counter = 0
          ident_error_counter = 0
          asserted_dist_counter = 0

          file.each_with_index do |row, i|
            next if skipped_file_ids.include? row['FileID'].to_i
            next if excluded_taxa.include? row['TaxonNameID']
            specimen_id = row['SpecimenID']
            next if specimen_id == '0'
            next if get_sf_unique_id[specimen_id].nil?
            next if get_sf_identification_metadata[specimen_id].nil?

            sf_file_id = row['FileID']
            project_id = get_tw_project_id[sf_file_id]
            sf_taxon_name_id = row['TaxonNameID']
            tw_taxon_name_id = get_tw_taxon_name_id[sf_taxon_name_id]
            collecting_event_id = get_tw_collecting_event_id[get_sf_unique_id[specimen_id]]
            if collecting_event_id.nil?
              logger.error "NO COLLECTING EVENT: Couldn't find CollectingEvent with 'id'=: unique_id = #{get_sf_unique_id[specimen_id]}: SpecimenID = '#{specimen_id}', sf_taxon_id #{sf_taxon_name_id} = tw_taxon_name_id #{tw_taxon_name_id}, FileID = '#{sf_file_id}', no_ce_counter = '#{no_ce_counter += 1}'"
              next
            end

            logger.info "Processing SpecimenID = #{specimen_id}, FileID = '#{sf_file_id}', sf_taxon_id #{sf_taxon_name_id} = tw_taxon_name_id #{tw_taxon_name_id} ( processing_counter=#{processing_counter += 1} )[ zero_counter = #{zero_counter} ] \n"

            sf_depo_id = row['DepoID']
            ranged_lot_category_id = nil
            count_override = false # boolean, primary type zero_count specimens, count = 1 unless syntype (use ranged_lot_category)

            if get_specimen_category_counts[specimen_id].nil? # these are no-count specimens which fall into two categories:

              if get_sf_identification_metadata[specimen_id][0]['type_kind_id'].nil?
                logger.error "Identification error [ ident_error_counter = #{ident_error_counter += 1} ] \n"
                next
              end

              type_kind_id = get_sf_identification_metadata[specimen_id][0]['type_kind_id'] # used in identification section as integer

              if [1..5, 7..11].include?(type_kind_id.to_i)
                # if TypeKindID in (1 holotype, 2 syntypes, 3 neotype, 4 lectotype, 5 unspecified primary type, [not 6 unknown],
                #   7 allotype, 8 paratype, 9 lectoallotype, 10 paralectotype, 11 neoallotype), create coll obj
                #     1,3,4,5,7,9,11 use count = 1; 2,8,10 use ranged lot 2-100; rest of coll obj logic applies except for 3 former syntypes now lectotypes
                #     ( 3 specimen records with TypeKindID = 4 and SeqNum > 0: 578, 89580, 89622 )
                #     Set boolean count_override to override zero count value in loop (type_type distinguishes 1 from ranged lot? )

                if type_kind_id == '2'
                  type_kind_id = '4' if ['578', '89580', '89622'].include?(specimen_id) # was syntype, then lectotype
                end

                if type_kind_id == '2'
                  ranged_lot_category_id = syntypes_range[project_id]
                elsif type_kind_id == '8'
                  ranged_lot_category_id = paratypes_range[project_id]
                elsif type_kind_id == '10'
                  ranged_lot_category_id = paralectotypes_range[project_id]
                else
                  count_override = true # ![2, 8, 10].include?(type_kind_id.to_i) # was (type_kind_id != '2')
                end

              elsif get_sf_locality_metadata[row['LocalityID']]['level1_id'] != '0'
                # if Level1ID > 0, add asserted_distribution
                otu_id = get_otu_from_tw_taxon_id[tw_taxon_name_id]
                otu_id = get_tw_otu_id[sf_taxon_name_id] if otu_id == nil

                AssertedDistribution.new(otu_id: otu_id,
                                         geographic_area_id: CollectingEvent.find(collecting_event_id).geographic_area_id,
                                         project_id: project_id)
                logger.info " AssertedDistribution created for SpecimenID = '#{specimen_id}', FileID = '#{sf_file_id}', otu_id = '#{otu_id}' [ asserted_dist_counter = #{asserted_dist_counter}"
                next

              else # no specimen or assert dist, record error and next
                logger.error " OMITTED : No specimen or asserted distribution: SpecimenID = '#{specimen_id}', FileID = '#{sf_file_id}', DepoID = '#{sf_depo_id}', SourceID = '#{row['SourceID']}', zero_counter = '#{zero_counter += 1}' "
                next
              end
            end

            #     Rest of locality/collecting event/specimen/identification data append as import_attributes
            #         [need to import tables localities and collecting events as hashes - not unique table because indexing is too complex]
            #         [There are 18 identification records where SeqNum > 0 (highest = 1)]

            place_in_collection_keyword = Keyword.find_or_create_by(name: 'PlaceInCollection', definition: 'possible SF source of identification', project_id: project_id)

            repository_id = get_tw_repo_id.has_key?(sf_depo_id) ? get_tw_repo_id[sf_depo_id] : nil

            # get otu id from sf taxon name id, a taxon determination, called 'the primary otu id'   (what about otus without tw taxon names?)

            # list of import_attributes (aka data_attribute with type = 'ImportAttribute'):
            data_attributes_attributes = []

            # Note: collection_objects are made for all specimen records, regardless of basis of record (for now)
            #   -- except when there is no count
            if row['BasisOfRecord'].to_i > 0
              basis_of_record_string = case row['BasisOfRecord'].to_i
                                       when 1
                                         'Preserved specimen'
                                       when 2
                                         'Fossil specimen'
                                       when 3
                                         'Image (still or video)'
                                       when 4
                                         'Audio recording'
                                       when 5
                                         'Checklist/Literature/Map'
                                       when 6
                                         'Personal observation'
                                       end
              basis_of_record = {type: 'ImportAttribute',
                                 import_predicate: 'basis_of_record',
                                 value: basis_of_record_string,
                                 project_id: project_id}
              # puts " BasisOfRecord : '#{basis_of_record_string}' "
              data_attributes_attributes.push(basis_of_record)
            end

            if row['PreparationType'].present?
              preparation_type = {type: 'ImportAttribute',
                                  import_predicate: 'preparation_type',
                                  value: row['PreparationType'],
                                  project_id: project_id}
              # puts " PreparationType : '#{row[' PreparationType ']}' "
              data_attributes_attributes.push(preparation_type)
            end

            dataflags = row['DataFlags'].to_i
            if dataflags > 0
              dataflags_array = Utilities::Numbers.get_bits(dataflags)

              # for bit_position in 0..status_flags_array.length - 1 # length is number of bits set
              dataflag_text = ''
              dataflags_array.each do |bit_position|
                # 1 = ecological relationship, 2 = character data not yet implemented, 4 = image, 8 = sound, 16 = include specimen locality in maps, 32 = image of specimen label
                case bit_position # array use .join(','), flatten?
                when 0 # ecological relationship (1)
                  dataflag_text = '(ecological relationship)'
                when 1 # character data not yet implemended (2)
                  dataflag_text.concat('(character data not yet implemented)')
                when 2 # image (4)
                  dataflag_text.concat('(image)')
                when 3 # sound (8)
                  dataflag_text.concat('(sound)')
                when 4 # include specimen locality in maps (16)
                  dataflag_text.concat('(include specimen locality in maps)')
                when 5 # image of specimen label (32)
                  dataflag_text.concat('(image of specimen label)')
                end
                specimen_dataflags = {type: 'ImportAttribute',
                                      import_predicate: 'specimen_dataflags',
                                      value: dataflag_text,
                                      project_id: project_id}
                # puts " Specimen dataflags text: '#{dataflag_text}' "
                data_attributes_attributes.push(specimen_dataflags)
              end
            end

            specimen_status_id = row['SpecimenStatusID'].to_i
            if specimen_status_id > 0 && specimen_status_id != 10 # 0 = presumed Ok, 10 = no data entered
              specimen_status_string = case specimen_status_id
                                       when 1
                                         'missing'
                                       when 2
                                         'destroyed'
                                       when 3
                                         'lost'
                                       when 4
                                         'unknown'
                                       when 5
                                         'missing?'
                                       when 6
                                         'destroyed?'
                                       when 7
                                         'lost?'
                                       when 8
                                         'damaged'
                                       when 9
                                         'damaged?'
                                       end
              specimen_status = {type: 'ImportAttribute',
                                 import_predicate: 'specimen_status',
                                 value: specimen_status_string,
                                 project_id: project_id}
              # puts " specimen_status_string (SpecimenStatusID) : '#{specimen_status_string}' ('#{specimen_status_id}') "
              data_attributes_attributes.push(specimen_status)
            end

            citations_attributes = [] # if empty array will be ignored in metadata
            if row['SourceID'] != '0'
              sf_source_id = row['SourceID']

              if get_sf_source_metadata[sf_source_id] && get_sf_source_metadata[sf_source_id]['ref_id'].to_i > 0 # SF.Source has RefID, create citation or use verbatim ref string for collection object (assuming it will be created)
                sf_source_ref_id = get_sf_source_metadata[sf_source_id]['ref_id']
                # puts "SF.SourceID, RefID: '#{sf_source_id}', '#{sf_source_ref_id}'"

                # Is there a TW source_id or must we use the verbatim ref string?
                if get_tw_source_id[sf_source_ref_id]
                  citations_attributes.push(source_id: get_tw_source_id[sf_source_ref_id], project_id: project_id)
                else # no TW source equiv, use verbatim as data_attribute
                  verbatim_sf_ref = {type: 'ImportAttribute',
                                     import_predicate: "verbatim_sf_ref_id_#{sf_source_ref_id}",
                                     value: get_sf_verbatim_ref[sf_source_ref_id],
                                     project_id: project_id}
                  # puts "verbatim_sf_ref: #{get_sf_verbatim_ref[sf_source_ref_id]})"
                  data_attributes_attributes.push(verbatim_sf_ref)
                end
              end

              if get_sf_source_metadata[sf_source_id]['description'].present? # SF.Source has description, create an import_attribute
                sf_source_description_text = get_sf_source_metadata[sf_source_id]['description']
                sf_source_description = {type: 'ImportAttribute',
                                         import_predicate: 'sf_source_description',
                                         value: sf_source_description_text,
                                         project_id: project_id}
                puts "Description: '#{sf_source_description_text}'"
                data_attributes_attributes.push(sf_source_description)
              end
            end

            if sf_depo_id > '0'
              sf_depo_string = {type: 'ImportAttribute',
                                import_predicate: 'sf_depo_string',
                                value: get_sf_depo_string[sf_depo_id],
                                project_id: project_id}
              # puts "get_sf_depo_string[sf_depo_id]: '#{get_sf_depo_string[sf_depo_id]}'"
              data_attributes_attributes.push(sf_depo_string)
            end

            metadata = {notes_attributes: [{text: row['Note'],
                                            project_id: project_id,
                                            created_at: row['CreatedOn'],
                                            updated_at: row['LastUpdate'],
                                            created_by_id: get_tw_user_id[row['CreatedBy']],
                                            updated_by_id: get_tw_user_id[row['ModifiedBy']]}],

                        data_attributes_attributes: data_attributes_attributes,
                        citations_attributes: citations_attributes
            }

            # At this point all the related metadata except specimen category and count must be set

            begin

              ApplicationRecord.transaction do
                current_objects = [] # stores all objects created in the row below temporarily

                # This outer loop loops through total, category pairs, we create
                # a new collection object for each pair
                get_specimen_category_counts[specimen_id].each do |specimen_category_id, count|

                  count = 1 if count_override # is true (applies only to zero-count specimens with primary types except syntypes [=ranged_lot])

                  collection_object = CollectionObject::BiologicalCollectionObject.new(
                      metadata.merge(
                          total: count,
                          ranged_lot_category_id: ranged_lot_category_id,
                          collecting_event_id: collecting_event_id,
                          repository_id: repository_id,

                          biocuration_classifications_attributes: [{biocuration_class_id: get_biocuration_class_id[specimen_category_id.to_s], project_id: project_id}],

                          taxon_determinations_attributes: [{otu_id: get_otu_from_tw_taxon_id[tw_taxon_name_id], project_id: project_id}],
                          # taxon_determination notes here?

                          # housekeeping for collection_object
                          project_id: project_id,
                          created_at: row['CreatedOn'],
                          updated_at: row['LastUpdate'],
                          created_by_id: get_tw_user_id[row['CreatedBy']],
                          updated_by_id: get_tw_user_id[row['ModifiedBy']]
                      ))

                  collection_object.save!
                  logger.info "Collection object is saved, id = #{collection_object.id}, number #{saved_counter += 1}"
                  current_objects.push(collection_object)

                  # end  # misplaced end?

                  # At this point the collection objects have been saved successfully

                  # 1) If there are two collection objects with the same SF specimen ID, then put them in a virtual container
                  # 2) If there is an "identifier", associate it with a single collection object or the container (if applicable)
                  identifier = nil
                  if row['DepoCatNo'].present?
                    identifier = Identifier::Local::CatalogNumber.new(
                        identifier: "collection_object.id #{collection_object.id} (SF.SpecimenID #{specimen_id}): SF.DepoID #{sf_depo_id},  #{row['DepoCatNo']}",
                        namespace: depo_namespace,
                        project_id: project_id)

                    if current_objects.count == 1
                      # The "Identifier" is attached to the only collection object that is created

                      current_objects.first.identifiers << identifier if identifier

                    elsif current_objects.count > 1
                      # There is more than one object, put them in a virtual container
                      c = Container::Virtual.create!(project_id: project_id)
                      current_objects.each do |o|
                        o.put_in_container(c)
                      end

                      c.identifiers << identifier if identifier

                    else
                      puts 'OOPS' # would this happen?
                    end
                  end
                end

                # data_attributes to do:
                #   import_attribute if identification.IdentifierName
                #   other fields in tblIdentifications: HigherTaxonName, NomenclatorID, TaxonIdentNote, TypeTaxonNameID, RefID, IdentifierName/Year,
                #     PlaceInCollection, IdentificationModeNote, VerbatimLabel


                # Both SF Specimen and Identification tables have VerbatimLabel as field: Only used in Identification.
                # Treat VerbatimLabel as buffered_collecting_event -- What's that??? Since it's in identification, could be more than one
                # if identification['verbatim_label'].present?
                #   verbatim_label = ImportAttribute.create!(import_predicate: 'VerbatimLabel',
                #                                            value: identification['verbatim_label'],
                #                                            project_id: project_id)
                #   data_attributes_attributes.push(verbatim_label)
                # end


                if get_sf_identification_metadata[specimen_id]
                  get_sf_identification_metadata[specimen_id].each do |identification|
                    current_objects.each do |o|

                      # Add subsequent determinations
                      nomenclator_id = nil
                      target_nomenclator = nil

                      # If nomenclator_id exists, use it; otherwise use higher_taxon_name if available
                      if identification['nomenclator_id'].present?
                        nomenclator_id = identification['nomenclator_id']
                        # puts "Got the nomenclator_id = #{nomenclator_id}"
                        if nomenclator_id != '0'
                          # target_nomenclator = get_nomenclator_string[nomenclator_id]
                          if get_nomenclator_metadata[nomenclator_id]['nomenclator_string'].gsub('.  ', '. ').nil?
                            byebug
                          end
                          target_nomenclator = get_nomenclator_metadata[nomenclator_id]['nomenclator_string'].gsub('.  ', '. ') # delete 2nd space after period in var, form, etc.
                        elsif identification['higher_taxon_name'].present?
                          target_nomenclator = identification['higher_taxon_name']
                        end
                      end

                      if taxon_name = TaxonName.where(cached: target_nomenclator, project_id: project_id).first
                        otu = taxon_name.otus.first
                      else
                        otu = Otu.create!(name: target_nomenclator, taxon_name_id: tw_taxon_name_id, project_id: project_id) # target_nomenclator nil?
                      end

                      # create conditional attributes here
                      data_attributes_attributes = []

                      citations_attributes = []
                      if identification['ref_id'].to_i > 0
                        sf_ref_id = identification['ref_id']
                        if get_tw_source_id[sf_ref_id]
                          # source_id = get_tw_source_id[sf_ref_id]
                          # citations_attributes = Citation.create!(source_id: get_tw_source_id[sf_ref_id], project_id: project_id)
                          citations_attributes.push(source_id: get_tw_source_id[sf_ref_id], project_id: project_id)
                        else # no TW source equiv, use verbatim as data_attribute
                          verbatim_sf_ref = {type: 'ImportAttribute',
                                             import_predicate: "verbatim_sf_ref_id_#{sf_ref_id}",
                                             value: get_sf_verbatim_ref[sf_ref_id],
                                             project_id: project_id}
                          # puts "verbatim_sf_ref: #{get_sf_verbatim_ref[sf_ref_id]})"
                          data_attributes_attributes.push(verbatim_sf_ref)
                        end
                      end


                      if identification['identification_mode_note'].present?
                        identification_mode_note = {type: 'ImportAttribute',
                                                    import_predicate: 'IdentificationModeNote',
                                                    value: identification['identification_mode_note'],
                                                    project_id: project_id}
                        # puts "identification_mode_note: #{identification['identification_mode_note']}"
                        data_attributes_attributes.push(identification_mode_note)
                      end

                      # need IdentifierName: normally a role associated with the taxon determination. Since text field would be difficult to parse into people, for now adding SF tblIdentification.IdentifierName as import attribute

                      if identification['identifier_name'].present?
                        identifier_name = {type: 'ImportAttribute',
                                           import_predicate: 'IdentifierName',
                                           value: identification['identifier_name'],
                                           project_id: project_id}
                        # puts "identifier_name: #{identification['identifier_name']}"
                        data_attributes_attributes.push(identifier_name)
                        if identification['year'].to_i > 0
                          identifier_year = {type: 'ImportAttribute',
                                             import_predicate: 'IdentifierYear',
                                             value: identification['year'],
                                             project_id: project_id}
                          # puts "identifier_year: #{identification['year']}"
                          data_attributes_attributes.push(identifier_year)
                        end
                      end

                      # cannot do inline: need find_or_create
                      confidences_attributes = []
                      if get_sf_ident_qualifier[nomenclator_id]
                        confidences_attributes.push({confidence_level: ConfidenceLevel.find_or_create_by(
                            name: get_sf_ident_qualifier[nomenclator_id],
                            definition: "tblIdentifications: #{'get_sf_ident_qualifier[nomenclator_id]'}",
                            project_id: project_id)})
                      end

                      t = TaxonDetermination.create!(
                          otu_id: otu.id,
                          biological_collection_object: o,

                          citations_attributes: citations_attributes,
                          data_attributes_attributes: data_attributes_attributes,
                          notes_attributes: [text: identification['taxon_ident_note'], project_id: project_id],
                          confidences_attributes: confidences_attributes,
                          project_id: project_id)
                      t.move_to_bottom # so it's not the first record


                      if identification['verbatim_label'].present?
                        o.update_column(:buffered_collecting_event, identification['verbatim_label'])
                      end

                      if identification['place_in_collection'] == '1'
                        # o.keywords << place_in_collection_keyword     # equivalent to line below
                        # o.tags << Tag.new(keyword: place_in_collection_keyword, project_id: o.project_id)
                        o.tags.create!(keyword: place_in_collection_keyword, project_id: project_id)
                      end

                      type_kind_id = identification['type_kind_id'].to_i # exclude TypeKindID = undefined (0) and unknown (6)
                      if [1, 2, 3, 4, 8, 10].include? type_kind_id
                        type_kind = case type_kind_id
                                    when 1
                                      'holotype'
                                    when 2
                                      if o.total == 1
                                        'syntype'
                                      else
                                        'syntypes'
                                      end
                                    when 3
                                      'neotype'
                                    when 4
                                      'lectotype'
                                    when 8
                                      if o.total == 1
                                        'paratype'
                                      else
                                        'paratypes'
                                      end
                                    when 10
                                      if o.total == 1
                                        'paralectotype'
                                      else
                                        'paralectotypes'
                                      end
                                    end

                        TypeMaterial.create!(protonym_id: get_tw_taxon_name_id[identification['type_taxon_name_id']], # tw_taxon_name_id
                                             collection_object: o, # = collection_object/biological_collection_object
                                             type_type: type_kind,
                                             project_id: project_id)
                        # puts "type_material created for '#{type_kind}'"

                      elsif [5, 7, 9].include? type_kind_id
                        # create a data_attribute
                        type_kind = case type_kind_id
                                    when 5
                                      'unspecified primary type'
                                    when 7
                                      'allotype'
                                    when 9
                                      'topotype'
                                    end
                        ImportAttribute.create!(import_predicate: 'SF.TypeKind',
                                                value: type_kind,
                                                project_id: project_id,
                                                attribute_subject: o)
                        # puts "data_attribute for type_kind created for '#{type_kind}'"
                      end
                    end
                  end
                end


                puts 'CollectionObject created'
                get_tw_collection_object_id[specimen_id] = current_objects.collect {|a| a.id} # an array of collection object ids for this specimen_id

              end

            rescue ActiveRecord::RecordInvalid => e
              logger.error "CollectionObject ERROR SF.SpecimenID = #{specimen_id} (#{error_counter += 1}): " + e.record.errors.full_messages.join(';')
            end
          end

          import.set('SFSpecimenIDToCollObjID', get_tw_collection_object_id)
          puts 'SFSpecimenIDToCollObjID'
          ap get_tw_collection_object_id

          #######################################################################################
          `rake tw:db:dump backup_directory=/Users/mbeckman/src/db_backup/17_after_collections_objects/`
          #######################################################################################
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_sf_loc_col_events_metadata user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_sf_loc_col_events_metadata: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Creating metadata from tblLocalities and tblCollectingEvents...'

          get_sf_locality_metadata = {} # key = sf.LocalityID, value = hash {lat, long, precision code, etc.}
          get_sf_collect_event_metadata = {} # key = sf.CollectEventID, value = hash {collector name, date, etc.}

          path = @args[:data_directory] + 'tblLocalities.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            locality_id = row['LocalityID']

            logger.info "Working with SF.LocalityID = '#{locality_id}' \n"

            get_sf_locality_metadata[locality_id] = {file_id: row['FileID'],
                                                     level1_id: row['Level1ID'],
                                                     level2_id: row['Level2ID'],
                                                     level3_id: row['Level3ID'],
                                                     level4_id: row['Level4ID'],
                                                     latitude: row['Latitude'],
                                                     longitude: row['Longitude'],
                                                     precision_code: row['PrecisionCode'],
                                                     elevation: row['Elevation'],
                                                     max_elevation: row['MaxElevation'],
                                                     time_period_id: row['TimePeriodID'],
                                                     locality_detail: row['LocalityDetail'],
                                                     time_detail: row['TimeDetail'],
                                                     dataflags: row['DataFlags'],
                                                     country: row['Country'],
                                                     state: row['State'],
                                                     county: row['County'],
                                                     body_of_water: row['BodyOfWater'],
                                                     precision_radius: row['PrecisionRadius'],
                                                     lat_long_from: row['LatLongFrom']}
          end


          path = @args[:data_directory] + 'tblCollectEvents.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            collect_event_id = row['CollectEventID']

            logger.info "Working with SF.CollectEventID = '#{collect_event_id}' \n"

            get_sf_collect_event_metadata[collect_event_id] = {file_id: row['FileID'],
                                                               collector_name: row['CollectorName'],
                                                               year: row['Year'],
                                                               month: row['Month'],
                                                               day: row['Day'],
                                                               days_to_end: row['DaysToEnd']}
          end


          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFLocalityMetadata', get_sf_locality_metadata)
          import.set('SFCollectEventMetadata', get_sf_collect_event_metadata)

          puts 'SFLocalityMetadata'
          ap get_sf_locality_metadata

          puts 'SFCollectEventMetadata'
          ap get_sf_collect_event_metadata

          #######################################################################################
          `rake tw:db:dump backup_directory=/Users/mbeckman/src/db_backup/16_after_col_events_metadata/`
          #######################################################################################
        end


        desc 'time rake tw:project_import:sf_import:specimens:get_ident_qualifier_from_nomenclator user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define get_ident_qualifier_from_nomenclator: [:data_directory, :environment, :user_id] do |logger|
          logger.info '!!!!! NOTE: Re-analyze table data for new abbreviations !!!!!'

          logger.info 'Creating hash of NomenclatorID and IdentQualifier...'

          get_sf_ident_qualifier = {} # key = SF.SourceID, value = hash (SourceID, FileID, RefID, Description)

          path = @args[:data_directory] + 'tblNomenclator.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|

            next if row['IdentQualifier'].blank?
            nomenclator_id = row['NomenclatorID']
            ident_qualifier = row['IdentQualifier']

            logger.info "Working with SF.NomenclatorID = '#{nomenclator_id}, IdentQualifier = '#{ident_qualifier}' \n"

            ident_qualifier_text = case ident_qualifier
                                   when '?', '(?)'
                                     '?'
                                   when 'aff.', 'sp. aff.', 'sp affinis', 'spec. aff.'
                                     'aff.'
                                   when 'cf', 'cf.', 'f.'
                                     'cf.'
                                   when 'near', 'nr.'
                                     'nr.'
                                   when 'ph.'
                                     'ph.'
                                   else
                                     nil
                                   end

            next if ident_qualifier_text == nil
            get_sf_ident_qualifier[nomenclator_id] = ident_qualifier_text

          end

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFIdentQualifier', get_sf_ident_qualifier)

          puts 'SFIdentQualifier'
          ap get_sf_ident_qualifier
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_sf_identification_metadata user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_sf_identification_metadata: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Creating SF tblIdentifications metadata...'

          get_sf_identification_metadata = {} # key = SF.SpecimenID, value = array of hashes [{SeqNum => s, relevant columns => etc}, {}]

          path = @args[:data_directory] + 'tblIdentifications.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            specimen_id = row['SpecimenID']
            seqnum = row['SeqNum']

            logger.info "Working with SF.SpecimenID = '#{specimen_id}', SeqNum = '#{seqnum}' \n"

            this_ident = {
                seqnum: seqnum,
                higher_taxon_name: row['HigherTaxonName'],
                nomenclator_id: row['NomenclatorID'],
                taxon_ident_note: row['TaxonIdentNote'],
                type_kind_id: row['TypeKindID'],
                topotype: row['Topotype'],
                type_taxon_name_id: row['TypeTaxonNameID'],
                ref_id: row['RefID'],
                identifier_name: row['IdentifierName'],
                year: row['Year'],
                place_in_collection: row['PlaceInCollection'],
                identification_mode_note: row['IdentificationModeNote'],
                verbatim_label: row['VerbatimLabel']
            }

            if get_sf_identification_metadata[specimen_id] # this is the same SpecimenID as last row with another seqnum, add another identification record
              get_sf_identification_metadata[specimen_id].push this_ident

            else # this is a new SpecimenID, start new identification
              get_sf_identification_metadata[specimen_id] = [this_ident]
            end

          end

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFIdentificationMetadata', get_sf_identification_metadata)

          puts 'SFIdentificationMetadata'
          ap get_sf_identification_metadata

          #######################################################################################
          `rake tw:db:dump backup_directory=/Users/mbeckman/src/db_backup/15_after_identification_metadata/`
          #######################################################################################
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_sf_source_metadata user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_sf_source_metadata: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Creating SF tblSources metadata...'

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          skipped_file_ids = import.get('SkippedFileIDs')

          get_sf_source_metadata = {} # key = SF.SourceID, value = hash (SourceID, FileID, RefID, Description)

          path = @args[:data_directory] + 'tblSources.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            sf_file_id = row['FileID']
            next if skipped_file_ids.include? sf_file_id.to_i
            source_id = row['SourceID']
            next if source_id == '0'

            logger.info "Working with SF.SourceID = '#{source_id}' \n"

            get_sf_source_metadata[source_id] = {file_id: sf_file_id, ref_id: row['RefID'], description: row['Description']}
          end

          import.set('SFSourceMetadata', get_sf_source_metadata)

          puts 'SFSourceMetadata'
          ap get_sf_source_metadata
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_specimen_category_counts user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_specimen_category_counts: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Creating specimen category counts...'

          get_specimen_category_counts = {} # key = SF.SpecimenID, value = array [category0, count0] [category1, count1]
          #previous_specimen_id = '0'

          path = @args[:data_directory] + 'tblSpecimenCounts.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            specimen_id = row['SpecimenID']
            specimen_category_id = row['SpmnCategoryID'].to_i
            count = row['Count'].to_i.abs

            logger.info "Working with SF.SpecimenID = '#{specimen_id}', specimen_category_id = '#{specimen_category_id}', count = '#{count}' \n"

            if get_specimen_category_counts[specimen_id] # specimen_id == previous_specimen_id # this is the same SpecimenID as last row, add another category/count
              get_specimen_category_counts[specimen_id].push [specimen_category_id, count]

            else # this is a new SpecimenID, start new category/count
              get_specimen_category_counts[specimen_id] = [[specimen_category_id, count]]
              # previous_specimen_id = specimen_id
            end
          end

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFSpecimenIDCategoryIDCount', get_specimen_category_counts)

          puts 'SFSpecimenIDCategoryIDCount'
          ap get_specimen_category_counts
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_biocuration_classes user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_biocuration_classes: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Creating biocuration classes...'

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          skipped_file_ids = import.get('SkippedFileIDs')
          get_tw_project_id = import.get('SFFileIDToTWProjectID')

          get_biocuration_class_id = {} # key = SF.tblSpecimenCategories.SpmnCategoryID, value = TW.biocuration_class.id

          path = @args[:data_directory] + 'tblSpecimenCategories.txt' # had been sfSpecimenCategories but not different from the db table??
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each_with_index do |row, i|
            sf_file_id = row['FileID']
            next if skipped_file_ids.include? sf_file_id.to_i
            spmn_category_id = row['SpmnCategoryID']
            next if spmn_category_id == '0'
            project_id = get_tw_project_id[sf_file_id]

            logger.info "Working with SF.SpmnCategoryID '#{spmn_category_id}', SF.FileID '#{row['FileID']}', project.id = '#{project_id}' \n"

            biocuration_class = BiocurationClass.create!(name: row['SingularName'], definition: "tblSpecimenCategories: #{row['PluralName']}", project_id: project_id)
            get_biocuration_class_id[spmn_category_id] = biocuration_class.id.to_s
          end

          import.set('SpmnCategoryIDToBiocurationClassID', get_biocuration_class_id)

          puts 'SpmnCategoryIDToBiocurationClassID'
          ap get_biocuration_class_id
        end


        desc 'time rake tw:project_import:sf_import:specimens:import_sf_depos user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define import_sf_depos: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Importing SF depo_strings and SF to TW depo/repo mappings...'

          get_sf_depo_string = {} # key = sf.DepoID, value = sf.depo_string
          get_tw_repo_id = {} # key = sf.DepoID, value = tw respository.id; ex. ["23, 25, 567"] => {1 => tw_repo_id, 2 => tw_repo_id, 3 => tw_repo_id}
          # Note: Many SF DepoIDs will not be mapped to TW repo_ids

          count_found = 0

          path = @args[:data_directory] + 'sfDepoStrings.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each_with_index do |row, i|
            depo_id = row['DepoID']

            depo_string = row['DepoString']

            logger.info "Working with SF.DepoID '#{depo_id}', SF.NomenclatorString '#{depo_string}' (count #{count_found += 1}) \n"

            get_sf_depo_string[depo_id] = depo_string
          end

          path = @args[:data_directory] + 'sfTWDepoMappings.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-8') # could not use 'UTF-16:UTF-8'; this is file via Access

          file.each_with_index do |row, i|
            sf_depo_id_array = row['SFDepoIDarray']
            next if sf_depo_id_array.blank?

            tw_repo_id = row['TWDepoID']
            logger.info "Working with TWD/RepoID '#{tw_repo_id}', SFDepoIDarray '#{sf_depo_id_array}' \n"

            sf_depo_id_array = sf_depo_id_array.split(', ').map(&:to_i)
            sf_depo_id_array.each do |each_id|
              get_tw_repo_id[each_id] = tw_repo_id
            end
          end

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFDepoIDToSFDepoString', get_sf_depo_string)
          import.set('SFDepoIDToTWRepoID', get_tw_repo_id)

          puts 'SFDepoIDToSFDepoString'
          ap get_sf_depo_string

          puts 'SFDepoIDToTWRepoID'
          ap get_tw_repo_id

        end


        desc 'time rake tw:project_import:sf_import:specimens:collecting_events user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define collecting_events: [:data_directory, :environment, :user_id] do |logger|

          logger.info 'Building new collecting events...'

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          skipped_file_ids = import.get('SkippedFileIDs')
          get_tw_project_id = import.get('SFFileIDToTWProjectID')
          get_sf_geo_level4 = import.get('SFGeoLevel4')

          # var = get_sf_geo_level4['lskdfj']['Name']

          get_tw_collecting_event_id = {} # key = sfUniqueLocColEvents.UniqueID, value = TW.collecting_event_id

          # SF.TimePeriodID to interval code (https://paleobiodb.org/data1.2/intervals/single.json?name='')
          TIME_PERIOD_MAP = {
              768 => 1, # Cenozoic
              784 => 12, # Quaternary
              790 => 32, # Holocene
              795 => 33, # Pleistocene
              800 => 13, # Tertiary
              804 => 25, # Neogene
              805 => 34, # Pliocene
              806 => 35, # Miocene
              808 => 26, # Paleogene
              809 => 36, # Oligocene
              810 => 37, # Eocene
              811 => 38, # Paleocene
              1024 => 2, # Mesozoic
              1040 => 14, # Cretaceous
              1056 => 15, # Jurassic
              1072 => 16, # Triassic
              1280 => 3, # Paleozoic
              1296 => 17, # Permian
              1312 => 18, # Carboniferous
              1316 => 27, # Pennsylvanian
              1320 => 28, # Mississippian
              1328 => 19, # Devonian
              1344 => 20, # Silurian
              1360 => 21, # Ordovician
              1376 => 22, # Cambrian
              # 1536 => nil,  # Precambrian
              1552 => 752, # Proterozoic
              1568 => 753, # Archaean vs. Archean
              1584 => 11 # Hadean
          }.freeze

          path = @args[:data_directory] + 'sfUniqueLocColEvents.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          # FileID
          # Level1ID    Level2ID    Level3ID    Level4ID
          # Latitude    Longitude    PrecisionCode
          # Elevation    MaxElevation
          # TimePeriodID
          # LocalityDetail
          # TimeDetail
          # DataFlags, ignore: bitwise, 1 = ecological relationship, 2 = character data (not implemented?), 4 = image, 8 = sound, 16 = include specimen locality in maps, 32 = image of specimen label
          # Country    State    County
          # BodyOfWater
          # PrecisionRadius
          # LatLongFrom, ignore
          # CollectorName
          # Year MonthDay
          # DaysToEnd
          # UniqueID

          counter = 0
          error_counter = 0

          # Working with TW.project_id = 3, UniqueID = 42414 (count 42414): Year 1993, Month 2, Day 29 (not a leap year), FileID = 1, TaxonNameID = 1140695, CollectEventID = 6584
          # ActiveRecord::RecordInvalid: Validation failed: Start date day 29 is not a valid start_date_day for the month provided
          # [0] 1993,
          # [1] 2,
          # [2] 29,
          # [3] nil,
          # [4] nil,
          # [5] nil


          file.each do |row|
            sf_file_id = row['FileID']
            next if skipped_file_ids.include? sf_file_id.to_i
            project_id = get_tw_project_id[sf_file_id]

            logger.info "Working with TW.project_id = #{project_id}, UniqueID = #{row['UniqueID']} (count #{counter += 1}) \n"

            this_year, this_month, this_day = row['Year'], row['Month'], row['Day']

            # in rescue below, used collect_event.errors vs. c.error
            # if (this_year == '1900' || this_year == '1993') && this_month == '2' && this_day == '29'
            #   this_month, this_day = '3', '1'
            # end

            d = this_day != '0'
            m = this_month != '0'
            y = !((this_year == '1000') || (this_year == '0'))
            dte = row['DaysToEnd'].to_i.abs != 0

            start_date_year, start_date_month, start_date_day,
                end_date_year, end_date_month, end_date_day =

                case [y, m, d, dte] # year, month, day, days_to_end

                when [true, true, true, true] # have (year, month, day, days_to_end)

                when [true, true, true, false] # have (year, month, day), no days_to_end
                  [this_year.to_i, this_month.to_i, this_day.to_i, nil, nil, nil]

                when [true, true, false, false] # have (year, month), no (day, days_to_end)
                  [this_year.to_i, this_month.to_i, nil, nil, nil, nil]

                when [true, false, false, false] # have year, no (month, day, days_to_end)
                  [this_year.to_i, nil, nil, nil, nil, nil]

                when [false, true, true, false] # no year, have (month, day), no days_to_end
                  [nil, this_month.to_i, this_day.to_i, nil, nil, nil]

                when [false, true, true, true] # no year, have (month, day, days_to_end)
                  sdm = this_month.to_i
                  sdd = this_day.to_i
                  dte = row['DaysToEnd'].to_i.abs
                  start_date = Date.new(1999, sdm, sdd) # an arbitrary non-leap year
                  end_date = dte.days.from_now(start_date)

                  [nil, sdm, sdd, nil, end_date.month, end_date.year]

                else
                  [nil, nil, nil, nil, nil, nil]
                end


            data_attributes_bucket = {
                data_attributes_attributes: [],
                # project_id: project_id  # cannot universally assign project_id to all array attribute hashes
                # rest of housekeeping?
            }

            if row['TimeDetail'].present?
              time_detail = {type: 'ImportAttribute', import_predicate: 'TimeDetail', value: row['TimeDetail'], project_id: project_id}
              data_attributes_bucket[:data_attributes_attributes].push(time_detail)
            end

            location_string = {type: 'ImportAttribute', import_predicate: 'CountryStateCounty',
                               value: [row['Country'], row['State'], row['County']].join(':'), project_id: project_id}
            data_attributes_bucket[:data_attributes_attributes].push(location_string)

            if row['BodyOfWater'].present?
              body_of_water = {type: 'ImportAttribute', import_predicate: 'BodyOfWater', value: row['BodyOfWater'], project_id: project_id}
              data_attributes_bucket[:data_attributes_attributes].push(body_of_water)
            end

            p_code = row['PrecisionCode'].to_i
            if p_code > 0
              value = case p_code
                      when 1 then
                        'from locality label'
                      when 2 then
                        'estimated from map and locality label'
                      when 3 then
                        'based on county or similar modest area specified on locality label'
                      when 4 then
                        'estimated from less specific locality label'
                      else
                        'error'
                      end

              precision_code = {type: 'ImportAttribute', import_predicate: 'PrecisionCode', value: value, project_id: project_id}
              data_attributes_bucket[:data_attributes_attributes].push(precision_code)
            end

            # do we still need next line?
            # start_date_year, end_date_year = nil, nil if row['Year'] == "1000"

            ap [start_date_year, start_date_month, start_date_day, end_date_year, end_date_month, end_date_day]

            # metadata = {
            #     # data_attributes_attributes: data_attributes_bucket
            #
            #
            # }.merge(data_attributes_bucket)


            lat, long = row['Latitude'], row['Longitude'] # if one has value, other cannot be nil
            # if lat
            #   if long.nil?
            #     lat = nil
            #   end
            # elsif long
            #   if lat.nil?
            #     long = nil
            #   end
            # end
            min_elev, max_elev = row['Elevation'], row['MaxElevation'] # in meters; SF doesn't have MinElevation
            # if min_elev   # true if not nil
            #   if max_elev.nil?
            #     max_elev = min_elev
            #   end
            # end

            c = CollectingEvent.new(
                {
                    verbatim_latitude: lat ? lat.to_f : nil, # if lat is not nil...
                    verbatim_longitude: long ? long.to_f : nil,
                    minimum_elevation: min_elev ? min_elev.to_i : nil,
                    maximum_elevation: max_elev ? max_elev.to_i : nil,
                    verbatim_locality: row['LocalityDetail'],
                    verbatim_collectors: row['CollectorName'],
                    start_date_day: start_date_day,
                    start_date_month: start_date_month,
                    start_date_year: start_date_year,
                    end_date_day: end_date_day,
                    end_date_month: end_date_month,
                    end_date_year: end_date_year,
                    geographic_area: get_tw_geographic_area(row, logger, get_sf_geo_level4),

                    project_id: project_id
                    # paleobio_db_interval_id: TIME_PERIOD_MAP[row['TimePeriodID']], # TODO: Matt add attribute to CE !! rember ENVO implications
                }.merge(data_attributes_bucket)
            )

            begin
              c.save!
              logger.info "UniqueID #{row['UniqueID']} written"

              get_tw_collecting_event_id[row['UniqueID']] = c.id.to_s

              begin
                pr = row['PrecisionRadius'].to_i
                c.generate_verbatim_data_georeference(true, no_cached: true) # reference self, no cache
                if c.georeferences.any?
                  c.georeferences[0].error_radius = pr unless pr == '0'
                else
                  # georeference failed (bad lat/long?)
                end

              rescue ActiveRecord::RecordInvalid

                logger.error "Error: TW.project_id = #{project_id}, UniqueID = #{row['UniqueID']} (error count #{error_counter += 1}) \n"
              end

            rescue ActiveRecord::RecordInvalid # bad date?
              logger.error "CollectEvent error: FileID = #{row['FileID']}, UniqueID = #{row['UniqueID']}, Year = #{this_year}, Month = #{this_month}, Day = #{this_day}, DaysToEnd = #{row['DaysToEnd']}, (error count #{error_counter += 1})" + c.errors.full_messages.join(';')
              next
            end
          end

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFUniqueIDToTWCollectingEventID', get_tw_collecting_event_id)

          puts 'SFUniqueIDToTWCollectingEventID'
          ap get_tw_collecting_event_id

          #######################################################################################
          `rake tw:db:dump backup_directory=/Users/mbeckman/src/db_backup/14_after_coll_events/`
          #######################################################################################
        end

        # Find a TW geographic_area
        # @todo JDT HELP!
        def get_tw_geographic_area(row, logger, sf_geo_level4_hash)

          tw_area = nil
          l1, l2, l3, l4 = row['Level1ID'], row['Level2ID'], row['Level3ID'], row['Level4ID']
          l1 = '' if l1 == '0'
          l2 = '' if l2 == '-'
          l3 = '' if l3 == '---'
          l4 = '' if l4 == '---'
          t1 = l1
          t2 = t1 + l2
          t3 = t2 + l3
          tdwg_id = l1
          tdwg_id = t3 if l4 == ''
          tdwg_id = t2 if l3 == ''
          tdwg_id = t1 if l2 == ''
          tdwg_id.strip!

          if tdwg_id.blank?
            case l4
            when /\d+/ # any digits, needs translation
              # TODO @MB if level 4 is a number, look up county name in SFGeoLevel4
              # packet = 0
              name = sf_geo_level4_hash[(t3 + t4)][:name].chomp('County').strip
              tw_area = GeographicArea.where("\"tdwgID\" like '#{t3}%' and name like '%#{name}%'").first
            when /[a-z]/i # if it exists, it might be directly findable
              tdwg_id = (t3 + '-' + l4).strip
              tw_area = GeographicArea.where(tdwgID: tdwg_id).first
              if tw_area.nil? # fall back to next larger container
                tw_area = GeographicArea.where(tdwgID: t3).first
              end
            else # must be ''
              tw_area = GeographicArea.where(tdwgID: t3).first
            end
          end

          logger.info "target tdwg id: #{tdwg_id}"

          tw_area
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_sf_geo_level4_hash user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        # consists of unique_key: (level3_id, level4_id, name, country_code)
        LoggedTask.define create_sf_geo_level4_hash: [:data_directory, :environment, :user_id] do |logger|
          # Can be run independently at any time

          logger.info 'Running create_sf_geo_level4_hash...'

          get_sf_geo_level4 = {} # key = unique_key (combined level3_id + level4_id), value = level3_id, level4_id, name, country_code (from tblGeoLevel4)

          path = @args[:data_directory] + 'sfGeoLevel4.txt'
          file = CSV.foreach(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each_with_index do |row, i|

            logger.info "working with UniqueKey #{row['UniqueKey']}"

            get_sf_geo_level4[row['UniqueKey']] = {level3_id: row['Level3ID'], level4_id: row['Level4ID'], name: row['Name'], country_code: row['CountryCode']}
          end

          puts 'Getting ready to display results -- takes longer than it seems it should!'

          import = Import.find_or_create_by(name: 'SpeciesFileData')
          import.set('SFGeoLevel4', get_sf_geo_level4)

          puts 'SFGeoLevel4'
          ap get_sf_geo_level4

          #######################################################################################
          `rake tw:db:dump backup_directory=/Users/mbeckman/src/db_backup/13_after_geo_level_4/`
          #######################################################################################
        end


        desc 'time rake tw:project_import:sf_import:specimens:create_specimen_unique_id user_id=1 data_directory=/Users/mbeckman/src/onedb2tw/working/'
        LoggedTask.define create_specimen_unique_id: [:data_directory, :environment, :user_id] do |logger|
          # Can be run independently at any time

          logger.info 'Running new specimen lists (hash, array)...'

          # get_new_preserved_specimen_id = [] # array of SF.SpecimenIDs with BasisOfRecord = 0 (not stated) but with DepoID or specimen count
          get_sf_unique_id = {} # key = SF.SpecimenID, value = sfUniqueLocColEvents.UniqueID


          # logger.info '1. Getting new preferred specimen ids'
          #
          # path = @args[:data_directory] + 'sfAddPreservedSpecimens.txt'
          # file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')
          #
          # file.each do |row|
          #   get_new_preserved_specimen_id.push(row[0])
          # end


          logger.info '2. Getting SF SpecimenID to UniqueID hash'

          count = 0

          path = @args[:data_directory] + 'sfSpecimenToUniqueIDs.txt'
          file = CSV.read(path, col_sep: "\t", headers: true, encoding: 'UTF-16:UTF-8')

          file.each do |row|
            puts "SpecimenID = #{row['SpecimenID']}, count #{count += 1} \n"
            get_sf_unique_id[row['SpecimenID']] = row['UniqueLocColEventID']
          end


          import = Import.find_or_create_by(name: 'SpeciesFileData')
          # import.set('SFNewPreservedSpecimens', get_new_preserved_specimen_id)
          import.set('SFSpecimenToUniqueIDs', get_sf_unique_id)

          # puts 'SFNewPreservedSpecimens'
          # ap get_new_preserved_specimen_id

          puts 'SFSpecimenToUniqueIDs'
          ap get_sf_unique_id
        end

      end
    end
  end
end