GlobalNamesArchitecture/dwca-hunter

View on GitHub
lib/dwca_hunter/resources/mammal_divdb.rb

Summary

Maintainability
C
7 hrs
Test Coverage
# frozen_string_literal: true

module DwcaHunter
  class ResourceMammalDiversityDb < DwcaHunter::Resource
    def initialize(opts = {})
      @command = "mammal-div-db"
      @title = "ASM Mammal Diversity Database"
      @url = "https://www.mammaldiversity.org/assets/data/MDD.zip"
      @UUID = "94270cdd-5424-4bb1-8324-46ccc5386dc7"
      @download_path = File.join(Dir.tmpdir,
                                 "dwca_hunter",
                                 "mammal-div-db",
                                 "data.zip")
      @synonyms = []
      @names = []
      @vernaculars = []
      @extensions = []
      @synonyms_hash = {}
      @vernaculars_hash = {}
      super(opts)
    end

    def download
      DwcaHunter.logger_write(object_id, "Downloading")
      `curl '#{@url}' -H 'User-Agent:' -o #{@download_path}`
    end

    def unpack
      unpack_zip
    end

    def make_dwca
      DwcaHunter.logger_write(object_id, "Extracting data")
      get_names
      generate_dwca
    end

    private

    def get_names
      Dir.chdir(@download_dir)
      collect_names
    end

    def find_csv_file
      Dir.chdir(@download_dir)
      Dir.entries(".").each do |f|
        return f if f[-4..-1] == ".csv"
      end
    end

    def assemble_name(row)
      name = row["sciName"].gsub("_", " ")
      auth = "#{row['authoritySpeciesAuthor']} #{row['aurhoritySpeciesYear']}".
        strip
      auth = "(#{auth})" if row["authorityParentheses"] == 1
      rank = "species"
      rank = "subspecies" if (name.split(" ").size > 2)
      name = "#{name} #{auth}".strip
      [rank, name]
    end

    def assemble_synonym(row)
      name = row["originalNameCombination"].gsub("_", " ")
      auth = "#{row['authoritySpeciesAuthor']} #{row['aurhoritySpeciesYear']}".
        strip
      name = "#{name} #{auth}".strip
      { taxon_id: row["id"], name_string: name, status: "synonym" }
    end

    def vernaculars(row)
      id = row["id"]
      res = []
      vern = row["mainCommonName"].to_s
      res << vern  if vern != ""
      verns = row["otherCommonNames"].to_s
      if verns != ""
        verns = verns.split("|")
        res += verns
      end
      res.map do |v|
        { taxon_id: id, vern: v, lang: "en" }
      end
    end

    def collect_names
      @names_index = {}
      file = CSV.open(File.join(@download_dir, find_csv_file),
                      headers: true)
      file.each do |row|
        order = row["order"].to_s.capitalize
        order = nil if order.match(/incertae/) || order.empty?
        family = row["family"].to_s.capitalize
        family = nil if family.match(/incertae/) || family.empty?
        genus = row["genus"].to_s.capitalize
        genus = nil if genus.match(/incertae/) || genus.empty?
        rank, name_string = assemble_name(row)
        @names << {
          taxon_id: row["id"],
          kingdom: "Animalia",
          phylum: "Chordata",
          klass: "Mammalia",
          order: order,
          family: family,
          genus: genus,
          name_string: name_string,
          rank: rank,
          code: "ICZN"
        }
        if row["originalNameCombination"].to_s != ""
          @synonyms << assemble_synonym(row)
        end
        vernaculars(row).each do |vern|
          @vernaculars << vern
        end
      end
    end

    def generate_dwca
      DwcaHunter.logger_write(object_id,
                              "Creating DarwinCore Archive file")
      @core = [["http://rs.tdwg.org/dwc/terms/taxonID",
                "http://rs.tdwg.org/dwc/terms/scientificName",
                "http://rs.tdwg.org/dwc/terms/kingdom",
                "http://rs.tdwg.org/dwc/terms/phylum",
                "http://rs.tdwg.org/dwc/terms/class",
                "http://rs.tdwg.org/dwc/terms/order",
                "http://rs.tdwg.org/dwc/terms/family",
                "http://rs.tdwg.org/dwc/terms/genus",
                "http://rs.tdwg.org/dwc/terms/taxonRank",
                "http://rs.tdwg.org/dwc/terms/nomenclaturalCode"]]
      @names.each do |n|
        @core << [n[:taxon_id], n[:name_string],
                  n[:kingdom], n[:phylum], n[:klass], n[:order], n[:family],
                  n[:genus], n[:rank], n[:code]]
      end
      @extensions << {
        data: [[
          "http://rs.tdwg.org/dwc/terms/taxonID",
          "http://rs.tdwg.org/dwc/terms/vernacularName",
          "http://purl.org/dc/terms/language"
        ]],
        file_name: "vernacular_names.txt",
        row_type: "http://rs.gbif.org/terms/1.0/VernacularName"
      }

      @vernaculars.each do |v|
        @extensions[-1][:data] << [v[:taxon_id], v[:vern], v[:lang]]
      end

      @extensions << {
        data: [[
          "http://rs.tdwg.org/dwc/terms/taxonID",
          "http://rs.tdwg.org/dwc/terms/scientificName",
          "http://rs.tdwg.org/dwc/terms/taxonomicStatus"
        ]],
        file_name: "synonyms.txt"
      }
      @synonyms.each do |s|
        @extensions[-1][:data] << [s[:taxon_id], s[:name_string], s[:status]]
      end
      @eml = {
        id: @uuid,
        title: @title,
        authors: [
          { first_name: "C. J.",
            last_name: "Burgin" },
            { first_name: "J. P.",
              last_name: "Colella" },
              { first_name: "P. L.",
                last_name: "Kahn" },
                { first_name: "N. S.",
                  last_name: "Upham" }
        ],
        metadata_providers: [
          { first_name: "Dmitry",
            last_name: "Mozzherin",
            email: "dmozzherin@gmail.com" }
        ],
        abstract: "Mammal Diversity Database. 2021. www.mammaldiversity.org. " \
        "American Society of Mammalogists. Accessed 2021-01-28.", url: @url
      }
      super
      end
    end
  end