GlobalNamesArchitecture/dwca-hunter

View on GitHub
lib/dwca_hunter.rb

Summary

Maintainability
A
45 mins
Test Coverage
# frozen_string_literal: true

require "base64"
require "biodiversity"
require "dwc_archive"
require "gn_uuid"
require "dwca_hunter/resource"
require "fileutils"
require "htmlentities"
require "json"
require "zip"
require "logger"
require "net/http"
require "rest_client"
require "tmpdir"
require "uri"
require "cgi"

# Load every Ruby file directly under dwca_hunter/, then every file under
# dwca_hunter/resources/. Each glob result is sorted so the load order is
# the same on every machine: Dir[] only returns sorted results on
# Ruby 3.0 and later, and follows filesystem order on older versions.
[
  File.join(__dir__, "dwca_hunter", "*.rb"),
  File.join(__dir__, "dwca_hunter", "resources", "*.rb")
].each do |pattern|
  Dir[pattern].sort.each { |f| require f }
end

# DwcaHunter is the namespace module for the project. Its singleton methods
# provide a shared logger, a driver that runs a resource through its
# download / unpack / make_dwca steps, and helpers that normalize
# authorship strings.
module DwcaHunter
  BATCH_SIZE = 10_000

  class << self
    # NOTE(review): nothing in this module assigns @resource, so this reader
    # returns nil unless it is set elsewhere -- confirm it is still needed.
    attr_reader :resource

    # Lets callers install their own logger.
    attr_writer :logger

    # Returns the module-wide logger. When none has been assigned, a
    # Logger with a nil log device (which writes nowhere) is created
    # and memoized.
    def logger
      @logger ||= Logger.new(nil)
    end

    # Replaces the current logger with a fresh one that writes nowhere.
    def logger_reset
      self.logger = Logger.new(nil)
    end

    # Logs the text "|obj_id|message|" through the module logger.
    #
    # @param obj_id [Object] identifier interpolated into the log line
    # @param message [Object] message interpolated into the log line
    # @param method [Symbol] name of the public logger method to call
    #   (:info by default)
    # @raise [NoMethodError] if +method+ is not a public method of the logger
    def logger_write(obj_id, message, method = :info)
      # public_send (not send) so that a mistaken severity cannot reach
      # private methods such as Kernel#puts or Kernel#system.
      logger.public_send(method, "|#{obj_id}|#{message}|")
    end

    # Runs the steps of a resource in order: download (only if the resource
    # says it needs it), unpack (only if the resource says it needs it),
    # then make_dwca. Returns whatever resource.make_dwca returns.
    def process(resource)
      resource.download if resource.needs_download?
      resource.unpack if resource.needs_unpack?
      resource.make_dwca
    end

    # Returns an array of all currently loaded classes that inherit
    # (directly or indirectly) from Resource. Resource itself is excluded.
    def resources
      ObjectSpace.each_object(Class).select { |klass| klass < Resource }
    end

    # Rewrites an authorship string so that initials given after a surname
    # are moved in front of it, and the last two authors are joined by "&".
    #
    # The string is split into an optional opening parenthesis, the author
    # names, and a trailing part that begins with whitespace, a comma or ")"
    # and contains no uppercase letters (for example ", 1900"). Only the
    # author names are rewritten. "duPont" is spelled out as "du Pont" first.
    #
    #   normalize_authors("Smith, J")            #=> "J. Smith"
    #   normalize_authors("(Smith, JK) 1900")    #=> "(J. K. Smith) 1900"
    #   normalize_authors("Smith & Jones, 1900") #=> "Smith & Jones, 1900"
    #
    # @param auth [String] authorship string
    # @return [String] the normalized authorship string
    def normalize_authors(auth)
      auth = auth.gsub(/duPont/, "du Pont")
      match = /^(\(?)(.*?)(([\s,)][^[:upper:]]*)?$)/.match(auth)
      return auth unless match

      opening, names, trailing = match[1..3]
      # "&" and "," are both treated as separators between name parts.
      parts = names.tr("&", ",").split(",").map(&:strip)
      # A single part has nothing to reorder; it is kept exactly as matched.
      names = move_initials(parts) if parts.size > 1
      "#{opening}#{names}#{trailing}"
    end

    # Joins a list of name parts into one string, attaching initials to the
    # name that precedes them.
    #
    # A part made of one to four uppercase letters, optionally followed by
    # " Jr", is treated as initials of the previous name: the letters are
    # dotted and placed in front of that name, and " Jr" (if present) is
    # appended after it. The first part is always kept as a name. The
    # resulting names are joined with ", " and the last one with " & ".
    #
    #   move_initials(%w[Smith AB Jones C]) #=> "A. B. Smith & C. Jones"
    #   move_initials(%w[Smith Jones Brown]) #=> "Smith, Jones & Brown"
    #
    # @param ary [Array<String>] name parts in their original order
    # @return [String] the joined names; "" when +ary+ is empty
    def move_initials(ary)
      names = ary.each_with_object([]) do |part, acc|
        # The first part can never be initials: there is no name before it.
        match = /^([[:upper:]]{1,4})(\sJr)?$/.match(part) unless acc.empty?
        if match
          initials = match[1].chars.join(". ")
          acc[-1] = "#{initials}. #{acc[-1]}#{match[2]}"
        else
          acc << part
        end
      end
      # Without this guard an empty list would produce the string " & ".
      return "" if names.empty?

      names.size == 1 ? names[0] : "#{names[0..-2].join(', ')} & #{names[-1]}"
    end
  end
end