gitcoinco/code_fund_ads

app/jobs/import_github_jobs_job.rb

# Imports job listings from the GitHub Jobs API into JobPosting records.
class ImportGithubJobsJob < ApplicationJob
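  # TextHelper is included for #truncate, used to trim long job titles.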
  include ActionView::Helpers::TextHelper

  queue_as :default

  # Entry point: imports GitHub Jobs listings for the given search tags.
  def perform(*tags)
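    # Opt this run out of Scout APM tracing unless it falls within the configured sample rate.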
    ScoutApm::Transaction.ignore! if rand > (ENV["SCOUT_SAMPLE_RATE"] || 1).to_f
    @count = 0
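    # Work through the tags five at a time, pausing briefly between groups.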
    tags.in_groups_of(5, false).each do |tag_group|
      import_jobs(tag_group)
      print tag_group.join(", ")
      sleep 3
    end
  end

  # Fetches listings for the given tags and creates or updates a JobPosting
  # for each non-contract listing.
  def import_jobs(tags)
    @jobs = []

    fetch_jobs(tags)

    user = User.codefund_bot
    organization = Organization.codefund
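    # Every known keyword alias, lowercased, for matching against listing text.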
    keyword_aliases = ENUMS::KEYWORDS.values.flatten.map(&:downcase).uniq
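    # Stopword sieve used to drop common English words before matching.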
    sieve = Stopwords::Snowball::WordSieve.new

    @jobs.each do |job|
      next if job["type"] == "Contract"

      posting = JobPosting.github.where(source_identifier: job["id"]).first_or_initialize

      # Parse the listing date once; skip listings with an unparseable date or
      # ones whose existing record already has a later start date.
      created_on = Chronic.parse(job["created_at"])&.to_date
      next unless created_on
      next if posting.start_date && posting.start_date > created_on

      # Strip punctuation (replacing it with spaces so adjacent words aren't fused),
      # drop stopwords, and intersect the remaining words with the keyword aliases.
      words = sieve.filter(lang: :en, words: "#{job["title"]} #{job["description"]} #{job["tag"]}".downcase.gsub(/,|\.\s/, " ").split)
      matching_keyword_aliases = keyword_aliases & words

      # Map the GitHub Jobs payload onto our JobPosting attributes.
      posting.user = user
      posting.organization = organization
      posting.company_name = job["company"]
      posting.company_url = job["company_url"]
      posting.company_logo_url = job["company_logo"]
      posting.job_type = normalize_job_type(job["type"])
      posting.title = truncate(job["title"], length: 80)
      posting.how_to_apply = job["how_to_apply"]
      posting.description = job["description"]
      # Location can be missing from the payload, so guard with to_s before matching.
      if job["location"].to_s.downcase.include?("remote") || job["title"].downcase.include?("remote")
        posting.remote = true
      end
      posting.keywords = normalize_keywords(matching_keyword_aliases)
      city, province_abbr = parse_location(job["location"])
      # The lookup only covers "US-XX" ids; fall back to nil for unknown or
      # non-US abbreviations so the safe navigation below can handle them.
      province = begin
        Province.find("US-#{province_abbr}")
      rescue ActiveRecord::RecordNotFound
        nil
      end

      posting.city = city
      posting.province_code = province&.iso_code
      posting.country_code = province&.country_code
      posting.url = job["url"]
      posting.start_date = created_on
      posting.end_date = posting.start_date + 60.days
      # Listings older than 60 days are imported as already archived.
      posting.status = posting.start_date >= 60.days.ago.to_date ? ENUMS::JOB_STATUSES::ACTIVE : ENUMS::JOB_STATUSES::ARCHIVED
      posting.source = ENUMS::JOB_SOURCES::GITHUB
      posting.auto_renew = false

      print "#{@count += 1},"
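      # Log validation failures but keep importing the rest of the batch.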
      unless posting.save
        puts "Unable to save: #{posting.errors.inspect}"
      end
    end

    nil
  end

  # Splits "City, ST" style locations into [city, province_abbr]; anything else
  # is returned with a nil abbreviation.
  def parse_location(location)
    parts = location.to_s.split(",").map(&:strip).select(&:present?)
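    # Not a "City, ST" pair; treat the whole string as the city.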
    return [parts.join(" "), nil] unless parts.size == 2
    return [parts.first, nil] unless parts.last.size == 2
    [parts.first, parts.last.upcase]
  end

  # Searches the GitHub Jobs API for each tag, running up to five requests
  # concurrently, and accumulates the parsed listings into @jobs.
  def fetch_jobs(tags)
    hydra = Typhoeus::Hydra.new(max_concurrency: 5)
    headers = {"Content-Type" => "application/json"}

    tags.each do |tag|
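      # Queue one search request per tag; each listing is tagged with its search
      # term so it can feed keyword matching later.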
      url = "https://jobs.github.com/positions.json"
      request = Typhoeus::Request.new(url, headers: headers, params: {search: tag})
      request.on_complete do |response|
        # Skip failed responses rather than feeding error bodies to JSON.parse.
        next unless response.success?
        JSON.parse(response.body).each { |listing| @jobs << listing.merge("tag" => tag) }
      end
      hydra.queue(request)
    end

    hydra.run
  end

  # Maps the GitHub Jobs "type" field onto our job type enum; unrecognized
  # values return nil.
  def normalize_job_type(job_type)
    case job_type
    when "Full Time" then ENUMS::JOB_TYPES::FULL_TIME
    when "Part Time" then ENUMS::JOB_TYPES::PART_TIME
    end
  end

  # Converts matched keyword aliases back into their canonical ENUMS::KEYWORDS keys.
  def normalize_keywords(tags)
    keywords = []

    tags.each do |tag|
      ENUMS::KEYWORDS.each_key do |key|
        # A tag matches when it equals the key itself or appears in that key's alias list.
        keywords << key if tag == key || ENUMS::KEYWORDS[key].include?(tag)
      end
    end

    keywords.uniq.compact.sort
  end
end