ArtOfCode-/qpixel

View on GitHub
scripts/import/api_import.rb

Summary

Maintainability
A
1 hr
Test Coverage
# Thin client around the Stack Exchange API (v2.2) that fetches posts and
# users by id and reshapes them into flat, string-keyed hashes.
#
# Relies on a global `$logger` responding to `debug` and `error`.
class APIImport
  # Base URL shared by every endpoint this class calls.
  API_BASE = 'https://api.stackexchange.com/2.2'.freeze

  # Number of ids sent per request; also sent as the `pagesize` parameter.
  BATCH_SIZE = 100

  # Numeric post type ids keyed by the API's `post_type` string.
  POST_TYPE_IDS = { 'question' => 1, 'answer' => 2 }.freeze

  # @param options [#key, #site] object supplying the API key and the site
  #   name that are sent with every request
  def initialize(options)
    @options = options
    # Earliest time the next request may be sent; nil until the API returns
    # a `backoff` field.
    @backoff = nil
    # Values sent as the `filter` query parameter, one per endpoint.
    @filters = {
      posts: '!)4k-FmSF0IDnEJZS2CCHzTx9)0VD',
      questions: '!-MOiN_e9QlHG7Z-blYG54Tx0UIt0fJoL9',
      answers: '!SWJ_aFipee(LVrV(mP',
      users: '!)sb2*WuVIS_)ybx(_xTP'
    }
  end

  # Performs a GET request and returns the parsed JSON body.
  #
  # `key` and `site` from the options are always sent; entries in +params+
  # override them. Nil-valued parameters are left out of the query string.
  # The response is logged (debug for 2xx, error otherwise) and parsed
  # regardless of its status code.
  #
  # @param uri [String] absolute URL without a query string
  # @param params [Hash] extra query parameters
  # @return [Hash] the decoded JSON response
  # @raise [JSON::ParserError] if the response body is not valid JSON
  def request(uri, params = {})
    query = { key: @options.key, site: @options.site }.merge(params).compact
    full_uri = URI.parse(uri)
    # encode_www_form escapes reserved characters, so a value containing
    # `&`, `=` or whitespace cannot corrupt the query string.
    full_uri.query = URI.encode_www_form(query)

    wait_for_backoff

    resp = Net::HTTP.get_response(full_uri)
    if resp.code.start_with?('2')
      $logger.debug "#{resp.code} GET #{full_uri}"
    else
      $logger.error "#{resp.code} GET #{full_uri}:"
      $logger.error resp.body
    end

    data = JSON.parse(resp.body)
    # Remember when the API-requested pause ends so the next call honours it.
    @backoff = Time.now + data['backoff'].to_i if data['backoff']
    data
  end

  # Fetches the given posts, then enriches questions with their answer count
  # and tags, and answers with the id of their question.
  #
  # @param ids [Array<Integer, nil>] post ids; nil entries are ignored
  # @return [Array<Hash>] one string-keyed hash per post returned by the API
  def posts(ids)
    keyed = fetch_items('posts', ids, :posts).map do |post|
      created = post['creation_date']
      [post['post_id'], {
        'id' => post['post_id'],
        'post_type_id' => POST_TYPE_IDS[post['post_type']],
        'creation_date' => Time.at(created).iso8601,
        'score' => post['score'],
        'body' => post['body'],
        'owner_user_id' => post.dig('owner', 'user_id'),
        'last_editor_user_id' => post.dig('last_editor', 'user_id'),
        # Posts without an edit/activity timestamp fall back to creation.
        'last_edit_date' => Time.at(post['last_edit_date'] || created).iso8601,
        'last_activity_date' => Time.at(post['last_activity_date'] || created).iso8601,
        'title' => post['title']
      }]
    end.to_h

    fetch_items('questions', ids_of_type(keyed, 1), :questions).each do |question|
      keyed[question['question_id']].merge!(
        'answer_count' => question['answer_count'],
        'tags' => "<#{question['tags'].join('><')}>"
      )
    end

    fetch_items('answers', ids_of_type(keyed, 2), :answers).each do |answer|
      keyed[answer['answer_id']].merge!('parent_id' => answer['question_id'])
    end

    keyed.values
  end

  # Fetches the given users.
  #
  # @param ids [Array<Integer, nil>] user ids; nil entries are ignored
  # @return [Array<Hash>] one string-keyed hash per user returned by the API
  def users(ids)
    fetch_items('users', ids, :users).map do |user|
      {
        'id' => user['user_id'],
        'creation_date' => Time.at(user['creation_date']).iso8601,
        'display_name' => user['display_name'],
        'website_url' => user['website_url'],
        'account_id' => user['account_id']
      }
    end
  end

  private

  # Sleeps until the recorded backoff deadline (plus one second) has passed.
  def wait_for_backoff
    return if @backoff.nil?

    remaining = @backoff - Time.now
    return unless remaining.positive?

    seconds = remaining.to_i + 1
    $logger.debug "Waiting #{seconds}s for backoff"
    sleep seconds
  end

  # Requests +resource+ in batches of BATCH_SIZE ids and returns all items.
  # A response without an `items` array is logged and contributes nothing.
  def fetch_items(resource, ids, filter_name)
    ids.compact.each_slice(BATCH_SIZE).flat_map do |batch|
      url = "#{API_BASE}/#{resource}/#{batch.join(';')}"
      items = request(url, filter: @filters[filter_name], pagesize: BATCH_SIZE.to_s)['items']
      $logger.error "No items returned for #{url}" if items.nil?
      Array(items)
    end
  end

  # Returns the ids of the keyed posts whose post_type_id equals +type_id+.
  def ids_of_type(keyed, type_id)
    keyed.values.select { |post| post['post_type_id'] == type_id }.map { |post| post['id'] }
  end
end