binarybabel/latestver

View on GitHub
app/models/catalog/web_scraper.rb

Summary

Maintainability
A
1 hr
Test Coverage
# == Schema Information
#
# Table name: catalog_entries
#
#  id             :integer          not null, primary key
#  name           :string           not null
#  type           :string           not null
#  tag            :string           not null
#  version        :string
#  version_date   :date
#  prereleases    :boolean          default(FALSE)
#  external_links :text
#  data           :text
#  refreshed_at   :datetime
#  last_error     :string
#  no_log         :boolean          default(FALSE)
#  hidden         :boolean          default(FALSE)
#  created_at     :datetime         not null
#  updated_at     :datetime         not null
#

require 'erb'
require 'open-uri'

module Catalog
  class WebScraper < CatalogEntry
    # The page URL is mandatory and must be an absolute http:// or https:// address.
    validates :web_page_url, presence: true, format: { with: /\Ahttp(s)?:\/\/.+\z/ }
    # A pattern that picks out the version line is mandatory as well.
    validates :include_regex, presence: true

    # Scraper settings are serialized as JSON into the `data` column and exposed
    # through the accessors listed here.
    store :data,
          accessors: %i[web_page_url css_query xpath_query include_regex exclude_regex],
          coder: JSON

    # Scans the fetched text line by line and returns the version taken from the
    # first line that matches +include_regex+ and is not ruled out by
    # +exclude_regex+.
    #
    # How the version is extracted from the qualifying line:
    # * if +include_regex+ declares a named group +ver+, that capture is
    #   returned as-is;
    # * otherwise the whole matched substring is passed to +scan_short_version+
    #   (when +prereleases+ is set) or to +scan_version+.
    #
    # @return [String, nil] nil when no line qualifies or no text was fetched;
    #   otherwise the capture or the result of the scan helper
    def check_remote_version
      # to_s guards against a nil body so an empty fetch simply yields no version.
      text = fetch_remote_text.to_s

      include_pattern = Regexp.new(include_regex.to_s)
      # A blank exclude pattern means "exclude nothing". Compiling it would give
      # a regexp that matches every line, so it is left out entirely.
      exclude_source = exclude_regex.to_s
      exclude_pattern = exclude_source.empty? ? nil : Regexp.new(exclude_source)

      text.split("\n").each do |line|
        match = line.match(include_pattern)
        next unless match
        next if exclude_pattern && line =~ exclude_pattern

        return match['ver'] if match.names.include?('ver')

        matched_text = match[0].to_s
        return prereleases ? scan_short_version(matched_text) : scan_version(matched_text)
      end

      nil
    end

    # Builds the default external link list: a single "Website" anchor whose
    # +href+ is +web_page_url+.
    #
    # The URL is HTML-escaped before being placed inside the attribute so that
    # characters such as <tt>"</tt>, <tt><</tt>, <tt>></tt> or <tt>&</tt> in
    # the stored value cannot terminate the attribute or inject markup.
    #
    # @return [Array<String>] one-element array holding the anchor markup
    def default_links
      href = ERB::Util.html_escape(web_page_url.to_s)
      [
          %(<a href="#{href}"><i class="fa fa-globe"></i> Website</a>)
      ]
    end

    # Seeds the stock scraper entries (fontawesome, gitlab, gradle), each under
    # the 'latest' tag. For every entry the scraper settings are handed to
    # create_with and the record is then requested with find_or_create_by! on
    # its name and tag. Returns the result of the last such call.
    def self.reload_defaults!
      seeds = {
          'fontawesome' => {
              web_page_url: 'http://fontawesome.io/',
              css_query: 'div.shameless-self-promotion',
              xpath_query: 'text()',
              include_regex: '.+'
          },
          'gitlab' => {
              web_page_url: 'https://gitlab.com/gitlab-org/gitlab-ce/raw/master/CHANGELOG.md',
              include_regex: '## v?[0-9\.]+',
              exclude_regex: 'unreleased'
          },
          'gradle' => {
              web_page_url: 'https://docs.gradle.org/current/release-notes',
              css_query: 'h3.releaseinfo',
              include_regex: '.+'
          }
      }

      seeded = seeds.map do |entry_name, settings|
        create_with(settings).find_or_create_by!(name: entry_name, tag: 'latest')
      end
      seeded.last
    end

    protected

    # Downloads the configured page and returns the text that should be searched
    # for a version string.
    #
    # +web_page_url+ is first expanded with +template_params+. Only
    # <tt>%{name}</tt> placeholders are substituted: every other <tt>%</tt>
    # (for example in a URL-encoded sequence such as %20) is doubled beforehand
    # so that String#% emits it as a literal percent sign.
    #
    # When +css_query+ and/or +xpath_query+ is set, the body is parsed as HTML
    # and narrowed with +at_css+, then +at_xpath+ on the result; the text of the
    # final node is returned. With neither query set the raw body is returned.
    #
    # NOTE(review): when a query matches nothing the lookup presumably returns
    # nil, so the following call raises NoMethodError - confirm callers surface
    # that as a refresh error.
    #
    # @return [String] the raw response body, or the text of the selected node
    def fetch_remote_text
      url = web_page_url.to_s.gsub(/%(?!\{)/, '%%') % template_params
      # URI.open is open-uri's explicit entry point; plain Kernel#open no longer
      # fetches URLs as of Ruby 3.0.
      body = URI.open(url) { |io| io.read }

      return body if css_query.to_s.empty? && xpath_query.to_s.empty?

      node = Nokogiri::HTML(body)
      node = node.at_css(css_query) unless css_query.to_s.empty?
      node = node.at_xpath(xpath_query) unless xpath_query.to_s.empty?
      node.text
    end

    # rails_admin form configuration for this model. The create and edit
    # sections declare the same five scraper fields in the same order; in both,
    # the web_page_url field takes its help text from
    # ::CatalogEntry.template_help.
    rails_admin do
      # Fields offered when a new entry is created.
      create do
        field :web_page_url do
          help ::CatalogEntry.template_help
        end
        field :css_query
        field :xpath_query
        field :include_regex
        field :exclude_regex
      end

      # Fields offered when an existing entry is edited (mirrors the create list).
      edit do
        field :web_page_url do
          help ::CatalogEntry.template_help
        end
        field :css_query
        field :xpath_query
        field :include_regex
        field :exclude_regex
      end
    end
  end
end