SysMO-DB/seek

View on GitHub
lib/seek/feed_reader.rb

Summary

Maintainability
A
25 mins
Test Coverage
require 'feedjira'

module Seek
  class FeedReader

    BLACKLIST_TIME=1.day

    #Fetches the feed entries - aggregated and ordered, for a particular category
    #the category may be either :project_news or :community_news
    def self.fetch_entries_for category
      raise ArgumentError.new("Invalid category - should be either :project_news or :community_news") unless [:project_news,:community_news].include? category

      feeds = fetch_feeds_for_category(category)

      filter_feeds_entries_with_chronological_order(feeds, Seek::Config.send("#{category.to_s}_number_of_entries"))
    end

    def self.fetch_feeds_for_category(category)
      feeds = determine_feed_urls(category).collect do |url|
        begin
          Rails.cache.fetch(cache_key(url), :expires_in => Seek::Config.home_feeds_cache_timeout.minutes) do
            get_feed(url)
          end
        rescue => exception
          Rails.logger.error("Problem with feed: #{url} - #{exception.message}")
          blacklist(url)
          nil
        end
      end
      feeds.compact!
      feeds
    end

    def self.blacklist url
      blacklisted = Seek::Config.blacklisted_feeds || {}
      blacklisted[url]=Time.now
      Seek::Config.blacklisted_feeds = blacklisted
    end

    def self.is_blacklisted? url
      list = Seek::Config.blacklisted_feeds || {}
      return false unless list[url]
      if list[url] < BLACKLIST_TIME.ago
        list.delete(url)
        Seek::Config.blacklisted_feeds=list
        false
      else
        true
      end
    end

    def self.determine_feed_urls category
      urls = Seek::Config.send("#{category.to_s}_feed_urls")
      urls.split(",").select{|url| !url.blank? && !is_blacklisted?(url)}
    end

    #deletes the cache directory, along with any files in it
    def self.clear_cache
      urls = Seek::Config.project_news_feed_urls.split(",").select{|url| !url.blank?}
      urls = urls | Seek::Config.community_news_feed_urls.split(",").select{|url| !url.blank?}
      urls.each do |url|
        Rails.cache.delete(cache_key(url))
      end
    end

    def self.cache_key feed_url
      #use md5 to keep the key short - highly unlikely to be a collision
      key = Digest::MD5.hexdigest(feed_url.strip)
      "news-feed-#{key}"
    end

    def self.get_feed feed_url

      unless feed_url.blank?
        #trim the url element
        feed_url.strip!
        feed = Feedjira::Feed.fetch_and_parse(feed_url)
        raise "Error reading feed for #{feed_url} error #{feed}" if feed.is_a?(Numeric)
        feed
      end
    end

    def self.filter_feeds_entries_with_chronological_order feeds, number_of_entries=10
      filtered_entries = []
      unless feeds.blank?
        feeds.each do |feed|
          filtered_entries = fetch_and_filter_entries(feed, filtered_entries, number_of_entries)
          end
      end
      sort_entries(filtered_entries).take(number_of_entries)
    end

    def self.sort_entries(filtered_entries)
      filtered_entries.sort do |entry_a, entry_b|
        date_a = resolve_feed_date(entry_a)
        date_b = resolve_feed_date(entry_b)
        date_b <=> date_a
      end
    end

    def self.fetch_and_filter_entries(feed, filtered_entries, number_of_entries)
      entries = feed.entries || []


      entries.each do |entry|
        class << entry
          attr_accessor :feed_title
        end
        entry.feed_title = feed.title
      end

      filtered_entries |= entries.take(number_of_entries) if entries
      filtered_entries
    end

    def self.resolve_feed_date(entry)
      date = nil
      date = entry.try(:updated) if entry.respond_to?(:updated)
      date ||= entry.try(:published) if entry.respond_to?(:published)
      date ||= entry.try(:last_modified) if entry.respond_to?(:last_modified)
      date ||= 10.year.ago
      date
    end

  end
  
end