# lib/feedjira_extension.rb
require 'feedjira'
require 'digest'
require 'mail'
module FeedjiraExtension
AUTHOR_ATTRS = %i[name email uri]
LINK_ATTRS = %i[href rel type hreflang title length]
ENCLOSURE_ATTRS = %i[url type length]
class Author < Struct.new(*AUTHOR_ATTRS)
def empty?
all?(&:nil?)
end
def to_json(options = nil)
each_pair.flat_map { |key, value|
if value.presence
case key
when :email
"<#{value}>"
when :uri
"(#{value})"
else
value
end
else
[]
end
}.join(' ').to_json(options)
end
end
# Author parsed from an Atom author element: one SAXMachine `element`
# declaration per Author member.
class AtomAuthor < Author
  include SAXMachine

  AUTHOR_ATTRS.each { |member| element member }
end
# Author parsed from the text content of an RSS author-like element.
class RssAuthor < Author
  include SAXMachine

  # Stores the raw text and tries to split it into name and email using
  # Mail::Address. If the text cannot be parsed as an address at all, the
  # whole text becomes the name.
  def content=(content)
    @content = content

    address =
      begin
        Mail::Address.new(content)
      rescue StandardError
        nil
      end

    if address.nil?
      self.name = content
    else
      # Each accessor is guarded separately: a failure reading one part
      # leaves that member nil without affecting the other.
      self.name =
        begin
          address.name
        rescue StandardError
          nil
        end
      self.email =
        begin
          address.address
        rescue StandardError
          nil
        end
    end
  end

  # NOTE(review): declared after the custom writer above; presumably
  # SAXMachine keeps an already-defined `content=` -- confirm before
  # reordering.
  value :content
end
# Author built from the itunes:name / itunes:email children of an
# itunes:owner element.
class ITunesRssOwner < Author
  include SAXMachine

  %i[name email].each do |member|
    element :"itunes:#{member}", as: member
  end
end
# An enclosure element, exposing its url, type and length attributes.
class Enclosure
  include SAXMachine

  ENCLOSURE_ATTRS.each { |attr_name| attribute attr_name }

  # Serializes only the attributes that have a truthy value.
  def to_json(options = nil)
    present = {}
    ENCLOSURE_ATTRS.each do |attr_name|
      current = __send__(attr_name)
      present[attr_name] = current if current
    end
    present.to_json(options)
  end
end
# An Atom link element, exposing every attribute listed in LINK_ATTRS.
class AtomLink
  include SAXMachine

  LINK_ATTRS.each { |attr_name| attribute attr_name }

  # True when none of the link attributes has been set.
  def empty?
    LINK_ATTRS.none? { |attr_name| !__send__(attr_name).nil? }
  end

  # Serializes only the attributes that have a truthy value.
  def to_json(options = nil)
    pairs = LINK_ATTRS.map { |attr_name| [attr_name, __send__(attr_name)] }
    Hash[pairs.select { |_, current| current }].to_json(options)
  end
end
# An RSS link element whose text content is the link target.
class RssLinkElement
  include SAXMachine

  value :href

  # A link counts as empty unless its href is a String.
  def empty?
    !(String === href)
  end

  # Serializes as { "href": ... } when href is a String, else as {}.
  def to_json(options = nil)
    # Ignore non-string values, because SaxMachine leaks its
    # internal value :no_buffer when the content of an element
    # is empty.
    payload = href.is_a?(String) ? { href: href } : {}
    payload.to_json(options)
  end
end
# Mixin that collects author elements into `_authors` and exposes the
# non-empty ones through `authors`.
module HasAuthors
  def self.included(mod)
    mod.module_exec do
      # `name` here is the name of the including class.
      if /RSS/ === name
        %w[itunes:author dc:creator author managingEditor].each do |tag|
          # Drop any single-element config already registered for this
          # tag before registering it as an RssAuthor collection.
          sax_config.top_level_elements[tag].clear
          elements tag, class: RssAuthor, as: :_authors
        end
      else
        elements :author, class: AtomAuthor, as: :_authors
      end

      # Authors that have at least one member set.
      def authors
        _authors.reject { |author| author.empty? }
      end
    end
  end
end
# Mixin that parses a single enclosure and derives an image URL from it.
module HasEnclosure
  def self.included(mod)
    mod.module_exec do
      # Replace any existing config for the enclosure tag with one that
      # builds an Enclosure object.
      sax_config.top_level_elements['enclosure'].clear
      element :enclosure, class: Enclosure

      # The enclosure itself when its type starts with "image/", else nil.
      def image_enclosure
        enclosure if %r{\Aimage/} === enclosure.try!(:type)
      end

      # URL of the image enclosure; memoized once a truthy value is found.
      def image
        @image ||= image_enclosure.try!(:url)
      end
    end
  end
end
# Mixin that collects link elements into `_links` and derives `links`,
# `alternate_link` and `url` from them. Which elements are collected
# depends on the name of the including class (RSS, FeedBurner RSS, Atom).
module HasLinks
  def self.included(mod)
    mod.module_exec do
      # Remove every config already registered for the link tag so the
      # declarations below are the only ones that apply.
      sax_config.top_level_elements['link'].clear
      sax_config.collection_elements['link'].clear

      case name
      when /RSS/
        elements :link, class: RssLinkElement, as: :rss_links

        case name
        when /FeedBurner/
          # FeedBurner RSS feeds carry their Atom links as atom10:link.
          # The tag was previously misspelled 'atok10:link', which can
          # never match, so atom_links was always empty.
          elements :'atom10:link', class: AtomLink, as: :atom_links

          def _links
            [*rss_links, *atom_links]
          end
        else
          alias_method :_links, :rss_links
        end

        # Prepended so `super` reaches the class's own `url` first; the
        # link-derived value is only a fallback.
        prepend(
          Module.new {
            def url
              super || (alternate_link || links.first).try!(:href)
            end
          }
        )
      when /Atom/
        elements :link, class: AtomLink, as: :_links

        def url
          (alternate_link || links.first).try!(:href)
        end
      end

      # All collected links that are not empty.
      def links
        _links.reject(&:empty?)
      end

      # First Atom link with rel="alternate" whose type is text/html or
      # unspecified; nil when there is none.
      def alternate_link
        links.find { |link|
          link.is_a?(AtomLink) &&
            link.rel == 'alternate' &&
            (link.type == 'text/html' || link.type.nil?)
        }
      end
    end
  end
end
# Mixin that tracks published/updated timestamps. Several elements may
# feed the same attribute, so each writer merges the new value with the
# one already stored instead of overwriting it.
module HasTimestamps
  attr_reader :published, :updated

  # Keep the "oldest" publish time found.
  # Values that cannot be parsed are ignored; previously such a value
  # raised NoMethodError (nil < DateTime) once a timestamp was stored.
  def published=(value)
    parsed = parse_datetime(value)
    return if parsed.nil?

    @published = parsed if !@published || parsed < @published
  end

  # Keep the most recent update time found.
  # Values that cannot be parsed are ignored (see #published=).
  def updated=(value)
    parsed = parse_datetime(value)
    return if parsed.nil?

    @updated = parsed if !@updated || parsed > @updated
  end

  # ISO 8601 form of the publish time, or nil when there is none.
  def date_published
    published.try(:iso8601)
  end

  # ISO 8601 form of the update time, falling back to the publish time.
  def last_updated
    (updated || published).try(:iso8601)
  end

  private

  # Parses +string+ into a DateTime; returns nil when parsing fails
  # (including when the value is not a String).
  def parse_datetime(string)
    DateTime.parse(string)
  rescue StandardError
    nil
  end
end
# Mixin applied to feed entry classes: pulls in the author, enclosure,
# link and timestamp mixins and supplies a stable `id`.
module FeedEntryExtensions
  def self.included(mod)
    [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |mixin|
      mod.send(:include, mixin)
    end
  end

  # The entry's own id, else its dc identifier, else the MD5 hex digest
  # of its content (or summary, or the empty string).
  def id
    explicit = entry_id || @dc_identifier
    return explicit if explicit

    body = content || summary || ''
    Digest::MD5.hexdigest(body)
  end
end
# Mixin applied to feed (parser) classes: adds the shared mixins, extra
# feed-level elements, and installs FeedEntryExtensions on the classes
# used for the feed's entries.
module FeedExtensions
  def self.included(mod)
    mod.module_exec do
      include HasAuthors
      include HasEnclosure
      include HasLinks
      include HasTimestamps

      element :id, as: :feed_id
      element :generator
      elements :rights
      element :published
      element :updated
      element :icon

      # `name` is the name of the including parser class.
      case name
      when /RSS/
        element :guid, as: :feed_id
        element :copyright
        element :pubDate, as: :published
        element :'dc:date', as: :published
        element :lastBuildDate, as: :updated
        element :image, value: :url, as: :icon

        # Prefer the parsed copyright element; otherwise fall back to the
        # rights-based implementation further up the ancestor chain.
        def copyright
          @copyright || super
        end

        case name
        when /ITunes/
          sax_config.collection_elements['itunes:owner'].clear
          elements :"itunes:owner", as: :_itunes_owners, class: ITunesRssOwner
          private :_itunes_owners

          # iTunes owners that have at least one member set.
          def itunes_owners
            _itunes_owners.reject { |owner| owner.empty? }
          end
        end
      else
        unless method_defined?(:description)
          element :subtitle, as: :description
        end
      end

      # Extend every class that backs an `entries` collection.
      sax_config.collection_elements.each_value do |configs|
        configs.each do |config|
          next unless config.accessor == 'entries'

          entry_class = config.data_class
          next unless entry_class.is_a?(Class)

          entry_class.send :include, FeedEntryExtensions
        end
      end
    end
  end

  # All rights values joined by newlines, or nil when the result is blank.
  def copyright
    joined = rights.join("\n")
    joined.presence
  end
end
# Mix FeedExtensions into every parser class Feedjira reports.
Feedjira.parsers.each { |parser| parser.send(:include, FeedExtensions) }
end