# lib/oai_dc_helpers.rb
include Utf8UrlFor
include KeteUrlFor
include ActionView::Helpers::SanitizeHelper
# oai dublin core xml helpers
# TODO: evaluate whether we can simply go with SystemSetting.full_site_url
# rather than request hacking
module OaiDcHelpers
unless included_modules.include? OaiDcHelpers
# Hook run when this module is included into +klass+: the including class
# also gets XmlHelpers mixed in.
def self.included(klass)
  klass.send(:include, XmlHelpers)
end
# Emits the <request> element describing a GetRecord call for this item.
#
# xml            - builder object that receives the +request+ call
# passed_request - optional hash-like object; its :original_url entry becomes
#                  the element text. When nil, simulated_request is used.
def oai_dc_xml_request(xml, passed_request = nil)
  request_source = passed_request.nil? ? simulated_request : passed_request
  request_uri = request_source[:original_url]
  record_identifier = "#{ZoomDb.zoom_id_stub}#{basket_urlified_name}:#{self.class.name}:#{id}"

  xml.request(
    request_uri,
    verb: 'GetRecord',
    identifier: record_identifier,
    metadataPrefix: 'oai_dc'
  )
end
# Emits the <identifier> element for this item. The value is built from the
# ZoomDb id stub, the basket's urlified name, the class name and the id.
def oai_dc_xml_oai_identifier(xml)
  oai_identifier = "#{ZoomDb.zoom_id_stub}#{basket_urlified_name}:#{self.class.name}:#{id}"
  xml.identifier(oai_identifier)
end
# Walter McGinnis, 2008-10-05
# adding better logic for determining last time the item was changed
# we want the datestamp to reflect the most recent change to the item
# that can be either when it is created/edited
# or when a relationship has been added
# note that if a relation is removed, this may result in rolling back in time
# of datestamp, which may be counterintuitive, however that is a rare case
# Emits the <datestamp> element as a UTC xmlschema timestamp.
#
# The value starts as the item's own updated_at and is moved forward when a
# newer relation exists:
# * Topic: the last record of content_item_relations and of
#   child_content_item_relations are both considered.
# * Comment: relations are ignored.
# * anything else: only the last record of content_item_relations counts.
def oai_dc_xml_oai_datestamp(xml)
  latest_change = updated_at

  if is_a?(Topic)
    # a topic may sit on either side of the relation join model, so the
    # newest record from each side is a candidate
    newest_links = []
    newest_links << content_item_relations.last if content_item_relations.count > 0
    newest_links << child_content_item_relations.last if child_content_item_relations.count > 0

    unless newest_links.empty?
      newest_link_time = newest_links.map(&:updated_at).max
      latest_change = newest_link_time if newest_link_time > latest_change
    end
  elsif !is_a?(Comment) && content_item_relations.count > 0
    newest_link_time = content_item_relations.last.updated_at
    latest_change = newest_link_time if newest_link_time > latest_change
  end

  xml.datestamp(latest_change.utc.xmlschema)
end
# Walter McGinnis, 2008-06-16
# adding oai pmh set support
# assumes public zoom_db
# Emits one <setSpec> element for every set spec matching this item.
# The sets come from the active_sets of the ZoomDb record with id 1.
def oai_dc_xml_oai_set_specs(xml)
  matching = ZoomDb.find(1).active_sets.inject([]) do |collected, base_set|
    collected + base_set.matching_specs(self)
  end

  matching.each { |spec| xml.setSpec(spec) }
end
# Emits <dc:identifier> holding the full URL of this record.
#
# xml            - builder object that receives the element
# passed_request - optional hash-like object whose :host entry supplies the
#                  host name; simulated_request is used when nil.
#
# Comments link to the item they comment on (with a "comment-<id>" anchor).
# Other items that respond to :private and report private? link to their
# private version using the bare id.
def oai_dc_xml_dc_identifier(xml, passed_request = nil)
  request_source = passed_request.nil? ? simulated_request : passed_request
  host = request_source[:host]

  link_params = {
    controller: zoom_class_controller(self.class.name),
    action: 'show',
    id: self,
    format: nil,
    urlified_name: basket_urlified_name
  }

  if self.class.name == 'Comment'
    # a comment's URL is that of the thing commented on
    target = commentable
    link_params = {
      controller: zoom_class_controller(target.class.name),
      action: 'show',
      id: target,
      urlified_name: target.basket.urlified_name,
      anchor: "comment-#{id}",
      private: commentable_private?.to_s
    }
  elsif respond_to?(:private) && private?
    # private version: the id is passed as a plain string so the title
    # is not put in the URL
    link_params[:private] = 'true'
    link_params[:id] = id.to_s
  end

  # protocol is chosen by appropriate_protocol_for (per the original note:
  # https for private items when SSL is configured)
  protocol = appropriate_protocol_for(self)
  full_params = link_params.merge(protocol: protocol, host: host, locale: false)

  xml.send('dc:identifier', utf8_url_for(full_params))
end
# Emits <dc:title> containing this item's title.
#
# options - passed through to the builder call after the title
def oai_dc_xml_dc_title(xml, options = {})
  item_title = title
  xml.send('dc:title', item_title, options)
end
# Emits <dc:publisher>.
#
# publisher - value to emit; when nil, the :host entry of simulated_request
#             is used, i.e. this website is the publisher by default.
def oai_dc_xml_dc_publisher(xml, publisher = nil)
  publisher_name = publisher.nil? ? simulated_request[:host] : publisher
  xml.send('dc:publisher', publisher_name)
end
# Emits <dc:description> elements.
#
# passed_description - text to emit, run through strip_tags and wrapped in
#                      CDATA (embedded html only adds clutter and fails
#                      oai_dc validation). When blank, the item's own
#                      descriptions are emitted instead.
# options            - passed through to the builder call
#
# With a blank passed_description the method calls itself for short_summary
# (Topic and Document only) and then for description, each only when
# present. Order matters: the first description is used as the blurb in
# result lists.
def oai_dc_xml_dc_description(xml, passed_description = nil, options = {})
  if passed_description.blank?
    summary_applies = [Topic, Document].include?(self.class) && short_summary.present?
    oai_dc_xml_dc_description(xml, short_summary, options) if summary_applies
    oai_dc_xml_dc_description(xml, description, options) if description.present?
  else
    xml.send('dc:description', options) { xml.cdata(passed_description.strip_tags) }
  end
end
# Emits an optional <dc:date> plus <dc:creator> elements.
#
# The creation date (created_at as a UTC xmlschema string) is emitted only
# when SystemSetting.add_date_created_to_item_search_record? is on, since
# sites with lots of imported archival material may handle dates in their
# extended fields instead.
#
# Each creator yields its user_name and, when different, its login too:
# login is unique per site whereas user_name is not, so this allows limiting
# exactly to one user.
def oai_dc_xml_dc_creators_and_date(xml)
  if SystemSetting.add_date_created_to_item_search_record?
    xml.send('dc:date', created_at.utc.xmlschema)
  end

  creators.each do |creator|
    display_name = creator.user_name
    xml.send('dc:creator', display_name)

    login = creator.login
    xml.send('dc:creator', login) if display_name != login
  end
end
# TODO: this attribute isn't coming over even though it's in the select
# contribution_date = contributor.version_created_at.to_date
# xml.send("dcterms:modified", contribution_date)
# Emits <dc:contributor> elements for the distinct contributors of this item.
#
# Each contributor yields its user_name and, when different, its login too:
# login is unique per site whereas user_name is not. No modification dates
# are emitted here.
def oai_dc_xml_dc_contributors_and_modified_dates(xml)
  distinct_contributors = contributors.all(select: 'distinct(users.login), users.resolved_name')

  distinct_contributors.each do |contributor|
    display_name = contributor.user_name
    xml.send('dc:contributor', display_name)

    login = contributor.login
    xml.send('dc:contributor', login) if display_name != login
  end
end
# Emits <dc:subject> and <dc:relation> elements for related items.
#
# passed_request - hash merged over { force_http: true, minimal: true } and
#                  handed to url_for_dc_identifier
#
# For a Comment the only relation is the item commented on. Direct comments
# are not added as relations of other items, because their URL is that of
# the thing they comment on. For other items every related item yields a
# relation. A subject (the related title, in CDATA) is emitted unless the
# title equals SystemSetting.blank_title or
# SystemSetting.no_public_version_title. Subjects are skipped entirely once
# there are 500 or more related items, as zebra has a maximum record size.
def oai_dc_xml_dc_relations_and_subjects(xml, passed_request = {})
  if self.class.name == 'Comment'
    target = commentable
    unless [SystemSetting.blank_title, SystemSetting.no_public_version_title].include?(target.title)
      xml.send('dc:subject') { xml.cdata(target.title) }
    end
    xml.send('dc:relation', url_for_dc_identifier(target, { force_http: true, minimal: true }.merge(passed_request)))
  else
    total_related = related_items.count
    related_items.each do |related|
      if total_related < 500 &&
         ![SystemSetting.blank_title, SystemSetting.no_public_version_title].include?(related.title)
        xml.send('dc:subject') { xml.cdata(related.title) }
      end
      xml.send('dc:relation', url_for_dc_identifier(related, { force_http: true, minimal: true }.merge(passed_request)))
    end
  end
end
# Emits one <dc:subject> element per tag, with the tag name in CDATA.
def oai_dc_xml_tags_to_dc_subjects(xml)
  tags.each do |tag|
    xml.send('dc:subject') { xml.cdata(tag.name) }
  end
end
# Emits <dc:type> with a type name chosen from the item's class:
#
#   AudioRecording -> 'Sound'
#   StillImage     -> 'StillImage'
#   Video          -> 'MovingImage'
#   anything else  -> 'InteractiveResource' (topic's type is the default)
#
# The match is made on the item itself, not on self.class: `when SomeClass`
# uses SomeClass === value, which is an instance check, so matching against
# the Class object never succeeds and always falls through to the default.
def oai_dc_xml_dc_type(xml)
  type =
    case self
    when AudioRecording then 'Sound'
    when StillImage then 'StillImage'
    when Video then 'MovingImage'
    else 'InteractiveResource'
    end

  xml.send('dc:type', type)
end
# Emits <dc:format> with the item's content type, when one is available.
#
# * StillImage: content_type of original_file, or nothing without a file
# * Topic, Comment, WebLink: always 'text/html'
# * anything else: the item's own content_type
#
# No element is emitted when the resulting value is blank.
def oai_dc_xml_dc_format(xml)
  mime_type =
    case self.class.name
    when 'StillImage'
      original_file.nil? ? '' : original_file.content_type
    when 'Topic', 'Comment', 'WebLink'
      'text/html'
    else
      content_type
    end

  xml.send('dc:format', mime_type) unless mime_type.blank?
end
# currently only relevant to topics
# Emits <dc:coverage> elements for a Topic: one per ancestor of its
# topic_type, followed by the topic_type's own name. Does nothing for
# items that are not topics.
def oai_dc_xml_dc_coverage(xml)
  return nil unless is_a?(Topic)

  own_type = topic_type
  own_type.ancestors.each { |ancestor| xml.send('dc:coverage', ancestor.name) }
  xml.send('dc:coverage', own_type.name)
end
# if there is a license for item, put in its url
# otherwise site's terms and conditions url
# Emits <dc:rights>.
#
# When the item responds to :license and has a non-blank license, the value
# is the license's url. Otherwise it is the URL of the site's terms and
# conditions topic in the about basket.
#
# The terms and conditions topic is only looked up when it is actually
# needed, so licensed items do not pay for a query whose result would be
# discarded.
def oai_dc_xml_dc_rights(xml)
  rights =
    if respond_to?(:license) && !license.blank?
      license.url
    else
      about_basket = Basket.about_basket
      terms_and_conditions_topic = about_basket.topics.find(
        :first,
        conditions: "UPPER(title) like '%TERMS AND CONDITIONS'"
      )
      # fall back to the literal id 4 when no matching topic is found
      # NOTE(review): presumably the id of the default terms topic - confirm
      terms_and_conditions_topic ||= 4

      utf8_url_for(
        host: SITE_NAME,
        id: terms_and_conditions_topic,
        urlified_name: about_basket.urlified_name,
        action: 'show',
        controller: 'topics',
        escape: false,
        locale: false
      )
    end

  xml.send('dc:rights', rights)
end
# Emits <dc:source> with the file URL of this item, but only when the
# item's class name is listed in ::Import::VALID_ARCHIVE_CLASSES.
#
# passed_request - optional hash-like object whose :host entry supplies the
#                  host name; simulated_request is used when nil.
def oai_dc_xml_dc_source_for_file(xml, passed_request = nil)
  request_source = passed_request.nil? ? simulated_request : passed_request
  host = request_source[:host]

  return nil unless ::Import::VALID_ARCHIVE_CLASSES.include?(self.class.name)

  xml.send('dc:source', file_url_from_bits_for(self, host))
end
end
end