app/models/referent.rb
require 'i18n'
require 'truncate_to_db_limit'
# Note: A few attributes are stored as actual columns on referent: atitle,
# title, issn, isbn, volume, year. They were originally used to find an
# existing Referent identifying the 'same thing' as an incoming OpenURL, so
# that it could be re-used. But we don't re-use cached referents anymore, so
# these attributes are NOT USED -- the actual values are over in
# ReferentValues. The attributes are left in place for now (and are still
# set) merely to make it easier to eyeball the database by hand. (It is not
# known why issue and page number were never included.)
class Referent < ActiveRecord::Base
  # for shortcut metadata manipulations (presumably the source of the get_*
  # helpers called by #lccn, #issn, #isbn, #oclcnum and #doi -- they are not
  # defined in this file).
  include MetadataHelper

  has_many :requests
  has_many :referent_values
  has_many :permalinks

  # We really should get rid of these 'mirror' attributes, but
  # in the meantime truncate them.
  include TruncateToDbLimit
  truncate_to_db_limit :volume, :year, :issn, :isbn

  # Matches an identifier that has a scheme but no actual value after it,
  # e.g. "info:doi/" or "urn:ISSN:". LOTS of sources send these.
  EMPTY_IDENTIFIER_PATTERN = Regexp.new('^[^:]+:[^/:]*(/|:)?$')

  # Matches an info:oclcnum identifier carrying an old-style prefix and/or
  # leading zeros. Capture group 2 is the bare OCLC number.
  OCLCNUM_PREFIX_PATTERN = /^info:oclcnum\/(ocm0*|ocn0*|on0*|\(OCoLC\)0*|ocl70*|0+)(.*)$/

  # Builds and saves a new Referent from an OpenURL context object.
  #
  # co      - the context object; it is MUTATED by .clean_up_context_object
  #           before its values are copied.
  # options - Pass in :permalink => :force to force creation of a permalink,
  #           otherwise no permalink is created by this method, one can be
  #           lazily created when needed.
  #
  # Returns the saved Referent. Raises whatever save! raises if the record
  # cannot be saved.
  def self.create_by_context_object(co, options = {})
    self.clean_up_context_object(co)
    rft = Referent.new

    # Wrap everything in a transaction for better efficiency, at least
    # with MySQL, I think.
    Referent.transaction do
      rft.set_values_from_context_object(co)

      # Permalinks created on-demand later. But if set config to :force, can
      # force old behavior. The referrer may be absent, so guard the lookup
      # the same way the filter code below does.
      if options[:permalink] == :force
        Permalink.new_with_values!(rft, co.referrer && co.referrer.identifier)
      end

      # Add shortcuts (the 'mirror' columns), from metadata values only.
      rft.referent_values.each do |val|
        next unless val.metadata?

        case val.key_name
        when 'atitle'
          rft.atitle = val.normalized_value
        when /^[bj]?title$/
          rft.title = val.normalized_value
        when 'issn'
          # Keep the 'X' check digit, it is a legal final character.
          rft.issn = val.normalized_value.gsub(/[^\dX]/i, '')[0, 8]
        when 'isbn'
          rft.isbn = val.normalized_value.gsub(/[^\dX]/i, '')[0, 13]
        when 'volume'
          rft.volume = val.normalized_value
        when 'date'
          rft.year = val.normalized_value.gsub(/[^\d]/, '')[0, 4]
        end
      end

      rft.save!

      # Apply referent filters whose :match regexp matches the referrer id
      # (empty string when there is no referrer id).
      rfr_id = (co.referrer && co.referrer.identifier.presence) || ""
      UmlautController.umlaut_config.lookup!("referent_filters", []).each do |filter_config|
        next unless filter_config[:match] =~ rfr_id

        filter = filter_config[:filter]
        filter.filter(rft) if filter.respond_to?(:filter)
      end
    end

    rft
  end

  # Okay, we need to do some pre-processing on weird context objects
  # sent by, for example, firstSearch:
  #   * removes empty/invalid identifiers,
  #   * copies a urn:ISSN identifier into the 'issn' metadata key if that
  #     key is blank, and drops the identifier unless genre is 'journal',
  #   * strips old prefixes and leading zeros from info:oclcnum identifiers.
  #
  # Mutator: Modifies ContextObject arg passed in.
  def self.clean_up_context_object(co)
    referent = co.referent

    # First, remove any empty DOIs or other empty identifiers. Collect
    # first, then delete, so the list is not changed while being scanned.
    empty_ids = referent.identifiers.find_all { |id| id =~ EMPTY_IDENTIFIER_PATTERN }
    empty_ids.each { |id| referent.delete_identifier(id) }

    # Now look for ISSN identifiers that are on article_level. FirstSearch
    # gives us ISSN identifiers incorrectly on article level cites.
    issn_ids = referent.identifiers.find_all { |id| id =~ /^urn:ISSN/ }
    issn_ids.each do |issn_id|
      # Long as we're at it, add an rft.issn if one's not there.
      # Position 9 onwards is the actual ISSN, after the "urn:ISSN:" prefix.
      issn_data = issn_id.slice(9..issn_id.length)
      if referent.get_metadata('issn').blank? && !issn_data.blank?
        referent.set_metadata('issn', issn_data)
      end

      # And remove it as an identifier unless we know this is journal-level
      # cite.
      unless referent.get_metadata('genre') == 'journal'
        referent.delete_identifier(issn_id)
      end
    end

    # Clean up OCLC numbers from old bad formats that may have snuck in to
    # an info url incorrectly. Also delete preceding 0's.
    oclcnum_ids = referent.identifiers.find_all { |id| id =~ /^info:oclcnum/ }
    oclcnum_ids.each do |oclcnum_id|
      match = OCLCNUM_PREFIX_PATTERN.match(oclcnum_id)
      next unless match

      # Delete the original, take out just the actual oclcnum, not
      # those old prefixes or preceding 0s.
      referent.delete_identifier(oclcnum_id)
      referent.add_identifier("info:oclcnum/#{match[2]}")
    end
  end

  # private use. Adds a referent_value and returns it, does NOT persist
  # it to db. referent_value is constructed with ActiveRecord build, and
  # will be saved when Referent (self) is saved, works on persisted or
  # unpersisted Referent.
  #
  # Every key except "identifier" and "format" is flagged as metadata;
  # only the "private_data" key is flagged as private data.
  def build_referent_value(key_name, value)
    referent_values.build(
      :key_name => key_name,
      :value => value,
      :normalized_value => ReferentValue.normalize(value),
      :private_data => (key_name == "private_data"),
      :metadata => (key_name != "identifier" && key_name != "format")
    )
  end

  # Populate the referent_values table with a ropenurl contextobject object
  # Note, does NOT save self, self may still be unsaved.
  def set_values_from_context_object(co)
    rft = co.referent

    # Multiple identifiers are possible!
    rft.identifiers.each do |id_string|
      build_referent_value('identifier', id_string)
    end

    build_referent_value('format', rft.format) if rft.format

    # this comes in as "pid" or "rft_dat", we store it in
    # our database as "private_data", sorry, easiest way to
    # fit this in at the moment.
    build_referent_value("private_data", rft.private_data) if rft.private_data

    rft.metadata.each do |key, value|
      next unless value.present?

      # Sometimes value is an array, for DC for instance. Do the best we
      # can: one referent_value per element.
      Array(value).each { |v| build_referent_value(key, v) }
    end
  end

  # pass in a Referent, or a ropenurl ContextObjectEntity that has a metadata
  # method. Or really anything with a #metadata method returning openurl-style
  # keys and values.
  # Method returns true iff the keys in common to both metadata packages
  # have equal (==) values. A nil/false arg is considered a match.
  def metadata_intersects?(arg)
    # if it's empty, good enough.
    return true unless arg

    # Each #metadata call rebuilds the hash, so fetch each side only once.
    mine = self.metadata
    theirs = arg.metadata

    # Keys whose value is blank on either side are not compared. If one is
    # blank but not both, we can still consider that a match.
    (mine.keys & theirs.keys).all? do |k|
      mine[k].blank? || theirs[k].blank? || mine[k] == theirs[k]
    end
  end

  # Creates a hash of values from referent_values, to assemble what was
  # spread across different db rows into one easy-lookup hash, for
  # easy access. Only values flagged metadata and not flagged private data
  # are included; a later row with the same key overwrites an earlier one.
  # See also #to_citation for a different hash, specifically
  # for use in View to print citation. And #to_context_object.
  def metadata
    referent_values.to_a.each_with_object({}) do |val, result|
      result[val.key_name] = val.value if val.metadata? && !val.private_data?
    end
  end

  # Returns a hash of key_name => value for the referent_values flagged as
  # private data.
  def private_data
    referent_values.to_a.each_with_object({}) do |val, result|
      result[val.key_name] = val.value if val.private_data?
    end
  end

  # Returns an array of the values of all 'identifier' referent_values.
  def identifiers
    referent_values.to_a.select { |val| val.key_name == 'identifier' }.map { |val| val.value }
  end

  # Persists a new 'identifier' referent_value for id, unless this referent
  # already has that identifier. Raises if the new value cannot be saved.
  def add_identifier(id)
    return if identifiers.include?(id)

    Referent.connection_pool.with_connection do
      referent_values.create!(
        :key_name => 'identifier',
        :value => id,
        :normalized_value => ReferentValue.normalize(id),
        :metadata => false,
        :private_data => false
      )
    end
  end

  # Returns the value of the first 'format' referent_value, or nil if
  # there is none.
  def format
    referent_values.to_a.find { |val| val.key_name == 'format' }.try(:value)
  end

  # Some shortcuts for pulling out/manipulating specific especially
  # useful data elements.

  # finds and normalizes an LCCN. If multiple LCCNs are in the record,
  # returns the first one. Returns a NORMALIZED lccn, but does NOT do
  # validation. see:
  # http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
  def lccn
    get_lccn(self)
  end

  # Gets an ISSN, makes sure it's a valid ISSN or else returns nil.
  # So will return a valid ISSN (NOT empty string) or nil.
  def issn
    get_issn(self)
  end

  # Delegates to get_isbn.
  def isbn
    get_isbn(self)
  end

  # Delegates to get_oclcnum.
  def oclcnum
    get_oclcnum(self)
  end

  # Delegates to get_doi.
  def doi
    get_doi(self)
  end

  # Creates an OpenURL::ContextObject assembling all the data in this
  # referent.
  def to_context_object
    co = OpenURL::ContextObject.new

    # Got to initialize the referent entity properly for our format.
    # OpenURL sucks, this is confusing, yes.
    # NOTE(review): #format returns nil when no 'format' value is stored, in
    # which case this concatenation raises TypeError -- confirm a format is
    # always present before this is called.
    fmt_uri = 'info:ofi/fmt:xml:xsd:' + self.format
    co.referent = OpenURL::ContextObjectEntity.new_from_format(fmt_uri)
    rft = co.referent

    # Now set all the values. Metadata rows go through set_metadata; other
    # rows use a set_<key_name> method when the entity has one.
    # NOTE(review): a 'private_data' row is also flagged metadata by
    # #build_referent_value, so it is emitted through set_metadata here --
    # confirm that is intended.
    referent_values.each do |val|
      if val.metadata?
        rft.set_metadata(val.key_name, val.value)
      else
        setter = 'set_' + val.key_name
        rft.send(setter, val.value) if rft.respond_to?(setter)
      end
    end

    co
  end

  # Creates a hash for use in View code to display a citation. Keys that
  # may be set: :title, :container_title, :title_label, :container_label,
  # :pub, :issn, :isbn, :volume, :issue, :date, :author, :page, :identifiers.
  #
  # TODO, move to_citation, type_of_thing, and container_type_of_thing OUT
  # of Referent, to helper module or own class.
  def to_citation
    citation = {}

    # call self.metadata once and use the hash for efficiency, don't
    # keep calling it. profiling shows it DOES make a difference.
    my_metadata = self.metadata

    # First non-blank of the 'top-level thing' title keys, if any.
    top_title = ['title', 'btitle', 'jtitle'].map { |t_type| my_metadata[t_type] }.find { |t| t.present? }

    if my_metadata['atitle'].present?
      citation[:title] = my_metadata['atitle']
      citation[:container_title] = top_title if top_title
    elsif top_title
      # only top-level thing, no sub-thing
      citation[:title] = top_title
    end

    # A Symbol :default is looked up as a fallback translation key; a String
    # would be returned verbatim as the label text.
    citation[:title_label] = I18n.t("umlaut.citation.title_of_x", :x => self.type_of_thing, :default => :"umlaut.citation.title_label")
    citation[:container_label] = self.container_type_of_thing

    # add publisher for books
    if my_metadata['genre'] =~ /book/i
      citation[:pub] = my_metadata['pub'] unless my_metadata['pub'].blank?
    end

    valid_issn = issn
    citation[:issn] = valid_issn if valid_issn
    valid_isbn = isbn
    citation[:isbn] = valid_isbn if valid_isbn

    ['volume', 'issue', 'date'].each do |key|
      citation[key.to_sym] = my_metadata[key]
    end

    author = citation_author(my_metadata)
    citation[:author] = author if author

    if my_metadata['spage']
      citation[:page] = my_metadata['spage']
      citation[:page] += ' - ' + my_metadata['epage'] unless my_metadata['epage'].blank?
    end

    citation[:identifiers] = self.identifiers.reject { |id| id.blank? || id.match(/^tag:/) }

    citation
  end

  # Returns a translated label for the kind of thing cited, looked up under
  # the "umlaut.citation.genre" i18n scope by downcased genre (falling back
  # to format when genre is blank). A 'journal' or 'book' with an atitle is
  # labelled as 'article' / 'bookitem' instead. Returns nil when there is
  # neither genre nor format, or when no non-blank translation exists.
  def type_of_thing
    my_metadata = self.metadata

    key = my_metadata["genre"]
    key = self.format if key.blank?
    # Nothing to look up; also avoids calling downcase on nil.
    return nil if key.blank?

    key = key.downcase
    if my_metadata['atitle'].present?
      key = 'article' if key == "journal"
      key = "bookitem" if key == "book"
    end

    genre_label(key)
  end

  # Like type_of_thing, but if it's a contained item, give container name
  # instead: 'article' => 'journal', 'bookitem' => 'book'. Returns nil when
  # there is neither genre nor format, or no non-blank translation exists.
  def container_type_of_thing
    genre = self.metadata['genre']

    i18n_key = case genre
               when 'article' then 'journal'
               when 'bookitem' then 'book'
               else genre.presence || self.format
               end
    return nil if i18n_key.blank?

    genre_label(i18n_key)
  end

  # Deletes every persisted referent_value with the given key_name, from
  # both the database and the association.
  def remove_value(key)
    referent_values.where(:key_name => key).to_a.each do |rv|
      rv.delete
      referent_values.delete(rv)
    end
  end

  # Sets key to value on this referent, persisting immediately.
  #
  # If referent_values with this key_name already exist, their value is
  # overwritten (unless options[:overwrite] is false); otherwise a new
  # referent_value is created with the given metadata/private_data flags.
  # For title, issn, isbn, volume and date keys the matching 'mirror'
  # column on self is also updated and self is saved.
  #
  # Does nothing when value is nil. Raises if any save fails.
  #
  # options => { :overwrite => false } to only enhance if not already there
  def enhance_referent(key, value, metadata=true, private_data=false, options = {})
    # Checked before taking a connection, there is nothing to do for nil.
    return if value.nil?

    ActiveRecord::Base.connection_pool.with_connection do
      normalized = ReferentValue.normalize(value)

      # We ignore #metadata and #private_data matches in overwriting
      # existing value. We used to take them into account, but it triggered
      # a bug in Jruby, and pretty much isn't neccesary, those fields
      # are pretty useless and mostly not used and should prob be removed.
      matches = referent_values.to_a.find_all { |rv| rv.key_name == key }

      if matches.empty?
        referent_values.create!(
          :key_name => key,
          :value => value,
          :normalized_value => normalized,
          :metadata => metadata,
          :private_data => private_data
        )
      elsif options[:overwrite] != false
        matches.each do |rv|
          next if rv.value == value

          rv.value = value
          # Keep normalized_value in step with value, as at creation time.
          rv.normalized_value = normalized
          rv.save!
        end
      end

      if key.match(/(^[ajb]?title$)|(^is[sb]n$)|(^volume$)|(^date$)/)
        case key
        when 'date' then self.year = normalized
        when 'volume' then self.volume = normalized
        when 'issn' then self.issn = normalized
        when 'isbn' then self.isbn = normalized
        when 'atitle' then self.atitle = normalized
        else self.title = normalized
        end
        self.save!
      end
    end
  end

  private

  # Assembles the citation author string from an openurl-style metadata
  # hash: 'au' if present; else 'aulast' plus the first available of
  # 'aufirst', 'auinit', or 'auinit1'+'auinitm'; else 'aucorp'. Returns nil
  # when none of those apply.
  def citation_author(my_metadata)
    if my_metadata["au"].present?
      my_metadata["au"].strip
    elsif my_metadata["aulast"]
      author = my_metadata["aulast"].strip
      if my_metadata["aufirst"].present?
        author += ', ' + my_metadata["aufirst"].strip
      elsif my_metadata["auinit"].present?
        author += ', ' + my_metadata["auinit"].strip
      else
        author += ', ' + my_metadata["auinit1"].strip if my_metadata["auinit1"].present?
        author += my_metadata["auinitm"].strip if my_metadata["auinitm"].present?
      end
      author
    elsif my_metadata["aucorp"]
      my_metadata["aucorp"]
    end
  end

  # Looks key up under the "umlaut.citation.genre" i18n scope. Returns the
  # translation, or nil when it is missing or blank.
  def genre_label(key)
    I18n.t(key, :scope => "umlaut.citation.genre", :default => "").presence
  end
end