# lib/relaton/db.rb
require "yaml"
require_relative "registry"
require_relative "db_cache"
module Relaton
# Error class defined for the Relaton namespace (a plain StandardError
# subclass with no extra behaviour).
class RelatonError < StandardError; end
class Db
# Opens the global, local and bundled static caches and prepares the
# per-flavor async queues and the mutex guarding cache writes.
#
# @param global_cache [String, nil] directory of the global DB; no global
#   cache is opened when nil
# @param local_cache [String, nil] directory of the local DB; no local
#   cache is opened when nil
def initialize(global_cache, local_cache)
  @registry = Relaton::Registry.instance

  global_dir = global_cache && File.expand_path(global_cache)
  @db = open_cache_biblio(global_dir, type: :global)

  local_dir = local_cache && File.expand_path(local_cache)
  @local_db = open_cache_biblio(local_dir, type: :local)

  # The static cache ships with the gem, next to this file.
  static_dir = File.expand_path("../relaton/static_cache", __dir__)
  @static_db = open_cache_biblio(static_dir)

  @queues = {}
  @semaphore = Mutex.new
end
# Move the global or the local cache to another directory.
#
# @param new_dir [String, nil] target directory
# @param type [Symbol] which cache to move, :global or :local
# @return [String, nil] whatever the cache's #mv returns; nil when the
#   selected cache is not configured or the type is not recognised
def mv(new_dir, type: :global)
  cache = { global: @db, local: @local_db }[type]
  cache&.mv new_dir
end
# Clear the global and the local databases (each only when configured).
#
# @return [Object, nil] result of clearing the local cache, nil when there
#   is no local cache
def clear
  @db.clear unless @db.nil?
  @local_db.clear unless @local_db.nil?
end
##
# Fetch a bibliographic item for a reference.
#
# The flavor (processor) is chosen from the prefix of the code. When the
# processor can translate URNs, the code is first converted with
# #urn_to_code. Combined references ("A + B" or "A, B") are resolved first;
# otherwise the caches and, if needed, the processor are consulted.
#
# @param code [String] the reference to look up (e.g. "ISO 9000")
# @param year [String, nil] the year the standard was published (optional)
#
# @param opts [Hash] options
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
#   actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
#
# @return [RelatonBib::BibliographicItem, nil] the item built by the
#   flavor's processor (a flavor-specific item class), or nil when the
#   prefix is not recognised or nothing was found
##
def fetch(code, year = nil, opts = {})
  stdclass = standard_class(code)
  return unless stdclass

  processor = @registry.processors[stdclass]
  ref = processor.urn_to_code(code)&.first if processor.respond_to?(:urn_to_code)
  ref ||= code # fall back to the code as given when no translation applies

  combine_doc(ref, year, opts, stdclass) ||
    check_bibliocache(ref, year, opts, stdclass)
end
# Same as #fetch, but with the :fetch_db option set so that only the caches
# are consulted.
#
# The option is added to a copy of +opts+, so the caller's hash is left
# untouched (and may be frozen).
#
# @see Relaton::Db#fetch
# @param code [String] the reference to look up
# @param year [String, nil] the year the standard was published (optional)
# @param opts [Hash] options, see #fetch
# @return [RelatonBib::BibliographicItem, nil]
def fetch_db(code, year = nil, opts = {})
  fetch code, year, opts.merge(fetch_db: true)
end
# Fetch all documents from the static cache plus one writable cache (the
# global one if configured, otherwise the local one), optionally filtered.
#
# @param text [String, nil] text to search for
# @param edition [String, nil] edition to filter by
# @param year [Integer, nil] publication year to filter by
# @return [Array] matching bibliographic items
def fetch_all(text = nil, edition: nil, year: nil)
  found = @static_db.all { |file, yml| search_yml(file, yml, text, edition, year) }.compact
  cache = @db || @local_db
  return found unless cache

  found + cache.all { |file, xml| search_xml(file, xml, text, edition, year) }.compact
end
# Fetch asynchronously.
#
# Requests are queued per flavor. The first request for a flavor creates a
# workers pool (sized by ENV["RELATON_FETCH_PARALLEL"] or the processor's
# #threads) and a thread that feeds queued requests to it. The block is
# called with the fetched item, or with the RelatonBib::RequestError raised
# while fetching. When the code's prefix is not recognised the block is
# called immediately with nil.
#
# @param code [String] the reference to look up
# @param year [String, nil] the year the standard was published (optional)
# @param opts [Hash] options, see #fetch
# @yieldparam result [RelatonBib::BibliographicItem, RelatonBib::RequestError, nil]
def fetch_async(code, year = nil, opts = {}, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  stdclass = standard_class code
  return yield(nil) unless stdclass

  unless @queues[stdclass]
    processor = @registry.processors[stdclass]
    pool_size = ENV["RELATON_FETCH_PARALLEL"]&.to_i || processor.threads
    pool = WorkersPool.new(pool_size) do |job|
      # job is [code, year, opts, callback]
      callback = job[3]
      begin
        callback.call fetch(*job[0..2])
      rescue RelatonBib::RequestError => e
        callback.call e
      end
    end
    # Register the queue before starting the thread that reads from it.
    @queues[stdclass] = { queue: Queue.new, workers_pool: pool }
    Thread.new { process_queue @queues[stdclass] }
  end

  @queues[stdclass][:queue] << [code, year, opts, block]
end
# Fetch a reference for an explicitly named flavor.
#
# The flavor is the processor whose #prefix equals +stdclass+ (the last one
# registered wins); when none matches, it is detected from the code.
#
# @param code [String] the reference to look up
# @param year [String, NilClass] the year the standard was published
# @param stdclass [Object, NilClass] value compared against each
#   processor's prefix
#
# @param opts [Hash]
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
#   actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
#
# @return [RelatonBib::BibliographicItem, nil] the item built by the
#   flavor's processor, or nil when no flavor could be determined or
#   nothing was found
def fetch_std(code, year = nil, stdclass = nil, opts = {})
  matches = @registry.processors.select { |_name, processor| processor.prefix == stdclass }
  std = matches.map(&:first).last || standard_class(code)
  return nil unless std

  check_bibliocache(code, year, opts, std)
end
# The document identifier type corresponding to the given code.
#
# @param code [String] reference, optionally wrapped as "PREFIX(code)"
# @return [Array] two elements: the processor's idtype (nil when the prefix
#   is not recognised) and the code with any wrapper removed
def docid_type(code)
  stdclass = standard_class(code)
  return [nil, code] unless stdclass

  bare_code = strip_id_wrapper(code, stdclass)[1]
  [@registry.processors[stdclass].idtype, bare_code]
end
# Read a raw cache entry, preferring the local cache over the global one.
#
# Either cache may be absent; a missing cache is simply skipped instead of
# raising NoMethodError on nil.
#
# @param key [String] cache key
# @return [Object, nil] the stored entry, or nil when neither configured
#   cache has one
def load_entry(key)
  local_entry = @local_db[key] unless @local_db.nil?
  return local_entry if local_entry

  @db[key] unless @db.nil?
end
# Store an entry in every configured cache (global and local).
#
# @param key [String] cache key
# @param value [String] Bibitem xml serialisation.
# @return [Object] the value when a local cache exists, true otherwise
def save_entry(key, value)
  @db[key] = value unless @db.nil?
  return true if @local_db.nil?

  @local_db[key] = value
end
# List all entries of the local cache (or of the global cache when there is
# no local one) as a single XML document with a <documents> root.
#
# @return [String, nil] XML serialisation, nil when no cache is configured
def to_xml
  cache = @local_db || @db
  return unless cache

  builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
    xml.documents { xml.parent.add_child cache.all.join(" ") }
  end
  builder.to_xml
end
private
# Filter one static-cache (YAML) entry by edition, year and text. The text
# is matched against the item's XML serialisation.
#
# @param file [String] file path
# @param yml [String] content in YAML format
# @param text [String, nil] text to search
# @param edition [String, nil] edition to filter
# @param year [Integer, nil] year to filter
# @return [BibliographicItem, nil] the item when it passes all filters
def search_yml(file, yml, text, edition, year)
  candidate = search_edition_year(file, yml, edition, year)
  candidate if candidate && match_xml_text(candidate.to_xml(bibdata: true), text)
end
# Filter one cache (XML) entry by text, then by edition and year. The text
# check runs on the raw XML, before the entry is parsed.
#
# @param file [String] file path
# @param xml [String] content in XML format
# @param text [String, nil] text to search; nil disables the text filter
# @param edition [String, nil] edition to filter
# @param year [Integer, nil] year to filter
# @return [BibliographicItem, nil] the item when it passes all filters
def search_xml(file, xml, text, edition, year)
  text_matches = text.nil? || match_xml_text(xml, text)
  search_edition_year(file, xml, edition, year) if text_matches
end
# Parse a cache entry and keep it only if it matches the edition and year.
#
# The flavor is derived from the name of the directory containing the file;
# files ending in "xml" are parsed as XML, everything else as YAML.
#
# @param file [String] file path
# @param content [String] content in XML or YAML format
# @param edition [String, nil] edition to filter
# @param year [Integer, nil] year to filter; compared as a string with the
#   year of each "published" date
# @return [BibliographicItem, nil]
def search_edition_year(file, content, edition, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  flavor_dir = file.split("/")[-2]
  processor = @registry.processors[standard_class(flavor_dir)]
  item = if file.match?(/xml$/)
           processor.from_xml(content)
         else
           processor.hash_to_bib(YAML.safe_load(content))
         end
  return unless edition.nil? || item.edition == edition
  return item if year.nil?

  published = item.date.detect do |d|
    d.type == "published" && d.on(:year).to_s == year.to_s
  end
  item if published
end
# Case-insensitively test whether the text occurs in the XML between a
# quoted attribute opener (=' or =") and the same kind of quote, or between
# ">" and "<". The text is interpolated into the pattern as-is, so it is
# treated as a regular expression fragment.
#
# @param xml [String] content in XML format
# @param text [String, nil] text to search
# @return [Boolean]
def match_xml_text(xml, text)
  pattern = %r{((?<attr>=((?<apstr>')|"))|>).*?#{text}.*?(?(<attr>)(?(<apstr>)'|")|<)}mi
  pattern.match?(xml)
end
# Build a composite document for a combined reference.
#
# "A + B" yields "derivedFrom" relations, "A, B" yields "complements"
# relations described as "amendment". The first part is fetched on its own
# and attached as an "updates" relation; every further part is fetched as
# "<first><divider><part>" (divider is " " for ITU, "/" otherwise). Parts
# that cannot be fetched are skipped.
#
# @param code [String] combined reference
# @param year [String, nil]
# @param stdclass [Symbol] flavor name
#
# @param opts [Hash] options
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
#   actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
#
# @return [RelatonBib::BibliographicItem, nil] the composite item built by
#   the flavor's processor, or nil when the code is not a combined reference
def combine_doc(code, year, opts, stdclass) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  parts = code.split " + "
  if parts.size > 1
    reltype = "derivedFrom"
    reldesc = nil
  else
    parts = code.split ", "
    return unless parts.size > 1

    reltype = "complements"
    reldesc = RelatonBib::FormattedString.new content: "amendment"
  end

  doc = @registry.processors[stdclass].hash_to_bib docid: { id: code }
  base = parts[0]
  base_bib = check_bibliocache(base, year, opts, stdclass)
  if base_bib
    doc.relation << RelatonBib::DocumentRelation.new(bibitem: base_bib,
                                                     type: "updates")
  end

  divider = stdclass == :relaton_itu ? " " : "/"
  parts[1..-1].each do |suffix|
    bib = check_bibliocache(base + divider + suffix, year, opts, stdclass)
    next unless bib

    doc.relation << RelatonBib::DocumentRelation.new(
      type: reltype, description: reldesc, bibitem: bib
    )
  end
  doc
end
# Detect the flavor (processor name) responsible for a reference.
#
# A processor matches when the code starts with its prefix (optionally
# preceded by "urn:", case-insensitive) or matches its defaultprefix. When
# nothing matches, an informational message listing the known prefixes is
# logged and nil is returned explicitly, so the result never depends on
# what the logger returns.
#
# @param code [String] code of standard
# @return [Symbol, nil] standard class name, nil when the prefix is not
#   recognised
def standard_class(code)
  @registry.processors.each do |name, processor|
    return name if /^(urn:)?#{processor.prefix}/i.match?(code) ||
      processor.defaultprefix.match(code)
  end

  allowed = @registry.processors.map { |_name, processor| processor.prefix }
  Util.log <<~WARN, :info
    [relaton] #{code} does not have a recognised prefix: #{allowed.join(', ')}.
    See https://github.com/relaton/relaton/ for instructions on prefixing and wrapping document identifiers to disambiguate them.
  WARN
  nil
end
# TODO: i18n
# Format the cache ID for a reference: "PREFIX(code[:year][ (all parts)])".
# The year separator is "-" for the GB flavor and ":" for all others.
#
# @param code [String] reference, optionally wrapped as "PREFIX(code)"
# @param year [String, nil]
#
# @param opts [Hash]
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
#   actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
#
# @param stdclass [Symbol] flavor name
# @return [Array<String>] docid and code (with any wrapper removed)
def std_id(code, year, opts, stdclass)
  prefix, bare = strip_id_wrapper(code, stdclass)
  separator = stdclass == :relaton_gb ? "-" : ":"
  label = bare
  label += separator + year if year
  label += " (all parts)" if opts[:all_parts]
  ["#{prefix}(#{label.strip})", bare]
end
# Find the flavor's prefix and clean the code: the first en dash (U+2013)
# is replaced with a hyphen and a "PREFIX(...)" wrapper, if present, is
# removed.
#
# @param code [String]
# @param stdclass [Symbol] flavor name
# @return [Array] prefix and cleaned code
def strip_id_wrapper(code, stdclass)
  prefix = @registry.processors[stdclass].prefix
  dashed = code.sub(/\u2013/, "-")
  unwrapped = dashed.sub(/^#{prefix}\((.+)\)$/, "\\1")
  [prefix, unwrapped]
end
# Turn a raw cache entry into a bibliographic item.
#
# @param entry [String, nil] XML string, or a "not_found ..." marker
# @param stdclass [Symbol] flavor name
# @return [RelatonBib::BibliographicItem, nil] the item parsed by the
#   flavor's processor; nil for a missing entry or a "not_found" marker
def bib_retval(entry, stdclass)
  return nil if entry.nil? || entry.match?(/^not_found/)

  @registry.processors[stdclass].from_xml(entry)
end
# Look a reference up in the caches and fetch it from the flavor's processor
# when it is not cached.
#
# Lookup order: the static cache first, then the local cache (or the global
# one when there is no local cache). When both local and global caches
# exist, the global cache acts as the alternative cache. With
# opts[:fetch_db] set, nothing is fetched from the processor.
#
# @param code [String]
# @param year [String]
#
# @param opts [Hash]
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
# actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
# @option opts [Boolean] :fetch_db Only read from the caches
#
# @param stdclass [Symbol]
# @return [RelatonBib::BibliographicItem, nil] the item built by the
# flavor's processor, or nil when nothing was found
def check_bibliocache(code, year, opts, stdclass) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
# id is the cache key, searchcode the code with any wrapper removed.
id, searchcode = std_id(code, year, opts, stdclass)
# Static cache entries are YAML and are returned without touching the other caches.
yaml = @static_db[id]
if yaml
return @registry.processors[stdclass].hash_to_bib YAML.safe_load(yaml)
end
# Primary cache: local if present, else global. altdb is the global cache,
# set only when both caches exist.
db = @local_db || @db
altdb = @local_db && @db ? @db : nil
if db.nil?
# No cache at all: nothing to read in fetch_db mode, otherwise fetch
# without storing (db is nil here).
return if opts[:fetch_db]
bibentry = new_bib_entry(searchcode, year, opts, stdclass, db: db,
id: id)
return bib_retval(bibentry, stdclass)
end
# Drop the entry from the primary cache when the cache reports it as not valid.
@semaphore.synchronize do
db.delete(id) unless db.valid_entry?(id, year)
end
if altdb
# In fetch_db mode the entry is read from the global (alternative) cache.
return bib_retval(altdb[id], stdclass) if opts[:fetch_db]
# NOTE(review): clone_entry presumably copies the entry for id from the
# cache given as argument into the receiver - confirm against DbCache.
@semaphore.synchronize do
db.clone_entry id, altdb if altdb.valid_entry? id, year
end
# Fetch from the processor outside the lock, only when still not cached.
entry = new_bib_entry(searchcode, year, opts, stdclass, db: db, id: id) unless db[id]
@semaphore.synchronize do
db[id] ||= entry
altdb.clone_entry(id, db) if !altdb.valid_entry?(id, year)
end
else
return bib_retval(db[id], stdclass) if opts[:fetch_db]
db[id] ||= new_bib_entry(searchcode, year, opts, stdclass, db: db,
id: id)
end
bib_retval(db[id], stdclass)
end
# Fetch a reference from the flavor's processor and serialise it as a cache
# entry.
#
# When a cache and a requested ID are given and the fetched document's own
# identifier is not contained (as "(identifier)") in the requested ID, the
# document is stored in the cache under its own ID and a
# "redirection <own ID>" entry is returned instead.
#
# @param code [String]
# @param year [String]
#
# @param opts [Hash]
# @option opts [Boolean] :all_parts If all-parts reference is required
# @option opts [Boolean] :keep_year If undated reference should return
#   actual reference with year
# @option opts [Integer] :retries (1) Number of network retries
#
# @param stdclass [Symbol]
# @param db [Relaton::DbCache, NilClass]
# @param id [String] docid
# @return [String] XML, a "not_found ..." marker or a "redirection ..." entry
def new_bib_entry(code, year, opts, stdclass, **args) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  bib = net_retry(code, year, opts, stdclass, opts.fetch(:retries, 1))
  bib_id = bib&.docidentifier&.first&.id
  cache = args[:db]
  requested_id = args[:id]

  # when docid doesn't match bib's id then return a reference to bib's id
  redirect = cache && requested_id && bib_id &&
    requested_id !~ %r{#{Regexp.quote("(#{bib_id})")}}
  return bib_entry(bib) unless redirect

  actual_id = std_id(bib_id, nil, {}, stdclass).first
  @semaphore.synchronize { cache[actual_id] ||= bib_entry bib }
  "redirection #{actual_id}"
end
# Ask the flavor's processor for a reference, trying again after a
# RelatonBib::RequestError until +retries+ attempts have been made.
#
# @param code [String]
# @param year [String, nil]
# @param opts [Hash] options passed to the processor
# @param stdclass [Symbol] flavor name
# @param retries [Integer] total number of attempts allowed
# @return [Object, nil] whatever the processor's #get returns
# @raise [RelatonBib::RequestError] when the last attempt fails
def net_retry(code, year, opts, stdclass, retries)
  processor = @registry.processors[stdclass]
  begin
    processor.get(code, year, opts)
  rescue RelatonBib::RequestError => e
    if retries > 1
      net_retry(code, year, opts, stdclass, retries - 1)
    else
      raise e
    end
  end
end
# Serialise a fetched item for the cache.
#
# @param bib [RelatonBib::BibliographicItem, nil] fetched item (any object
#   responding to #to_xml), or nil when nothing was found
# @return [String] XML, or "not_found <today's date>" (Date#to_s format,
#   yyyy-mm-dd) when the argument cannot be serialised
def bib_entry(bib)
  return bib.to_xml(bibdata: true) if bib.respond_to? :to_xml

  "not_found #{Date.today}"
end
# Open a cache directory.
#
# Static caches hold "yml" files and are returned as they are. For other
# cache types (which hold "xml" files) every sub-directory whose version
# check fails is deleted and a warning is logged.
#
# @param dir [String, nil] DB directory
# @param type [Symbol] :static, :global or :local
# @return [Relaton::DbCache, NilClass] nil when no directory is given
def open_cache_biblio(dir, type: :static) # rubocop:disable Metrics/MethodLength
  return nil if dir.nil?

  static = type == :static
  db = DbCache.new dir, static ? "yml" : "xml"
  return db if static

  Dir["#{dir}/*/"].each do |fdir|
    unless db.check_version?(fdir)
      FileUtils.rm_rf(fdir, secure: true)
      message = "[relaton] WARNING: cache #{fdir}: version is obsolete and cache is "\
                "cleared."
      Util.log(message, :warning)
    end
  end
  db
end
# Feed queued fetch requests to the workers pool until a nil (or false)
# item is popped from the queue.
#
# @param qwp [Hash]
# @option qwp [Queue] :queue The queue of references to fetch
# @option qwp [Relaton::WorkersPool] :workers_pool The pool of workers
def process_queue(qwp)
  queue = qwp[:queue]
  pool = qwp[:workers_pool]
  while (job = queue.pop)
    pool << job
  end
end
class << self
# Initialise and return a Relaton::Db instance with local and global
# cache names.
#
# @param opts [Hash]
# @option opts [String, nil] :local_cache local cache name; "relaton" is
#   used when nil or empty. The cache lives in "<name>/cache".
# @option opts [Boolean] :global_cache whether to use the global cache in
#   the user's home directory
# @option opts [Boolean] :flush_caches delete the cache directories before
#   opening them
# @return [Relaton::Db]
def init_bib_caches(**opts) # rubocop:disable Metrics/CyclomaticComplexity
  globalname = opts[:global_cache] ? global_bibliocache_name : nil
  localname = local_bibliocache_name(opts[:local_cache])
  flush_caches(globalname, localname) if opts[:flush_caches]
  Relaton::Db.new(globalname, localname)
end
private
# Delete the global and local cache directories; a nil name is skipped.
#
# @param gcache [String, nil] global cache directory
# @param lcache [String, nil] local cache directory
def flush_caches(gcache, lcache)
  FileUtils.rm_rf(gcache) unless gcache.nil?
  return if lcache.nil?

  FileUtils.rm_rf lcache
end
# Path of the global cache inside the current user's home directory.
#
# @return [String]
def global_bibliocache_name
  Dir.home + "/.relaton/cache"
end
# Directory of the local cache for a given cache name; a nil or empty name
# falls back to "relaton".
#
# @param cachename [String, nil]
# @return [String] "<name>/cache"
def local_bibliocache_name(cachename)
  base = (cachename.nil? || cachename.empty?) ? "relaton" : cachename
  "#{base}/cache"
end
end
end
end