# lib/rdf/format.rb
module RDF
##
# The base class for RDF serialization formats.
#
# @example Loading an RDF serialization format implementation
# require 'rdf/ntriples'
#
# @example Iterating over known RDF serialization formats
# RDF::Format.each { |klass| puts klass.name }
#
# @example Getting a serialization format class
# RDF::Format.for(:ntriples) #=> RDF::NTriples::Format
# RDF::Format.for("etc/doap.nt")
# RDF::Format.for(file_name: "etc/doap.nt")
# RDF::Format.for(file_extension: "nt")
# RDF::Format.for(content_type: "application/n-triples")
#
# @example Obtaining serialization format MIME types
# RDF::Format.content_types #=> {"application/n-triples" => [RDF::NTriples::Format]}
#
# @example Obtaining serialization format file extension mappings
# RDF::Format.file_extensions #=> {nt: [RDF::NTriples::Format]}
#
# @example Defining a new RDF serialization format class
# class RDF::NTriples::Format < RDF::Format
# content_type 'application/n-triples',
# extension: :nt,
# uri: RDF::URI("http://www.w3.org/ns/formats/N-Triples")
# content_encoding 'utf-8'
#
# reader RDF::NTriples::Reader
# writer RDF::NTriples::Writer
# end
#
# @example Instantiating an RDF reader or writer class (1)
# RDF::Format.for(:ntriples).reader.new($stdin) { |reader| ... }
# RDF::Format.for(:ntriples).writer.new($stdout) { |writer| ... }
#
# @example Instantiating an RDF reader or writer class (2)
# RDF::Reader.for(:ntriples).new($stdin) { |reader| ... }
# RDF::Writer.for(:ntriples).new($stdout) { |writer| ... }
#
# @abstract
# @see RDF::Reader
# @see RDF::Writer
# @see http://en.wikipedia.org/wiki/Resource_Description_Framework#Serialization_formats
class Format
extend ::Enumerable
##
# Enumerates known RDF serialization format classes.
#
# Given options from {Format.for}, it returns just those formats that match the specified criteria.
#
# @example finding all formats that have a writer supporting text/html
# RDF::Format.each(content_type: 'text/html', has_writer: true).to_a
# #=> [RDF::RDFa::Format]
#
# @param [String, #to_s] file_name (nil)
# @param [Symbol, #to_sym] file_extension (nil)
# @param [String, #to_s] content_type (nil)
# Content type may include wildcard characters, which will select among matching formats.
# Note that content_type will be taken from a URL opened using {RDF::Util::File.open_file}.
# @param [Boolean] has_reader (false)
# Only return a format having a reader.
# @param [Boolean] has_writer (false)
# Only return a format having a writer.
# @param [String, Proc] sample (nil)
# A sample of input used for performing format detection. If we find no formats, or we find more than one, and we have a sample, we can perform format detection to find a specific format to use, in which case we pick the last one we find
# @param [Boolean] all_if_none (true)
# Returns all formats if none match, otherwise no format. Note that having a `sample` overrides this, and will search through all formats, or all those filtered to find a sample that matches
# @yield [klass]
# @yieldparam [Class]
# @return [Enumerator]
def self.each(file_name: nil,
              file_extension: nil,
              content_type: nil,
              has_reader: false,
              has_writer: false,
              sample: nil,
              all_if_none: true,
              **options,
              &block)
  # Candidate selection: the first criterion present wins, in the order
  # content type, file name, file extension. `options` is accepted only so
  # that unrelated keys passed through from callers are tolerated.
  formats = case
  # Find a format based on the MIME content type:
  when content_type
    # Remove any media type parameters (";q=0.5", "; charset=utf-8") together
    # with the whitespace that may surround the separator.
    # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7
    mime_type = content_type.to_s.split(';').first.to_s.strip
    # Ignore text/plain, a historical encoding for N-Triples, which is
    # problematic in format detection, as many web servers will serve
    # content by default text/plain.
    if (mime_type == 'text/plain' && sample) || mime_type == '*/*'
      # All content types
      @@subclasses
    elsif mime_type.end_with?('/*')
      # Keep the trailing "/" so that "text/*" selects "text/..." only and
      # not some other top-level type which merely begins with "text".
      prefix = mime_type[0..-2]
      content_types.flat_map { |ct, fmts| ct.start_with?(prefix) ? fmts : [] }.uniq
    else
      content_types[mime_type]
    end
  # Find a format based on the file name:
  when file_name
    ext = File.extname(RDF::URI(file_name).path.to_s)[1..-1].to_s
    file_extensions[ext.to_sym]
  # Find a format based on the file extension:
  when file_extension
    file_extensions[file_extension.to_sym]
  else
    all_if_none ? @@subclasses : nil
  end || (sample ? @@subclasses : []) # If we can sample, check all classes

  # Subset by available reader and/or writer. Each requested capability is
  # checked on its own, so asking for both requires both.
  formats = formats.select do |f|
    (!has_reader || f.reader) && (!has_writer || f.writer)
  end

  # Without exactly one candidate, a sample lets each remaining format vote
  # through its own `detect`.
  if formats.length != 1 && sample
    text = sample.is_a?(Proc) ? sample.call : sample
    # Work on a private binary copy so the caller's string is never mutated.
    sample = text.to_s.dup.force_encoding(Encoding::ASCII_8BIT)
    formats = formats.select { |f| f.detect(sample) }
  end

  # With a block the formats are yielded; without one an Enumerator results.
  formats.each(&block)
end
##
# Finds an RDF serialization format class based on the given criteria. If multiple formats are identified, the last one found is returned; this allows discrimination of equivalent formats based on load order.
#
# @overload for(format)
# Finds an RDF serialization format class based on a symbolic name.
#
# @param [Symbol] format
# @return [Class]
#
# @overload for(filename)
# Finds an RDF serialization format class based on a file name.
#
# @param [String, RDF::URI] filename
# @return [Class]
#
# @overload for(options)
# Finds an RDF serialization format class based on various options.
#
# @param [Hash{Symbol => Object}] options ({})
# @option options [String, #to_s] :file_name (nil)
# @option options [Symbol, #to_sym] :file_extension (nil)
# @option options [String, #to_s] :content_type (nil)
# Content type may include wildcard characters, which will select among matching formats.
# Note that content_type will be taken from a URL opened using {RDF::Util::File.open_file}.
# @option options [Boolean] :has_reader (false)
# Only return a format having a reader.
# @option options [Boolean] :has_writer (false)
# Only return a format having a writer.
# @option options [String] :sample (nil)
# A sample of input used for performing format detection. If we find no formats, or we find more than one, and we have a sample, we can perform format detection to find a specific format to use, in which case we pick the last one we find
# @return [Class]
# @yieldreturn [String] another way to provide a sample, allowing the sample to be retrieved lazily.
#
# @return [Class]
def self.for(*arg, &block)
  # At most one selector is accepted; reject anything else before doing work.
  if arg.length > 1
    raise ArgumentError, "Format.for accepts zero or one argument, got #{arg.length}."
  end
  selector = arg.first # nil when called without arguments

  # Only a Hash selector contributes lookup options. A block acts as a lazily
  # evaluated sample, which an explicit :sample option overrides.
  options = selector.is_a?(Hash) ? selector : {}
  options = {sample: block}.merge(options) if block_given?

  candidates =
    case selector
    when String, RDF::URI
      # A string or URI is treated as a file name
      each(file_name: selector, **options).to_a
    when Symbol
      # Match against the symbols each known format advertises
      by_symbol = each(**options).select { |format| format.symbols.include?(selector) }
      if !by_symbol.empty?
        by_symbol
      elsif selector == :ntriples
        # Hard-wired fallbacks: these must resolve even if autoloading fails
        [RDF::NTriples::Format]
      elsif selector == :nquads
        [RDF::NQuads::Format]
      else
        []
      end
    else
      # Hash (or no) selector: never fall back to "all formats"
      each(**options.merge(all_if_none: false)).to_a
    end

  # The last match wins, so load order decides between equivalent formats
  candidates.last
end
##
# Returns MIME content types for known RDF serialization formats.
#
# @example retrieving a list of supported Mime types
#
# RDF::Format.content_types.keys
#
# @return [Hash{String => Array<Class>}]
def self.content_types
# Registry shared by the whole class hierarchy; `content_type` appends to it
# as formats declare their MIME types. The live Hash is returned, not a copy.
@@content_types
end
##
# Returns file extensions for known RDF serialization formats.
#
# @example retrieving a list of supported file extensions
#
# RDF::Format.file_extensions.keys
#
# @return [Hash{Symbol => Array<Class>}]
def self.file_extensions
# Registry shared by the whole class hierarchy; `content_type` fills it from
# its :extension / :extensions options. The live Hash is returned, not a copy.
@@file_extensions
end
##
# Returns the URIs registered for known RDF serialization formats.
#
# @example retrieving a list of supported file format URIs
#
# RDF::Format.uris.keys
#
# @see https://www.w3.org/ns/formats/
# @return [Hash{RDF::URI => Class}]
def self.uris
# Registry shared by the whole class hierarchy; `content_type` stores the
# declaring format class under RDF::URI(uri) when given a :uri option.
@@uris
end
##
# Returns the set of format symbols for available RDF::Reader subclasses.
#
# @example
#
# symbols = RDF::Format.reader_symbols
# format = RDF::Format.for(symbols.first)
#
# @return [Array<Symbol>]
def self.reader_symbols
  # Every format that registered a reader (class or lazy block) is a key here
  formats_with_reader = @@readers.keys
  symbol_lists = formats_with_reader.map { |format| format.symbols }
  symbol_lists.flatten.uniq
end
##
# Returns the set of content types for available RDF::Reader subclasses.
#
# @example
#
# content_types = RDF::Format.reader_types
# format = RDF::Format.for(content_type: content_types.first)
#
# @return [Array<String>]
def self.reader_types
  # Resolve each reader symbol back to its format, then merge the formats'
  # content type lists, keeping the first occurrence of each type.
  formats = reader_symbols.map { |symbol| RDF::Format.for(symbol) }
  formats.map { |format| format.content_type }.flatten(1).uniq
end
##
# Returns the set of content types with quality for available RDF::Reader subclasses.
#
# @example
#
# accept_types = RDF::Format.accept_types
# # => %w(text/html;q=0.5 text/turtle ...)
#
# @return [Array<String>]
def self.accept_types
  # Resolve each reader symbol back to its format, then merge the formats'
  # Accept-header values, keeping the first occurrence of each value.
  formats = reader_symbols.map { |symbol| RDF::Format.for(symbol) }
  formats.map { |format| format.accept_type }.flatten(1).uniq
end
##
# Returns the set of format symbols for available RDF::Writer subclasses.
#
# @example
#
# symbols = RDF::Format.writer_symbols
# format = RDF::Format.for(symbols.first)
#
# @return [Array<Symbol>]
def self.writer_symbols
  # Every format that registered a writer (class or lazy block) is a key here
  formats_with_writer = @@writers.keys
  symbol_lists = formats_with_writer.map { |format| format.symbols }
  symbol_lists.flatten.uniq
end
##
# Returns the set of content types for available RDF::Writer subclasses.
#
# @example
#
# content_types = RDF::Format.writer_types
# format = RDF::Format.for(content_type: content_types.first)
#
# @return [Array<String>]
def self.writer_types
  # Resolve each writer symbol back to its format, then merge the formats'
  # content type lists, keeping the first occurrence of each type.
  formats = writer_symbols.map { |symbol| RDF::Format.for(symbol) }
  formats.map { |format| format.content_type }.flatten(1).uniq
end
##
# Returns a symbol appropriate to use with `RDF::Format.for()`
#
# @note Defaults to the last element of the class name before `Format` downcased and made a symbol. Individual formats can override this.
# @return [Symbol]
def self.to_sym
  *namespaces, leaf = to_s.split("::")
  # A trailing "Format" is only a marker; the enclosing namespace names the format
  leaf = namespaces.last if leaf == 'Format'
  # Nothing usable (e.g. the class is literally named "Format"): no symbol
  return unless leaf.is_a?(String)
  leaf.downcase.to_s.to_sym
end
##
# Returns the set of symbols for this format appropriate for use with `RDF::Format.for()`
#
# @note Individual formats can override this to provide an array of symbols; otherwise, it uses `self.to_sym`
# @return [Array<Symbol>]
# @see to_sym
# @since 2.0
def self.symbols
# Default: the single symbol derived by `to_sym`, wrapped in an Array so
# callers such as `Format.for` can always test membership with `include?`.
[self.to_sym]
end
##
# Returns a human-readable name for the format.
# Subclasses should override this to use something
# different than the Class name.
#
# @example
#
# RDF::NTriples::Format.name => "N-Triples"
#
# @return [String]
def self.name
  segments = to_s.split("::")
  # A trailing "Format" segment is skipped in favour of its enclosing namespace
  label = segments.last == 'Format' ? segments[-2] : segments.last
  # to_s turns a missing segment (nil) into the empty string
  label.to_s
end
##
# Retrieves or defines the reader class for this RDF serialization
# format.
#
# @overload reader(klass)
# Defines the reader class for this RDF serialization format.
#
# The class should be a subclass of {RDF::Reader}, or implement the
# same interface.
#
# @param [Class] klass
# @return [void]
#
# @overload reader
# Defines the reader class for this RDF serialization format.
#
# The block should return a subclass of {RDF::Reader}, or a class that
# implements the same interface. The block won't be invoked until the
# reader class is first needed.
#
# @yield
# @yieldreturn [Class] klass
# @return [void]
#
# @overload reader
# Retrieves the reader class for this RDF serialization format.
#
# @return [Class]
#
# @return [void]
def self.reader(klass = nil, &block)
  # Definition forms: an explicit class takes precedence over a block; a block
  # is stored as-is and only invoked on first retrieval.
  return @@readers[self] = klass if klass
  return @@readers[self] = block if block_given?

  # Retrieval form
  registered = @@readers[self]
  return registered unless registered.is_a?(Proc)
  # First retrieval of a lazily defined reader: run the block once and cache
  # its result in place of the block.
  @@readers[self] = registered.call
end
##
# Retrieves or defines the writer class for this RDF serialization
# format.
#
# @overload writer(klass)
# Defines the writer class for this RDF serialization format.
#
# The class should be a subclass of {RDF::Writer}, or implement the
# same interface.
#
# @param [Class] klass
# @return [void]
#
# @overload writer
# Defines the writer class for this RDF serialization format.
#
# The block should return a subclass of {RDF::Writer}, or a class that
# implements the same interface. The block won't be invoked until the
# writer class is first needed.
#
# @yield
# @yieldreturn [Class] klass
# @return [void]
#
# @overload writer
# Retrieves the writer class for this RDF serialization format.
#
# @return [Class]
#
# @return [void]
def self.writer(klass = nil, &block)
  # Definition forms: an explicit class takes precedence over a block; a block
  # is stored as-is and only invoked on first retrieval.
  return @@writers[self] = klass if klass
  return @@writers[self] = block if block_given?

  # Retrieval form
  registered = @@writers[self]
  return registered unless registered.is_a?(Proc)
  # First retrieval of a lazily defined writer: run the block once and cache
  # its result in place of the block.
  @@writers[self] = registered.call
end
##
# Hash of CLI commands appropriate for this format
# @return [Hash{Symbol => {description: String, lambda: Lambda(Array, Hash)}}]
def self.cli_commands
# Base implementation: no commands. A fresh Hash is returned on every call.
# NOTE(review): presumably formats override this to add commands -- confirm.
{}
end
##
# Use a text sample to detect the format of an input file. Sub-classes implement
# a matcher sufficient to detect probable format matches, including disambiguating
# between other similar formats.
#
# Used to determine format class from loaded formats by {RDF::Format.for} when a
# match cannot be unambiguously found otherwise.
#
# @example
# RDF::NTriples::Format.detect("<a> <b> <c> .") #=> true
#
# @param [String] sample Beginning several bytes (~ 1K) of input.
# @return [Boolean]
def self.detect(sample)
# Base implementation never matches, so a format that does not override this
# is dropped whenever `each` filters its candidates by sample.
false
end
class << self
# Class-level aliases: `reader_class` and `writer_class` behave exactly like
# `reader` and `writer` as defined above this point.
alias_method :reader_class, :reader
alias_method :writer_class, :writer
end
##
# Retrieves or defines MIME content types for this RDF serialization format.
#
# @overload content_type(type, options)
# Retrieves or defines the MIME content type for this RDF serialization format.
#
# Optionally also defines alias MIME content types for this RDF serialization format.
#
# Optionally also defines a file extension, or a list of file
# extensions, that should be mapped to the given MIME type and handled
# by this class.
#
# Optionally, both `type`, `alias`, and `aliases`, may be parameterized
# for expressing quality.
#
# content_type "text/html;q=0.4"
#
# @param [String] type
# @param [Hash{Symbol => Object}] options
# @option options [String] :alias (nil)
# @option options [Array<String>] :aliases (nil)
# @option options [Symbol] :extension (nil)
# @option options [Array<Symbol>] :extensions (nil)
# @option options [URI] :uri (nil)
# @return [void]
#
# @overload content_type
# Retrieves the MIME content types for this RDF serialization format.
#
# The return is an array where the first element is the canonical
# MIME type for the format and following elements are alias MIME types.
#
# @return [Array<String>]
def self.content_type(type = nil, options = {})
  # Retrieval form: canonical type first, then every other MIME type under
  # which this class is registered.
  if type.nil?
    canonical = @@content_type[self]
    alias_types = @@content_types.map do |mime, classes|
      mime if classes.include?(self) && mime != canonical
    end
    return ([canonical] + alias_types).compact
  end

  # Adds this class to the list stored under `key`, creating the list on
  # first use and never adding the class twice.
  register = lambda do |registry, key|
    classes = (registry[key] ||= [])
    classes << self unless classes.include?(self)
  end

  # The type as given (possibly carrying parameters such as ";q=0.4") is kept
  # for Accept headers; the bare type is used for content type lookup.
  bare_type = type.split(';').first
  @@content_type[self] = bare_type
  register.call(@@content_types, bare_type)
  register.call(@@accept_types, type)

  # :extension takes precedence over :extensions when both are supplied
  if (extensions = options[:extension] || options[:extensions])
    Array(extensions).map(&:to_sym).each do |ext|
      register.call(@@file_extensions, ext)
    end
  end

  # :alias takes precedence over :aliases when both are supplied
  if (aliases = options[:alias] || options[:aliases])
    Array(aliases).each do |alias_type|
      bare_alias = alias_type.split(';').first
      register.call(@@accept_types, alias_type)
      register.call(@@content_types, bare_alias)
    end
  end

  # URI identifying this format
  format_uri = options[:uri]
  @@uris[RDF::URI(format_uri)] = self if format_uri
end
##
# Returns an array of values appropriate for an Accept header.
# Same as `self.content_type`, if no parameter is given when defined.
#
# @return [Array<String>]
def self.accept_type
  # Keep only the Accept values whose registered formats include this class
  own_entries = @@accept_types.select { |_type, formats| formats.include?(self) }
  own_entries.keys.compact
end
##
# Retrieves file extensions for this RDF serialization format.
#
# The return is an array where the first element is the canonical
# file extension for the format and following elements are alias file extensions.
#
# @return [Array<Symbol>]
def self.file_extension
  # Keep only the extensions whose registered formats include this class
  own_entries = @@file_extensions.select { |_ext, formats| formats.include?(self) }
  own_entries.keys.compact
end
##
# Retrieves any format URI defined for this format.
#
# @return [URI]
def self.uri
  # Should several URIs have been registered for this class, the most
  # recently inserted one is returned; nil when there is none.
  own_entries = @@uris.select { |_uri, format| format.equal?(self) }
  own_entries.keys.last
end
class << self
# Class-level alias: `to_uri` behaves exactly like `uri` above.
alias_method :to_uri, :uri
end
protected
##
# Defines a required Ruby library for this RDF serialization format.
#
# The given library will be required lazily, i.e. only when it is
# actually first needed, such as when instantiating a reader or parser
# instance for this format.
#
# @param [String, #to_s] library
# @return [void]
def self.require(library)
  # Only records the library name against this class; nothing is loaded here.
  @@requires[self] ||= []
  @@requires[self] << library.to_s
end
##
# Defines the content encoding for this RDF serialization format.
#
# When called without an encoding, it returns the currently defined
# content encoding for this format
#
# @param [#to_sym] encoding
# @return [Symbol, String] the defined encoding, or "utf-8" when none has been defined
def self.content_encoding(encoding = nil)
  if encoding
    # Definition form: remember the encoding for this class as a Symbol
    @@content_encoding[self] = encoding.to_sym
  end
  current = @@content_encoding[self]
  # Note the fallback is a String while stored encodings are Symbols
  return "utf-8" unless current
  current
end
# NOTE(review): a bare `private` changes the default visibility of instance
# methods defined after it; methods written as `def self.…` are not affected.
private
# Format classes are used through their class-level API only, so direct
# instantiation is disabled.
private_class_method :new
# Registries shared by Format and every subclass. They are assigned at the end
# of the class body, which runs before any of the methods above can be called.
# Format class => Array of library names recorded by `require`
@@requires = {} # @private
# Extension Symbol => Array of format classes
@@file_extensions = {} # @private
# Format class => its canonical MIME type (parameters removed)
@@content_type = {} # @private
# MIME type (parameters removed) => Array of format classes
@@content_types = {} # @private
# Format class => encoding Symbol set through `content_encoding`
@@content_encoding = {} # @private
# MIME type as declared (parameters kept) => Array of format classes
@@accept_types = {} # @private
# Format class => reader class, or a block yielding it lazily
@@readers = {} # @private
# Format class => writer class, or a block yielding it lazily
@@writers = {} # @private
# Every subclass of Format, in the order `inherited` saw them
@@subclasses = [] # @private
# RDF::URI => format class
@@uris = {} # @private
##
# @private
# @return [void]
def self.inherited(child)
# Ruby calls this hook whenever a class is derived from Format (directly or
# indirectly); recording the child is what lets `each` enumerate all formats.
@@subclasses << child if child
# Continue the hook chain so other `inherited` implementations still run.
super
end
end # Format
##
# The base class for RDF serialization format errors.
class FormatError < IOError
# Intentionally empty: the class adds no behavior of its own to IOError and
# exists so that format problems can be rescued by a dedicated type.
end # FormatError
end # RDF