lib/rdf/writer.rb
# -*- encoding: utf-8 -*-
module RDF
##
# The base class for RDF serializers.
#
# @example Loading an RDF writer implementation
# require 'rdf/ntriples'
#
# @example Iterating over known RDF writer classes
# RDF::Writer.each { |klass| puts klass.name }
#
# @example Obtaining an RDF writer class
# RDF::Writer.for(:ntriples) #=> RDF::NTriples::Writer
# RDF::Writer.for("spec/data/output.nt")
# RDF::Writer.for(file_name: "spec/data/output.nt")
# RDF::Writer.for(file_extension: "nt")
# RDF::Writer.for(content_type: "application/n-triples")
#
# @example Instantiating an RDF writer class
# RDF::Writer.for(:ntriples).new($stdout) { |writer| ... }
#
# @example Serializing RDF statements into a file
# RDF::Writer.open("spec/data/output.nt") do |writer|
# graph.each_statement do |statement|
# writer << statement
# end
# end
#
# @example Serializing RDF statements into a string
# RDF::Writer.for(:ntriples).buffer do |writer|
# graph.each_statement do |statement|
# writer << statement
# end
# end
#
# @example Detecting invalid output
# logger = Logger.new([])
# RDF::Writer.for(:ntriples).buffer(logger: logger) do |writer|
# statement = RDF::Statement.new(
# RDF::URI("https://rubygems.org/gems/rdf"),
# RDF::URI("http://purl.org/dc/terms/creator"),
# nil)
# writer << statement
# end # => RDF::WriterError
# logger.empty? => false
#
# @abstract
# @see RDF::Format
# @see RDF::Reader
class Writer
extend ::Enumerable
extend RDF::Util::Aliasing::LateBound
include RDF::Util::Logger
include RDF::Writable
##
# Enumerates known RDF writer classes.
#
# @yield [klass]
# @yieldparam [Class] klass
# @yieldreturn [void] ignored
# @return [Enumerator]
def self.each(&block)
  # Collect the writer of every registered format; nil entries (presumably
  # formats that have no writer) are dropped before iterating.
  writer_classes = RDF::Format.map { |format_class| format_class.writer }
  writer_classes.compact.each(&block)
end
##
# Finds an RDF writer class based on the given criteria.
#
# @overload for(format)
# Finds an RDF writer class based on a symbolic name.
#
# @param [Symbol] format
# @return [Class]
#
# @overload for(filename)
# Finds an RDF writer class based on a file name.
#
# @param [String] filename
# @return [Class]
#
# @overload for(options = {})
# Finds an RDF writer class based on various options.
#
# @param [Hash{Symbol => Object}] options
# @option options [String, #to_s] :file_name (nil)
# @option options [Symbol, #to_sym] :file_extension (nil)
# @option options [String, #to_s] :content_type (nil)
# @return [Class]
#
# @return [Class]
def self.for(*arg, &block)
  # The block is accepted for interface compatibility; it is not used here.
  # Collapse the splat into nil (no argument) or the single argument given.
  case arg.length
  when 0 then arg = nil
  when 1 then arg = arg.first
  else
    # Name this method (not Format.for) so the caller can tell which call failed.
    raise ArgumentError, "Writer.for accepts zero or one argument, got #{arg.length}."
  end
  # Hash criteria get has_writer: true added (presumably so that Format.for
  # only considers formats that provide a writer).
  arg = arg.merge(has_writer: true) if arg.is_a?(Hash)
  # Prefer the format bound to this writer class; fall back to Format.for.
  # Returns nil when no format is found.
  format = self.format || Format.for(arg)
  format.writer if format
end
##
# Retrieves the RDF serialization format class for this writer class.
#
# @return [Class]
def self.format(klass = nil)
  # Only the no-argument form performs a lookup; a non-nil argument yields nil.
  return nil unless klass.nil?
  # Scan the registered formats for the one whose writer is this class.
  Format.each { |candidate| return candidate if candidate.writer == self }
  nil # not found
end
##
# Options suitable for automatic Writer provisioning.
# @return [Array<RDF::CLI::Option>]
def self.options
  # Each option is built separately and the list is assembled at the end.
  # The block attached to an option is presumably what RDF::CLI uses to turn
  # the raw command-line argument into the option's value.
  canonicalize = RDF::CLI::Option.new(
    symbol: :canonicalize,
    datatype: TrueClass,
    control: :checkbox,
    on: ["--canonicalize"],
    description: "Canonicalize input/output."
  ) { true }

  encoding = RDF::CLI::Option.new(
    symbol: :encoding,
    datatype: Encoding,
    control: :text,
    on: ["--encoding ENCODING"],
    description: "The encoding of the input stream."
  ) { |arg| Encoding.find arg }

  prefixes = RDF::CLI::Option.new(
    symbol: :prefixes,
    datatype: Hash,
    multiple: true,
    control: :none,
    on: ["--prefixes PREFIX,PREFIX"],
    description: "A comma-separated list of prefix:uri pairs."
  ) do |arg|
    # "a:http://a/,b:http://b/" => {a: RDF::URI("http://a/"), b: RDF::URI("http://b/")}
    arg.split(',').each_with_object({}) do |pair, mapping|
      name, uri = pair.split(':', 2)
      mapping[name.to_sym] = RDF::URI(uri)
    end
  end

  unique_bnodes = RDF::CLI::Option.new(
    symbol: :unique_bnodes,
    datatype: TrueClass,
    control: :checkbox,
    on: ["--unique-bnodes"],
    description: "Use unique Node identifiers."
  ) { true }

  [canonicalize, encoding, prefixes, unique_bnodes]
end
class << self
  # Second name for the class-level `format` lookup.
  alias_method :format_class, :format

  ##
  # Decides whether this writer may be used for the given accept-params.
  # The same `accept_params` are later handed to the writer instance.
  #
  # Without a block every parameter set is accepted; with a block, the
  # block's result is the answer.
  #
  # @example rejecting a writer based on a profile
  #   JSON::LD::Writer.accept?(profile: "http://www.w3.org/ns/json-ld#compacted http://example.org/black-listed")
  #     # => false
  #
  # @param [Hash{Symbol => String}] accept_params
  # @yield [accept_params] if a block is given, returns the result of evaluating that block
  # @yieldparam [Hash{Symbol => String}] accept_params
  # @return [Boolean]
  # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
  def accept?(accept_params)
    return true unless block_given?
    yield(accept_params)
  end
end
##
# @param [RDF::Enumerable, #each] data
# the graph or repository to dump
# @param [IO, File, String] io
# the output stream or file to write to
# @param [Hash{Symbol => Object}] options
# passed to {RDF::Writer#initialize} or {RDF::Writer.buffer}
# @return [void]
def self.dump(data, io = nil, **options)
  # A String is treated as a file name. Open it in block form so the handle
  # is closed (and flushed) once serialization finishes or fails; previously
  # the file was opened here and never closed.
  if io.is_a?(String)
    return File.open(io, 'w') { |file| dump(data, file, **options) }
  end

  # Prefer #each_statement when the data source offers it, else plain #each.
  method = data.respond_to?(:each_statement) ? :each_statement : :each
  if io
    # Stream to the caller's IO; it is flushed but left open for the caller.
    new(io, **options) do |writer|
      io.set_encoding(writer.encoding) if io.respond_to?(:set_encoding)
      data.send(method) do |statement|
        writer << statement
      end
      writer.flush
    end
  else
    # No destination given: serialize into a String and return it.
    buffer(**options) do |writer|
      data.send(method) do |statement|
        writer << statement
      end
    end
  end
end
##
# Buffers output into a string buffer.
#
# @param [Hash{Symbol => Object}] options
# passed to {RDF::Writer#initialize}
# @yield [writer]
# @yieldparam [RDF::Writer] writer
# @yieldreturn [void]
# @return [String]
# @raise [ArgumentError] if no block is provided
def self.buffer(*args, **options, &block)
  raise ArgumentError, "block expected" if block.nil?
  StringIO.open do |sink|
    # The writer is constructed with a block, so the caller's block runs
    # inside the constructor; the string is read only after it returns.
    new(sink, *args, **options) do |writer|
      sink.set_encoding(writer.encoding)
      block.call(writer)
    end
    sink.string
  end
end
##
# Writes output to the given `filename`.
#
# @param [String, #to_s] filename
# @param [Symbol] format (nil)
# @param [Hash{Symbol => Object}] options
# any additional options (see {RDF::Writer#initialize} and {RDF::Format.for})
# @return [RDF::Writer]
def self.open(filename, format: nil, **options, &block)
  # Fail before the file is created or truncated. Previously a missing block
  # surfaced as NoMethodError on nil after the file had already been opened
  # for writing. Same error as .buffer, for consistency.
  raise ArgumentError, "block expected" unless block_given?
  File.open(filename, 'wb') do |file|
    # Writer lookup criteria: the caller's options plus the file name,
    # unless an explicit format was given.
    format_options = options.dup
    format_options[:file_name] ||= filename
    self.for(format || format_options).new(file, **options) do |writer|
      file.set_encoding(writer.encoding)
      block.call(writer)
    end
  end
end
##
# Returns a symbol appropriate to use with RDF::Writer.for()
# @return [Symbol]
def self.to_sym
  # Delegate to the format bound to this writer class.
  writer_format = self.format
  writer_format.to_sym
end
##
# Returns a symbol appropriate to use with RDF::Writer.for()
# @return [Symbol]
def to_sym
  # An instance answers with its class's symbol.
  writer_class = self.class
  writer_class.to_sym
end
##
# Initializes the writer.
#
# @param [IO, File] output
# the output stream
# @param [Hash{Symbol => Object}] options
# any additional options
# @option options [Encoding, String, Symbol] :encoding
# the encoding to use on the output stream.
# Defaults to the `content_encoding` of the format associated with this writer.
# @option options [Boolean] :canonicalize (false)
# whether to canonicalize terms when serializing
# @option options [Boolean] :validate (false)
# whether to validate terms when serializing
# @option options [Hash] :prefixes (Hash.new)
# the prefix mappings to use (not supported by all writers)
# @option options [#to_s] :base_uri (nil)
# the base URI to use when constructing relative URIs (not supported
# by all writers)
# @option options [Boolean] :unique_bnodes (false)
# Use unique {Node} identifiers, defaults to using the identifier which the node was originally initialized with (if any). Implementations should ensure that Nodes are serialized using a unique representation independent of any identifier used when creating the node. See {NTriples::Writer#format_node}
# @option options [Hash{Symbol => String}] :accept_params
# Parameters from ACCEPT header entry for the media-range matching this writer.
# @yield [writer] `self`
# @yieldparam [RDF::Writer] writer
# @yieldreturn [void]
def initialize(output = $stdout, **options, &block)
  @output = output
  @options = options.dup # private copy; the caller's hash is not shared
  # Blank-node bookkeeping used while writing statements.
  @nodes = {}
  @node_id = 0
  @node_id_map = {}
  return unless block_given?

  # With a block, the whole document is produced here:
  # prologue, the caller's block, then the epilogue.
  write_prologue
  if block.arity == 1
    block.call(self)
  else
    # Blocks that do not take exactly one argument run in the writer's context.
    instance_eval(&block)
  end
  write_epilogue
end
##
# Any additional options for this writer.
#
# @return [Hash]
# @since 0.2.2
attr_reader :options
##
# Returns the base URI used for this writer.
#
# @example
# writer.base_uri #=> RDF::URI('http://example.org/')
#
# @return [RDF::URI]
# @since 0.3.4
def base_uri
  # nil when no :base_uri option was supplied.
  configured = @options[:base_uri]
  configured ? RDF::URI(configured) : nil
end
##
# Returns the URI prefixes currently defined for this writer.
#
# @example
# writer.prefixes[:dc] #=> RDF::URI('http://purl.org/dc/terms/')
#
# @return [Hash{Symbol => RDF::URI}]
# @since 0.2.2
def prefixes
  # Install an empty mapping on first use so callers can add to it in place.
  @options[:prefixes] = {} unless @options[:prefixes]
  @options[:prefixes]
end
##
# Defines the given URI prefixes for this writer.
#
# @example
# writer.prefixes = {
# dc: RDF::URI('http://purl.org/dc/terms/'),
# }
#
# @param [Hash{Symbol => RDF::URI}] prefixes
# @return [Hash{Symbol => RDF::URI}]
# @since 0.3.0
def prefixes=(prefixes)
  # Replaces the whole mapping held in the writer's options.
  @options.store(:prefixes, prefixes)
end
##
# Defines the given named URI prefix for this writer.
#
# @example Defining a URI prefix
# writer.prefix :dc, RDF::URI('http://purl.org/dc/terms/')
#
# @example Returning a URI prefix
# writer.prefix(:dc) #=> RDF::URI('http://purl.org/dc/terms/')
#
# @overload prefix(name, uri)
# @param [Symbol, #to_s] name
# @param [RDF::URI, #to_s] uri
#
# @overload prefix(name)
# @param [Symbol, #to_s] name
#
# @return [RDF::URI]
def prefix(name, uri = nil)
  # Normalize the name to a Symbol key; an empty name maps to the nil key.
  key =
    if name.to_s.empty?
      nil
    elsif name.respond_to?(:to_sym)
      name.to_sym
    else
      name.to_s.to_sym
    end
  # One argument reads the mapping, two arguments define it.
  return prefixes[key] if uri.nil?
  prefixes[key] = uri
end
alias_method :prefix!, :prefix
##
# Returns the encoding of the output stream.
#
# @return [Encoding]
def encoding
  requested = @options[:encoding]
  # Names given as String or Symbol are resolved on every call.
  if requested.is_a?(String) || requested.is_a?(Symbol)
    return Encoding.find(requested.to_s)
  end
  return requested if requested.is_a?(Encoding)

  # Anything else: fall back to the format's content encoding, memoized in
  # the options when the current value is nil or false.
  @options[:encoding] ||= Encoding.find(self.class.format.content_encoding.to_s)
end
##
# Returns `true` if statements and terms should be validated.
#
# @return [Boolean] `true` or `false`
# @since 1.0.8
def validate?
  # Coerce to a real Boolean so the documented `true`/`false` contract holds
  # even when :validate is unset (nil) or set to some other truthy value.
  !!@options[:validate]
end
##
# Returns `true` if terms should be in canonical form.
#
# @note This is for term canonicalization, for graph/dataset canonicalization use `RDF::Normalize`.
#
# @return [Boolean] `true` or `false`
# @since 1.0.8
def canonicalize?
  # Coerce to a real Boolean so the documented `true`/`false` contract holds
  # even when :canonicalize is unset (nil) or set to some other truthy value.
  !!@options[:canonicalize]
end
##
# Flushes the underlying output buffer.
#
# @return [self]
def flush
  # Outputs that cannot be flushed are left alone; the writer itself is
  # returned either way.
  tap { @output.flush if @output.respond_to?(:flush) }
end
alias_method :flush!, :flush
##
# @return [self]
# @abstract
def write_prologue
  # Remember how many errors were already logged, so that write_epilogue
  # can tell whether new ones appeared while writing.
  error_count = log_statistics[:error]
  @logged_errors_at_prolog = error_count.to_i
  self
end
##
# @return [self]
# @raise [RDF::WriterError] if errors logged during processing.
# @abstract
def write_epilogue
  # write_prologue records the error count seen at the start. When it never
  # ran (e.g. the epilogue is invoked on its own), treat the baseline as 0
  # instead of failing on `Integer > nil`.
  baseline = @logged_errors_at_prolog.to_i
  if log_statistics[:error].to_i > baseline
    raise RDF::WriterError, "Errors found during processing"
  end
  self
end
##
# @param [String] text
# @return [self]
# @abstract
def write_comment(text)
  # Base implementation emits nothing: `text` is ignored and the writer
  # itself is returned.
  self
end
##
# Add a statement to the writer. This will check to ensure that the statement is complete (no nil terms) and is valid, if the `:validate` option is set.
#
# Additionally, it will de-duplicate BNode terms sharing a common identifier.
#
# @param [RDF::Statement] statement
# @return [self]
# @note logs error if attempting to write an invalid {RDF::Statement} or if canonicalizing a statement which cannot be canonicalized.
def write_statement(statement)
  # Canonicalize first when the :canonicalize option is set.
  statement = statement.canonicalize! if canonicalize?
  # Make sure BNodes in statement use unique identifiers
  if statement.node?
    # NOTE: the array built by `map` is discarded; this pass exists for its
    # side effects on the nodes and on @nodes / @node_id_map.
    statement.to_quad.map do |term|
      if term.is_a?(RDF::Node)
        # Follow the chain of `original` references back to its root.
        term = term.original while term.original
        @nodes[term] ||= begin
          # Account for duplicated nodes
          @node_id_map[term.to_s] ||= term
          if !@node_id_map[term.to_s].equal?(term)
            # Rename node
            term.make_unique!
            @node_id_map[term.to_s] = term
          end
        end
      else
        term
      end
    end
    # Rebuild the statement from its quad (presumably so that renamed nodes
    # are reflected in it).
    statement = RDF::Statement.from(statement.to_quad)
  end
  if statement.incomplete?
    log_error "Statement #{statement.inspect} is incomplete"
  elsif validate? && statement.invalid?
    log_error "Statement #{statement.inspect} is invalid"
  elsif respond_to?(:write_quad)
    # Writers that implement #write_quad receive the graph name as well.
    write_quad(*statement.to_quad)
  else
    write_triple(*statement.to_triple)
  end
  self
rescue ArgumentError => e
  # Log instead of propagating, and still return the writer as documented
  # rather than whatever log_error returns.
  log_error e.message
  self
end
alias_method :insert_statement, :write_statement # support the RDF::Writable interface
##
# @param [Array<Array(RDF::Resource, RDF::URI, RDF::Term)>] triples
# @return [self]
# @note logs error if attempting to write an invalid {RDF::Statement} or if canonicalizing a statement which cannot be canonicalized.
def write_triples(*triples)
  # Each entry is splatted into #write_triple; the writer is the return value.
  triples.each_with_object(self) { |terms, _writer| write_triple(*terms) }
end
##
# @param [RDF::Resource] subject
# @param [RDF::URI] predicate
# @param [RDF::Term] object
# @return [self]
# @raise [NotImplementedError] unless implemented in subclass
# @note logs error if attempting to write an invalid {RDF::Statement} or if canonicalizing a statement which cannot be canonicalized.
# @abstract
def write_triple(subject, predicate, object)
  # Abstract hook: concrete writers must override this.
  raise NotImplementedError, "#{self.class}#write_triple"
end
##
# @param [RDF::Term] term
# @return [String]
# @since 0.3.0
def format_term(term, **options)
  # Dispatch on the term's type; the order of the branches is significant
  # and unknown types yield nil.
  case term
  when String
    # Plain Ruby strings are turned into literals first.
    format_literal(RDF::Literal(term, **options), **options)
  when RDF::List
    format_list(term, **options)
  when RDF::Literal
    format_literal(term, **options)
  when RDF::URI
    format_uri(term, **options)
  when RDF::Node
    format_node(term, **options)
  when RDF::Statement
    format_quotedTriple(term, **options)
  end
end
##
# @param [RDF::Node] value
# @param [Hash{Symbol => Object}] options = ({})
# @option options [Boolean] :unique_bnodes (false)
# Serialize node using unique identifier, rather than any used to create the node.
# @return [String]
# @raise [NotImplementedError] unless implemented in subclass
# @abstract
def format_node(value, **options)
  # Abstract hook: concrete writers must override this.
  raise NotImplementedError, "#{self.class}#format_node"
end
##
# @param [RDF::URI] value
# @param [Hash{Symbol => Object}] options = ({})
# @return [String]
# @raise [NotImplementedError] unless implemented in subclass
# @abstract
def format_uri(value, **options)
  # Abstract hook: concrete writers must override this.
  raise NotImplementedError, "#{self.class}#format_uri"
end
##
# @param [RDF::Literal, String, #to_s] value
# @param [Hash{Symbol => Object}] options = ({})
# @return [String]
# @raise [NotImplementedError] unless implemented in subclass
# @abstract
def format_literal(value, **options)
  # Abstract hook: concrete writers must override this.
  raise NotImplementedError, "#{self.class}#format_literal"
end
##
# @param [RDF::List] value
# @param [Hash{Symbol => Object}] options = ({})
# @return [String]
# @abstract
# @since 0.2.3
def format_list(value, **options)
  # The base implementation formats only the list's subject term.
  head = value.subject
  format_term(head, **options)
end
##
# Formats a referenced triple.
#
# @example
# <<<s> <p> <o>>> <p> <o> .
#
# @param [RDF::Statement] value
# @param [Hash{Symbol => Object}] options = ({})
# @return [String]
# @raise [NotImplementedError] unless implemented in subclass
# @abstract
def format_quotedTriple(value, **options)
  # Abstract hook: concrete writers must override this. The message names
  # this method, as the sibling format_* hooks do; it used to mention a
  # non-existent `format_statement`.
  raise NotImplementedError, "#{self.class}#format_quotedTriple"
end
protected
##
# @return [void]
def puts(*args)
  # Transcode every argument to the writer's encoding before emitting it.
  encoded_lines = args.map do |line|
    line.encode(encoding)
  end
  @output.puts(*encoded_lines)
end
##
# @param [RDF::Resource] term
# @return [String]
def uri_for(term)
  if term.is_a?(RDF::Node)
    # Blank nodes: cache the node's to_base result the first time it is seen.
    @nodes[term] ||= term.to_base
  elsif term.respond_to?(:to_uri)
    term.to_uri.to_s
  else
    term.to_s
  end
end
##
# @return [String]
def node_id
  # Advance the per-writer counter, then build the identifier from it.
  @node_id += 1
  "_:n#{@node_id}"
end
##
# @param [String] string
# @return [String]
def escaped(string)
  # Ordered substitution table. The backslash must come first so that the
  # backslashes introduced by later replacements are not doubled again.
  substitutions = [
    ['\\', '\\\\'],
    ["\b", '\\b'],
    ["\f", '\\f'],
    ["\t", '\\t'],
    ["\n", '\\n'],
    ["\r", '\\r'],
    ['"',  '\\"']
  ]
  # The block form inserts each replacement verbatim.
  substitutions.reduce(string) do |text, (raw, replacement)|
    text.gsub(raw) { replacement }
  end
end
##
# @param [String] string
# @return [String]
def quoted(string)
  # Wraps the value in double quotes; no escaping is done here.
  %("#{string}")
end
private
@@subclasses = [] # @private
##
# @private
# @return [void]
def self.inherited(child)
  # Record the new subclass, then let the default hook chain continue.
  @@subclasses.push(child)
  super
end
end # Writer
##
# The base class for RDF serialization errors.
class WriterError < IOError; end # WriterError
end # RDF