lib/rdf/mixin/enumerable.rb
module RDF
##
# An RDF statement enumeration mixin.
#
# Classes that include this module must implement an `#each` method that
# yields {RDF::Statement RDF statements}.
#
# @example Checking whether any statements exist
# enumerable.empty?
#
# @example Checking how many statements exist
# enumerable.count
#
# @example Checking whether a specific statement exists
# enumerable.statement?(RDF::Statement(subject, predicate, object))
# enumerable.triple?([subject, predicate, object])
# enumerable.quad?([subject, predicate, object, graph_name])
#
# @example Checking whether a specific value exists
# enumerable.subject?(RDF::URI("https://rubygems.org/gems/rdf"))
# enumerable.predicate?(RDF::RDFS.label)
# enumerable.object?(RDF::Literal("A Ruby library for working with Resource Description Framework (RDF) data.", language: :en))
# enumerable.graph?(RDF::URI("http://ar.to/#self"))
#
# @example Enumerating all statements
# enumerable.each_statement do |statement|
# puts statement.inspect
# end
#
# @example Enumerating all statements in the form of triples
# enumerable.each_triple do |subject, predicate, object|
# puts [subject, predicate, object].inspect
# end
#
# @example Enumerating all statements in the form of quads
# enumerable.each_quad do |subject, predicate, object, graph_name|
# puts [subject, predicate, object, graph_name].inspect
# end
#
# @example Enumerating all terms
# enumerable.each_subject { |term| puts term.inspect }
# enumerable.each_predicate { |term| puts term.inspect }
# enumerable.each_object { |term| puts term.inspect }
# enumerable.each_term { |term| puts term.inspect }
#
# @example Obtaining all statements
# enumerable.statements #=> [RDF::Statement(subject1, predicate1, object1), ...]
# enumerable.triples #=> [[subject1, predicate1, object1], ...]
# enumerable.quads #=> [[subject1, predicate1, object1, graph_name1], ...]
#
# @example Obtaining all unique values
# enumerable.subjects(unique: true) #=> [subject1, subject2, subject3, ...]
# enumerable.predicates(unique: true) #=> [predicate1, predicate2, predicate3, ...]
# enumerable.objects(unique: true) #=> [object1, object2, object3, ...]
# enumerable.graph_names(unique: true) #=> [graph_name1, graph_name2, graph_name3, ...]
#
# @see RDF::Graph
# @see RDF::Repository
module Enumerable
extend RDF::Util::Aliasing::LateBound
include ::Enumerable
include RDF::Countable # NOTE: must come after ::Enumerable
# Extends Enumerator with {Queryable} and {Enumerable}, which is used by {Enumerable#each_statement} and {Queryable#enum_for}
class Enumerator < ::Enumerator
  include RDF::Queryable
  include RDF::Enumerable

  ##
  # Collects the enumerated elements into an array.
  #
  # @return [Array] extended with {RDF::Queryable} and {RDF::Enumerable}
  # @note The extension keeps returned arrays queryable as well
  def to_a
    elements = super.to_a
    elements.extend(RDF::Queryable, RDF::Enumerable)
  end
end
##
# Returns `true` if this enumerable supports the given `feature`.
#
# Supported features include:
# * `:graph_name` supports statements with a graph_name, allowing multiple named graphs
# * `:inference` supports RDFS inference of queryable contents.
# * `:literal_equality` preserves [term-equality](https://www.w3.org/TR/rdf11-concepts/#dfn-literal-term-equality) for literals. Literals are equal only if their lexical values and datatypes are equal, character by character. Literals may be "inlined" to value-space for efficiency only if `:literal_equality` is `false`.
# * `:validity` allows a concrete Enumerable implementation to indicate that it does or does not support validity checking. By default implementations are assumed to support validity checking.
# * `:skolemize` supports [Skolemization](https://www.w3.org/wiki/BnodeSkolemization) of an `Enumerable`. Implementations supporting this feature must implement a `#skolemize` method, taking a base URI used for minting URIs for BNodes as stable identifiers and a `#deskolemize` method, also taking a base URI used for turning URIs having that prefix back into the same BNodes which were originally skolemized.
# * `:quoted_triples` supports RDF 1.2 quoted triples.
# * `:base_direction` supports RDF 1.2 directional language-tagged strings.
#
# This default implementation reports support for `:validity` and
# `:literal_equality` only.
#
# @param [Symbol, #to_sym] feature
# @return [Boolean]
# @since 0.3.5
def supports?(feature)
  # Honor the documented `#to_sym` contract (e.g. String names); values that
  # cannot be symbolized are compared as-is and therefore match nothing.
  feature = feature.to_sym if feature.respond_to?(:to_sym)
  feature == :validity || feature == :literal_equality
end
##
# Returns `true` if all statements are valid
#
# @return [Boolean] `true` or `false`
# @raise [NotImplementedError] unless enumerable supports validation
# @since 0.3.11
def valid?
  unless supports?(:validity)
    raise NotImplementedError, "#{self.class} does not support validation"
  end
  # Stop at the first invalid statement; the rest need not be visited.
  each_statement { |statement| return false if statement.invalid? }
  true
end
##
# Returns `true` if value is not valid
#
# @return [Boolean] `true` or `false`
# @raise [NotImplementedError] unless enumerable supports validation
# @since 0.2.1
def invalid?
  # Plain negation of #valid?; anything raised there propagates unchanged.
  valid? ? false : true
end
##
# Default validate! implementation, overridden in concrete classes
# @return [RDF::Enumerable] `self`
# @raise [ArgumentError] if the value is invalid
# @since 0.3.9
def validate!
  # Enumerables that do not support validity checking are accepted as-is.
  return self unless supports?(:validity)
  raise ArgumentError if invalid?
  self
end
alias_method :validate, :validate!
##
# Returns all RDF statements.
#
# @param [Hash{Symbol => Boolean}] options
# @return [Array<RDF::Statement>]
# @see #each_statement
# @see #enum_statement
def statements(**options)
  # +options+ is accepted but not consulted by this default implementation.
  enumerator = enum_statement
  enumerator.to_a
end
##
# @overload statement?
#   Returns `false` indicating this is not an RDF::Statement.
#   @return [Boolean]
#   @see RDF::Value#statement?
# @overload statement?(statement)
#   Returns `true` if `self` contains the given RDF statement.
#   Statements are compared with `#eql?`.
#
#   @param [RDF::Statement] statement
#   @return [Boolean]
# @raise [ArgumentError] if more than one argument is given
def statement?(*args)
  case args.length
  when 0 then false
  when 1
    candidate = args.first
    # A nil/false argument short-circuits and is returned as-is (falsy).
    candidate && !enum_statement.find { |s| s.eql?(candidate) }.nil?
  else
    # `raise ArgumentError, msg` -- the previous `ArgumentError(msg)` form was a
    # call to an undefined method and surfaced as NoMethodError.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0 or 1)"
  end
end
alias_method :has_statement?, :statement?
alias_method :include?, :statement?
##
# Iterates the given block for each RDF statement.
#
# If no block was given, returns an enumerator.
#
# The order in which statements are yielded is undefined.
#
# @overload each_statement
# @yield [statement]
# each statement
# @yieldparam [RDF::Statement] statement
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_statement
# @return [Enumerator<RDF::Statement>]
#
# @see #enum_statement
def each_statement(&block)
  # Delegate iteration to the including class's #each.
  each(&block) if block
  # An enumerator is returned whether or not a block was supplied.
  enum_statement
end
##
# Returns an enumerator for {RDF::Enumerable#each_statement}.
# FIXME: enum_for doesn't seem to be working properly
# in JRuby 1.7, so specs are marked pending
#
# @return [Enumerator<RDF::Statement>]
# @see #each_statement
def enum_statement
  # Wrap in Queryable::Enumerator so the result is queryable, countable and
  # enumerable; +send+ reaches #each_statement regardless of its visibility.
  source = self
  Queryable::Enumerator.new do |yielder|
    source.send(:each_statement) { |statement| yielder << statement }
  end
end
alias_method :enum_statements, :enum_statement
##
# Returns all RDF triples.
#
# @param [Hash{Symbol => Boolean}] options
# @return [Array<Array(RDF::Resource, RDF::URI, RDF::Term)>]
# @see #each_triple
# @see #enum_triple
def triples(**options)
  # +options+ is accepted but unused here. TODO: optimize
  enum_statement.map { |statement| statement.to_triple }
end
##
# Returns `true` if `self` contains the given RDF triple.
#
# @param [Array(RDF::Resource, RDF::URI, RDF::Term)] triple
# @return [Boolean]
def triple?(triple)
  # Membership is decided by `==` against each materialized triple.
  triples.any? { |candidate| candidate == triple }
end
alias_method :has_triple?, :triple?
##
# Iterates the given block for each RDF triple.
#
# If no block was given, returns an enumerator.
#
# The order in which triples are yielded is undefined.
#
# @overload each_triple
# @yield [subject, predicate, object]
# each triple
# @yieldparam [RDF::Resource] subject
# @yieldparam [RDF::URI] predicate
# @yieldparam [RDF::Term] object
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_triple
# @return [Enumerator<Array(RDF::Resource, RDF::URI, RDF::Term)>]
#
# @see #enum_triple
def each_triple
  return enum_triple unless block_given?
  # Splat so the block receives subject, predicate and object separately.
  each_statement { |statement| yield(*statement.to_triple) }
  enum_triple
end
##
# Returns an enumerator for {RDF::Enumerable#each_triple}.
#
# @return [Enumerator<Array(RDF::Resource, RDF::URI, RDF::Term)>]
# @see #each_triple
def enum_triple
  Countable::Enumerator.new do |yielder|
    # Re-pack the three yielded terms into one array element.
    each_triple do |subject, predicate, object|
      yielder << [subject, predicate, object]
    end
  end
end
alias_method :enum_triples, :enum_triple
##
# Returns all RDF quads.
#
# @param [Hash{Symbol => Boolean}] options
# @return [Array<Array(RDF::Resource, RDF::URI, RDF::Term, RDF::Resource)>]
# @see #each_quad
# @see #enum_quad
def quads(**options)
  # +options+ is accepted but unused here. TODO: optimize
  enum_statement.map { |statement| statement.to_quad }
end
##
# Returns `true` if `self` contains the given RDF quad.
#
# @param [Array(RDF::Resource, RDF::URI, RDF::Term, RDF::Resource)] quad
# @return [Boolean]
def quad?(quad)
  # Membership is decided by `==` against each materialized quad.
  quads.any? { |candidate| candidate == quad }
end
alias_method :has_quad?, :quad?
##
# Iterates the given block for each RDF quad.
#
# If no block was given, returns an enumerator.
#
# The order in which quads are yielded is undefined.
#
# @overload each_quad
# @yield [subject, predicate, object, graph_name]
# each quad
# @yieldparam [RDF::Resource] subject
# @yieldparam [RDF::URI] predicate
# @yieldparam [RDF::Term] object
# @yieldparam [RDF::Resource] graph_name
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_quad
# @return [Enumerator<Array(RDF::Resource, RDF::URI, RDF::Term, RDF::Resource)>]
#
# @see #enum_quad
def each_quad
  return enum_quad unless block_given?
  # Splat so the block receives subject, predicate, object and graph name.
  each_statement { |statement| yield(*statement.to_quad) }
  enum_quad
end
##
# Returns an enumerator for {RDF::Enumerable#each_quad}.
#
# @return [Enumerator<Array(RDF::Resource, RDF::URI, RDF::Term, RDF::Resource)>]
# @see #each_quad
def enum_quad
  Countable::Enumerator.new do |yielder|
    # Re-pack the four yielded values into one array element.
    each_quad do |subject, predicate, object, graph_name|
      yielder << [subject, predicate, object, graph_name]
    end
  end
end
alias_method :enum_quads, :enum_quad
##
# Returns all unique RDF subject terms.
#
# @param unique (true)
# @return [Array<RDF::Resource>]
# @see #each_subject
# @see #enum_subject
def subjects(unique: true)
  # Unique listing goes through #each_subject's de-duplication.
  return enum_subject.to_a if unique
  enum_statement.map(&:subject) # TODO: optimize
end
##
# Returns `true` if `self` contains the given RDF subject term.
#
# @param [RDF::Resource] value
# @return [Boolean]
def subject?(value)
  # Membership is delegated to the subject enumerator's own #include?.
  known_subjects = enum_subject
  known_subjects.include?(value)
end
alias_method :has_subject?, :subject?
##
# Iterates the given block for each unique RDF subject term.
#
# If no block was given, returns an enumerator.
#
# The order in which values are yielded is undefined.
#
# @overload each_subject
# @yield [subject]
# each subject term
# @yieldparam [RDF::Resource] subject
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_subject
# @return [Enumerator<RDF::Resource>]
# @see #enum_subject
def each_subject
  return enum_subject unless block_given?
  # Subjects already yielded, keyed by their string form.
  seen = {}
  each_statement do |statement|
    subject = statement.subject
    next if subject.nil?
    key = subject.to_s
    next if seen.key?(key)
    seen[key] = true
    yield subject
  end
  enum_subject
end
##
# Returns an enumerator for {RDF::Enumerable#each_subject}.
#
# @return [Enumerator<RDF::Resource>]
# @see #each_subject
def enum_subject
  # Delegates to #enum_for with the name of the iterator to wrap.
  iterator = :each_subject
  enum_for(iterator)
end
alias_method :enum_subjects, :enum_subject
##
# Returns all unique RDF predicate terms.
#
# @param unique (true)
# @return [Array<RDF::URI>]
# @see #each_predicate
# @see #enum_predicate
def predicates(unique: true)
  if unique
    # Unique listing goes through #each_predicate's de-duplication.
    enum_predicate.to_a
  else
    enum_statement.map(&:predicate) # TODO: optimize
  end
end
##
# Returns `true` if `self` contains the given RDF predicate term.
#
# @param [RDF::URI] value
# @return [Boolean]
def predicate?(value)
  # Membership is delegated to the predicate enumerator's own #include?.
  known_predicates = enum_predicate
  known_predicates.include?(value)
end
alias_method :has_predicate?, :predicate?
##
# Iterates the given block for each unique RDF predicate term.
#
# If no block was given, returns an enumerator.
#
# The order in which values are yielded is undefined.
#
# @overload each_predicate
# @yield [predicate]
# each predicate term
# @yieldparam [RDF::URI] predicate
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_predicate
# @return [Enumerator<RDF::URI>]
# @see #enum_predicate
def each_predicate
  return enum_predicate unless block_given?
  # Predicates already yielded, keyed by their string form.
  seen = {}
  each_statement do |statement|
    predicate = statement.predicate
    next if predicate.nil?
    key = predicate.to_s
    next if seen.key?(key)
    seen[key] = true
    yield predicate
  end
  enum_predicate
end
##
# Returns an enumerator for {RDF::Enumerable#each_predicate}.
#
# @return [Enumerator<RDF::URI>]
# @see #each_predicate
def enum_predicate
  # Delegates to #enum_for with the name of the iterator to wrap.
  iterator = :each_predicate
  enum_for(iterator)
end
alias_method :enum_predicates, :enum_predicate
##
# Returns all unique RDF object terms.
#
# @param unique (true)
# @return [Array<RDF::Term>]
# @see #each_object
# @see #enum_object
def objects(unique: true)
  # Unique listing goes through #each_object's de-duplication.
  unique ? enum_object.to_a : enum_statement.map(&:object) # TODO: optimize
end
##
# Returns `true` if `self` contains the given RDF object term.
#
# @param [RDF::Term] value
# @return [Boolean]
def object?(value)
  # Membership is delegated to the object enumerator's own #include?.
  known_objects = enum_object
  known_objects.include?(value)
end
alias_method :has_object?, :object?
##
# Iterates the given block for each unique RDF object term.
#
# If no block was given, returns an enumerator.
#
# The order in which values are yielded is undefined.
#
# @overload each_object
# @yield [object]
# each object term
# @yieldparam [RDF::Term] object
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_object
# @return [Enumerator<RDF::Term>]
#
# @see #enum_object
def each_object # FIXME: deduplication
  return enum_object unless block_given?
  # Objects already yielded; unlike subjects/predicates, the term itself is
  # the hash key (not its string form).
  seen = {}
  each_statement do |statement|
    object = statement.object
    next if object.nil? || seen.key?(object)
    seen[object] = true
    yield object
  end
  enum_object
end
##
# Returns an enumerator for {RDF::Enumerable#each_object}.
#
# @return [Enumerator<RDF::Term>]
# @see #each_object
def enum_object
  # Delegates to #enum_for with the name of the iterator to wrap.
  iterator = :each_object
  enum_for(iterator)
end
alias_method :enum_objects, :enum_object
##
# Returns all unique RDF terms (subjects, predicates, objects, and graph_names).
#
# @example finding all Blank Nodes used within an enumerable
# enumerable.terms.select(&:node?)
#
# @param unique (true)
# @return [Array<RDF::Resource>]
# @since 2.0
# @see #each_term
# @see #enum_term
def terms(unique: true)
  # Unique listing goes through #each_term's de-duplication.
  return enum_term.to_a if unique
  # Otherwise gather every statement's terms, flatten nesting, drop nils.
  per_statement = enum_statement.map(&:terms)
  per_statement.flatten.compact
end
##
# @overload term?
#   With no arguments, defers to `super` (the value-level predicate;
#   NOTE(review): presumably `RDF::Value#term?` -- confirm).
#   @see RDF::Value#statement?
#   @return [Boolean]
# @overload term?(value)
#   Returns `true` if `self` contains the given RDF term.
#
#   @param [RDF::Resource] value
#   @return [Boolean]
# @raise [ArgumentError] if more than one argument is given
# @since 2.0
def term?(*args)
  case args.length
  when 0 then super
  when 1
    candidate = args.first
    # A nil/false argument short-circuits and is returned as-is (falsy).
    candidate && enum_term.include?(candidate)
  else
    # `raise ArgumentError, msg` -- the previous `ArgumentError(msg)` form was a
    # call to an undefined method and surfaced as NoMethodError.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0 or 1)"
  end
end
alias_method :has_term?, :term?
##
# Iterates the given block for each unique RDF term (subject, predicate, object, or graph_name).
#
# If no block was given, returns an enumerator.
#
# The order in which values are yielded is undefined.
#
# @overload each_term
# @yield [term]
# each term
# @yieldparam [RDF::Term] term
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_term
# @return [Enumerator<RDF::Term>]
# @since 2.0
# @see #enum_term
def each_term
  return enum_term unless block_given?
  # De-duplication is keyed on each term's #hash value, so terms whose hashes
  # coincide are treated as the same term.
  seen = {}
  each_statement do |statement|
    statement.terms.each do |term|
      digest = term.hash
      next if seen.key?(digest)
      seen[digest] = true
      yield term
    end
  end
  enum_term
end
##
# Returns an enumerator for {RDF::Enumerable#each_term}.
#
# @return [Enumerator<RDF::Term>]
# @see #each_term
# @since 2.0
def enum_term
  # Delegates to #enum_for with the name of the iterator to wrap.
  iterator = :each_term
  enum_for(iterator)
end
alias_method :enum_terms, :enum_term
##
# Returns all unique RDF graph names, other than the default graph.
#
# @param unique (true)
# @return [Array<RDF::Resource>]
# @see #each_graph
# @see #enum_graph
# @since 2.0
def graph_names(unique: true)
  # Unique names come from the graphs of #each_graph; otherwise one name per
  # statement. Either way nil (the default graph) is dropped.
  source = unique ? enum_graph : enum_statement
  source.map(&:graph_name).compact # TODO: optimize
end
##
# Returns `true` if `self` contains the given RDF graph_name.
#
# @param [RDF::Resource, false] graph_name
# Use value `false` to query for the default graph_name
# @return [Boolean]
def graph?(graph_name)
  # Stop at the first statement whose graph name is `==` to the one given.
  enum_statement.each do |statement|
    return true if statement.graph_name == graph_name
  end
  false
end
alias_method :has_graph?, :graph?
##
# Limits statements to be from a specific graph.
#
# If no block was given, returns an enumerator.
#
# The order in which statements are yielded is undefined.
#
# @overload project_graph(graph_name)
# @param [RDF::Resource, nil] graph_name
# The name of the graph from which statements are taken.
# Use `nil` for the default graph.
# @yield [statement]
# each statement
# @yieldparam [RDF::Statement] statement
# @yieldreturn [void] ignored
# @return [void]
#
# @overload project_graph(graph_name)
# @param [RDF::Resource, false] graph_name
# The name of the graph from which statements are taken.
# Use `false` for the default graph.
# @return [Enumerable]
#
# @see #each_statement
# @since 3.0
def project_graph(graph_name)
  unless block_given?
    # Wrap in Queryable::Enumerator so the projection is queryable, countable
    # and enumerable; it re-enters this method with a block when consumed.
    source = self
    projection = Queryable::Enumerator.new do |yielder|
      source.send(:project_graph, graph_name) { |statement| yielder << statement }
    end
    return projection
  end

  # Pass through only statements whose graph name matches.
  self.each do |statement|
    yield statement if statement.graph_name == graph_name
  end
end
##
# Iterates the given block for each RDF graph in `self`.
#
# If no block was given, returns an enumerator.
#
# The order in which graphs are yielded is undefined.
#
# @overload each_graph
# @yield [graph]
# each graph
# @yieldparam [RDF::Graph] graph
# @yieldreturn [void] ignored
# @return [void]
#
# @overload each_graph
# @return [Enumerator<RDF::Graph>]
#
# @see #enum_graph
# @since 0.1.9
def each_graph
  return enum_graph unless block_given?

  # The default (unnamed) graph is always yielded first.
  yield RDF::Graph.new(graph_name: nil, data: self)
  # FIXME: brute force, repositories should override behavior
  if supports?(:graph_name)
    names = enum_statement.map(&:graph_name).uniq.compact
    names.each { |name| yield RDF::Graph.new(graph_name: name, data: self) }
  end
  enum_graph
end
##
# Returns an enumerator for {RDF::Enumerable#each_graph}.
#
# @return [Enumerator<RDF::Graph>]
# @see #each_graph
# @since 0.1.9
def enum_graph
  # Delegates to #enum_for with the name of the iterator to wrap.
  iterator = :each_graph
  enum_for(iterator)
end
alias_method :enum_graphs, :enum_graph
##
# Enumerates each statement using its canonical representation.
#
# @note This is updated by `RDF::Normalize` to also canonicalize blank nodes.
#
# @return [RDF::Enumerable]
def canonicalize
  # Non-mutating: yields each statement's #canonicalize result through a new
  # enumerator rather than modifying +self+.
  source = self
  Enumerable::Enumerator.new do |yielder|
    source.send(:each_statement) { |statement| yielder << statement.canonicalize }
  end
end
##
# Mutating canonicalization not supported
#
# @raise NotImplementedError
def canonicalize!
  # In-place canonicalization is not provided by this mixin.
  message = "Canonicalizing enumerables not supported"
  raise NotImplementedError, message
end
##
# Returns all RDF statements in `self` as an array.
#
# Mixes in `RDF::Enumerable` into the returned object.
#
# @return [Array]
def to_a
  # Build the array via the inherited #to_a, then mix RDF::Enumerable into it.
  array = super
  array.extend(RDF::Enumerable)
end
##
# Returns all RDF statements in `self` as a set.
#
# Mixes in `RDF::Enumerable` into the returned object.
#
# @return [Set]
# @since 0.2.0
def to_set
  # Load Set on demand if it has not been defined yet.
  require 'set' unless defined?(::Set)
  # Build the set via the inherited #to_set, then mix RDF::Enumerable into it.
  set = super
  set.extend(RDF::Enumerable)
end
##
# Returns all RDF object terms indexed by their subject and predicate
# terms.
#
# The return value is a `Hash` instance that has the structure:
# `{subject => {predicate => [*objects]}}`.
#
# @return [Hash]
def to_h
  index = {}
  each_statement do |statement|
    by_predicate = (index[statement.subject] ||= {})
    objects = (by_predicate[statement.predicate] ||= [])
    object = statement.object
    # Each object is listed once per subject/predicate pair.
    objects << object unless objects.include?(object)
  end
  index
end
##
# Returns a serialized string representation of `self`.
#
# Before calling this method you may need to explicitly require a
# serialization extension for the specified format.
#
# @example Serializing into N-Triples format
# require 'rdf/ntriples'
# ntriples = enumerable.dump(:ntriples)
#
# @param [Array<Object>] args
# if the last argument is a hash, it is passed as options to
# {RDF::Writer.dump}.
# @return [String]
# @see RDF::Writer.dump
# @raise [RDF::WriterError] if no writer found
# @since 0.2.0
def dump(*args, **options)
  # +args+ select the writer; +options+ are forwarded to the writer's dump.
  if (writer = RDF::Writer.for(*args))
    writer.dump(self, nil, **options)
  else
    raise RDF::WriterError, "No writer found using #{args.inspect}"
  end
end
protected
##
# @overload #to_writer
# Implements #to_writer for each available instance of {RDF::Writer},
# based on the writer symbol.
#
# @return [String]
# @see {RDF::Writer.sym}
def method_missing(meth, *args)
  name = meth.to_s
  # Only `to_<format>` names are candidates; the suffix selects the writer.
  writer = name[0, 3] == "to_" ? RDF::Writer.for(name[3..-1].to_sym) : nil
  return super unless writer
  writer.buffer(standard_prefixes: true) { |w| w << self }
end
##
# Reports dynamic `to_<format>` methods (see {#method_missing}) as available
# when a writer is found for the format symbol; everything else is left to
# `super`.
#
# @note this instantiates a writer; it could probably be done more
# efficiently by refactoring `RDF::Reader` and/or `RDF::Format` to expose
# a list of valid format symbols.
# @param [Symbol, String] name
# @param [Boolean] include_private
# @return [Boolean]
def respond_to_missing?(name, include_private = false)
  method_name = name.to_s
  # Answer with a strict boolean, and defer to `super` when no writer matches
  # rather than short-circuiting with nil.
  if method_name.start_with?('to_') && RDF::Writer.for(method_name[3..-1].to_sym)
    true
  else
    super
  end
end
##
# @private
# @param [Symbol, #to_sym] method
# @return [Enumerator]
# @see Object#enum_for
def enum_for(method = :each, *args)
  # Use Enumerable::Enumerator so the enumerators are themselves queryable.
  receiver = self
  Enumerable::Enumerator.new do |yielder|
    receiver.send(method, *args) do |*yielded|
      # Multi-value yields are passed on as one array; otherwise the single
      # value (or nil when nothing was yielded) is passed on bare.
      yielder << (yielded.length > 1 ? yielded : yielded.first)
    end
  end
end
alias_method :to_enum, :enum_for
end # Enumerable
end # RDF