lib/rdf/cli.rb
require 'rdf'
require 'rdf/ntriples'
require 'rdf/nquads'
require 'rdf/vocab/writer'
require 'logger'
require 'optparse'
begin
require 'linkeddata'
rescue LoadError
# Silently load without linkeddata, but try some others
%w(
json/ld
ld/patch
rdf/microdata
rdf/n3
rdf/ordered_repo
rdf/rdfa
rdf/rdfxml
rdf/reasoner
rdf/tabular
rdf/trig
rdf/trix
rdf/turtle
rdf/vocab
rdf/xsd
shacl
shex
yaml_ld
).each do |ser|
begin
require ser
rescue LoadError
end
end
end
class OptionParser
  # Writers for the parsed option values and for the leftover arguments.
  attr_writer :options, :args

  # Actual parsed options; a fresh empty Hash until something is assigned.
  # @return [Hash]
  def options
    @options || {}
  end

  # Arguments remaining after extracting options; a fresh empty Array until something is assigned.
  # @return [Array]
  def args
    @args || []
  end
end
module RDF
# Individual formats can modify options by updating {Reader.options} or {Writer.options}. Format-specific commands are taken from {Format.cli_commands} for each loaded format, which returns an array of lambdas taking arguments and options.
#
# Status updates should be logged to `opts[:logger].info`. More complicated information can be added to `:messages` key within `opts`, if present.
#
# Other than `help`, all commands parse an input file.
#
# Multiple commands may be added in sequence to execute a pipeline.
#
# @example Creating Reader-specific options:
# class Reader
# def self.options
# [
# RDF::CLI::Option.new(
# symbol: :canonicalize,
# on: ["--canonicalize"],
# description: "Canonicalize URI/literal forms.") {true},
# RDF::CLI::Option.new(
# symbol: :uri,
# on: ["--uri STRING"],
# description: "URI.") {|v| RDF::URI(v)},
# ]
# end
#
# @example Creating Format-specific commands:
# class Format
# def self.cli_commands
# {
# count: {
# description: "",
# parse: true,
# lambda: ->(argv, opts) {}
# },
# }
# end
#
# @example Adding a command manually
# class MyCommand
# RDF::CLI.add_command(:count, description: "Count statements") do |argv, opts|
# count = 0
# RDF::CLI.parse(argv, **opts) do |reader|
# reader.each_statement do |statement|
# count += 1
# end
# end
# opts[:logger].info "Parsed #{count} statements"
# end
# end
#
# Format-specific commands should verify that the reader and/or output format are appropriate for the command.
class CLI
# Option description for use within Readers/Writers. See {RDF::Reader.options} and {RDF::Writer.options} for example usage.
class Option
  # Symbol used for this option when calling `Reader.new`
  # @return [Symbol]
  attr_reader :symbol

  # Description of this option (optional)
  # @return [String]
  attr_reader :description

  # Default value for this option
  # @return [Object]
  attr_reader :default

  # Potential values (for select or radio) or Ruby datatype
  # @return [Class, Array<String>]
  attr_reader :datatype

  # Associated HTML form control
  # @return [:text, :textarea, :radio, :checkbox, :select, :url, :url2, :none]
  attr_reader :control

  # Use of this option
  # @return [:optional, :disabled, :removed, :required]
  attr_accessor :use

  ##
  # Create a new option with optional callback.
  #
  # @param [Symbol] symbol
  # @param [Array<String>] on
  # @param [String] datatype
  # @param [Object] default
  # @param [String] control
  # @param [String] description
  # @param [[:optional, :disabled, :removed, :required]] use
  # @param [Hash{Symbol => Object}] options any further keywords are accepted and ignored
  # @yield value which may be used within `OptionParser#on`
  # @yieldparam [Object] value The option value as parsed using `on` argument
  # @yieldparam [OptionParser] options (nil) optional OptionParser
  # @yieldreturn [Object] a possibly modified input value
  # @raise [ArgumentError] if `symbol` or `on` is missing
  def initialize(symbol: nil, on: nil, datatype: nil, control: nil,
                 description: nil, use: :optional, default: nil, **options, &block)
    raise ArgumentError, "symbol is a required argument" unless symbol
    raise ArgumentError, "on is a required argument" unless on
    @symbol = symbol.to_sym
    # Keep a private copy so later changes to the caller's array cannot alter this option.
    @on = Array(on).dup
    @datatype = datatype
    @control = control
    @description = description
    @use = use
    @default = default
    @callback = block
  end

  # Arguments passed to OptionParser#on
  #
  # A new array is returned on every call. Callers typically append the description
  # before handing the list to `OptionParser#on`; returning the internal array would let
  # those appends accumulate, because `dup` only makes a shallow copy of an option.
  # @return [Array<String>]
  def on
    @on.dup
  end

  ##
  # Run the callback (if any) on a parsed value.
  #
  # The callback is invoked with zero, one (`arg`) or two (`arg`, `options`) arguments
  # according to its arity; for any other arity, or without a callback, `arg` is returned unchanged.
  #
  # @param [Object] arg value as parsed by OptionParser
  # @param [Object] options passed as second argument to two-argument callbacks
  # @return [Object]
  def call(arg, options = {})
    return arg unless @callback

    case @callback.arity
    when 0 then @callback.call
    when 1 then @callback.call(arg)
    when 2 then @callback.call(arg, options)
    else arg
    end
  end

  # Return version of commands appropriate for use in JSON
  # @return [Hash{Symbol => Object}]
  def to_hash
    {
      symbol: symbol,
      # Classes are reported by name; lists of allowed values are passed through.
      datatype: (datatype.is_a?(Class) ? datatype.name : datatype),
      default: default,
      control: control,
      description: description,
      use: use
    }
  end
end
# Built-in commands. Other commands are imported from the Format class of different readers/writers using {RDF::Format#cli_commands}. `COMMANDS` is a Hash whose keys are commands that may be executed by {RDF::CLI.exec}. The value is a hash containing the following keys:
# * `description` used for providing information about the command.
# * `parse` Boolean value to determine if input files should automatically be parsed into `repository`.
# * `help` used for the CLI help output.
# * `lambda` code run to execute command.
# * `filter` value is a Hash whose keys are matched against selected command options. All specified `key/value` pairs are compared against the equivalent key in the current invocation.
# If an Array, option value (as a string) must match any value of the array (as a string)
# If a Proc, it is passed the option value and must return `true`.
# Otherwise, the option value (as a string) must equal the `value` (as a string).
# * `control` Used to indicate how (if) command is displayed
# * `repository` Use this repository, if set
# * `options` an optional array of `RDF::CLI::Option` describing command-specific options.
# * `option_use`: A hash of option symbol to option usage, used for overriding the default status of an option for this command.
# @return [Hash{Symbol => Hash{Symbol => Object}}]
COMMANDS = {
  # Counts statements straight from the reader(s); it does nothing when the
  # repository already holds statements.
  count: {
    description: "Count statements in parsed input",
    parse: false,
    control: :none,
    help: "count [options] [args...]\nreturns number of parsed statements",
    lambda: ->(argv, opts) do
      unless repository.count > 0
        start = Time.new
        count = 0
        self.parse(argv, **opts) do |reader|
          reader.each_statement do |statement|
            count += 1
          end
        end
        secs = Time.new - start
        opts[:output].puts "Parsed #{count} statements with #{@readers.join(', ')} in #{secs} seconds @ #{count/secs} statements/second."
      end
    end,
    option_use: {output_format: :disabled}
  },
  # Prints the usage message.
  help: {
    description: "This message",
    parse: false,
    control: :none,
    # `options` requires the argument list; calling it without one raised ArgumentError
    # whenever this lambda ran (i.e. when `help` was not the first command).
    lambda: ->(argv, opts) {self.usage(self.options(argv))}
  },
  # Prints the string length of every statement in the repository.
  lengths: {
    description: "Lengths of each parsed statement",
    parse: true,
    control: :none,
    help: "lengths [options] [args...]\nreturns lengths of each parsed statement",
    lambda: ->(argv, opts) do
      opts[:output].puts "Lengths"
      repository.each_statement do |statement|
        opts[:output].puts statement.to_s.size
      end
    end,
    option_use: {output_format: :disabled}
  },
  # Prints each object yielded by the repository as N-Triples.
  objects: {
    description: "Serialize each parsed object to N-Triples",
    parse: true,
    control: :none,
    help: "objects [options] [args...]\nreturns unique objects serialized in N-Triples format",
    lambda: ->(argv, opts) do
      opts[:output].puts "Objects"
      repository.each_object do |object|
        opts[:output].puts object.to_ntriples
      end
    end,
    option_use: {output_format: :disabled}
  },
  # Prints each predicate yielded by the repository as N-Triples.
  predicates: {
    parse: true,
    description: "Serialize each parsed predicate to N-Triples",
    control: :none,
    help: "predicates [options] [args...]\nreturns unique predicates serialized in N-Triples format",
    lambda: ->(argv, opts) do
      opts[:output].puts "Predicates"
      repository.each_predicate do |predicate|
        opts[:output].puts predicate.to_ntriples
      end
    end,
    option_use: {output_format: :disabled}
  },
  # Writes the repository with the writer for `opts[:output_format]`, falling back to N-Triples.
  serialize: {
    description: "Serialize using output-format (or N-Triples)",
    parse: true,
    help: "serialize [options] [args...]\nserialize output using specified format (or N-Triples if not specified)",
    lambda: ->(argv, opts) do
      writer_class = RDF::Writer.for(opts[:output_format]) || RDF::NTriples::Writer
      out = opts[:output]
      # Values given in opts take precedence over these writer defaults.
      writer_opts = {prefixes: {}, standard_prefixes: true}.merge(opts)
      writer_class.new(out, **writer_opts) do |writer|
        writer << repository
      end
    end
  },
  # Prints each subject yielded by the repository as N-Triples.
  subjects: {
    parse: true,
    control: :none,
    description: "Serialize each parsed subject to N-Triples",
    help: "subjects [options] [args...]\nreturns unique subjects serialized in N-Triples format",
    lambda: ->(argv, opts) do
      opts[:output].puts "Subjects"
      repository.each_subject do |subject|
        opts[:output].puts subject.to_ntriples
      end
    end,
    option_use: {output_format: :disabled}
  },
  # Reports whether the repository answers `valid?` truthfully.
  validate: {
    description: "Validate parsed input",
    control: :none,
    parse: true,
    help: "validate [options] [args...]\nvalidates resulting repository (may also be used with --validate to check for parse-time errors)",
    lambda: ->(argv, opts) do
      opts[:output].puts "Input is " + (repository.valid? ? "" : "in") + "valid"
    end,
    option_use: {output_format: :disabled}
  }
}
# Options to setup, may be modified by selected command. Options are also read from {RDF::Reader#options} and {RDF::Writer#options}. When a specific input- or output-format is selected, options are also discovered from the associated subclass reader or writer.
# @return [Array<RDF::CLI::Option>]
OPTIONS = ([
  RDF::CLI::Option.new(
    symbol: :debug,
    control: :checkbox,
    datatype: TrueClass,
    on: ["-d", "--debug"],
    description: 'Enable debug output for troubleshooting.'),
  RDF::CLI::Option.new(
    symbol: :verbose,
    control: :checkbox,
    datatype: TrueClass,
    on: ['-v', '--verbose'],
    description: 'Enable verbose output. May be given more than once.'),
  # NOTE(review): datatype is TrueClass although the switch takes a STRING argument — confirm.
  RDF::CLI::Option.new(
    symbol: :evaluate,
    control: :none,
    datatype: TrueClass,
    on: ["-e", "--evaluate STRING"],
    description: "Evaluate argument as RDF input, if no files are specified"),
  # The callback opens the named file for writing; the resulting File becomes the option value.
  RDF::CLI::Option.new(
    symbol: :output,
    control: :none,
    on: ["-o", "--output FILE"],
    description: "File to write output, defaults to STDOUT") {|arg| File.open(arg, "w")},
  RDF::CLI::Option.new(
    symbol: :ordered,
    control: :checkbox,
    datatype: TrueClass,
    on: ["--ordered"],
    description: "Use order preserving repository"),
  RDF::CLI::Option.new(
    symbol: :format,
    control: :select,
    datatype: RDF::Format.select {|ft| ft.reader}.map(&:to_sym).sort,
    on: ["--input-format FORMAT", "--format FORMAT"],
    description: "Format of input file, uses heuristic if not specified"
  ) do |arg, options|
    unless reader = RDF::Reader.for(arg.downcase.to_sym)
      RDF::CLI.abort "No reader found for #{arg.downcase.to_sym}. Available readers:\n #{RDF::CLI.formats(reader: true).join("\n ")}"
    end
    # Add format-specific reader options
    reader.options.each do |cli_opt|
      next if options.options.key?(cli_opt.symbol)
      # Build a fresh argument list: appending to the option's own array would make the
      # description pile up each time this callback runs.
      on_args = Array(cli_opt.on).dup
      on_args << cli_opt.description if cli_opt.description
      options.on(*on_args) do |opt_arg|
        options.options[cli_opt.symbol] = cli_opt.call(opt_arg, options)
      end
    end if reader
    arg.downcase.to_sym
  end,
  RDF::CLI::Option.new(
    symbol: :output_format,
    control: :select,
    datatype: RDF::Format.select {|ft| ft.writer}.map(&:to_sym).sort,
    on: ["--output-format FORMAT"],
    description: "Format of output file, defaults to NTriples"
  ) do |arg, options|
    unless writer = RDF::Writer.for(arg.downcase.to_sym)
      RDF::CLI.abort "No writer found for #{arg.downcase.to_sym}. Available writers:\n #{RDF::CLI.formats(writer: true).join("\n ")}"
    end
    # Add format-specific writer options
    writer.options.each do |cli_opt|
      next if options.options.key?(cli_opt.symbol)
      # Fresh list for the same reason as in the reader callback above.
      on_args = Array(cli_opt.on).dup
      on_args << cli_opt.description if cli_opt.description
      options.on(*on_args) do |opt_arg|
        options.options[cli_opt.symbol] = cli_opt.call(opt_arg, options)
      end
    end if writer
    arg.downcase.to_sym
  end,
  # Generic reader and writer options follow; the first option seen for a symbol wins.
] + RDF::Reader.options + RDF::Writer.options).uniq(&:symbol)
class << self
  # Repository containing parsed statements
  # @return [RDF::Repository]
  def repository
    @repository
  end

  # Replace the repository containing parsed statements
  # @param [RDF::Repository] value
  def repository=(value)
    @repository = value
  end
end
##
# Name of the running program, without any directory part.
# @return [String]
def self.basename
  File.basename($PROGRAM_NAME)
end
##
# Return OptionParser set with appropriate options
#
# Commands found in `argv` contribute their own options and `option_use` overrides.
# Unless `format` is `:json`, `argv` is then parsed: option values are stored in
# `OptionParser#options` and the commands plus remaining arguments in `OptionParser#args`.
# `--version` prints the version and exits; `--help` prints usage and exits.
#
# @overload options(argv)
# @param [Array<String>] argv
# @return [OptionParser]
# @overload options(argv, format: :json)
# @param [Array<String>] argv
# @param [:json] format (:json)
# @return [Array<Hash>]
# Returns discovered options, each in its `to_hash` form
def self.options(argv, format: nil)
  options = OptionParser.new
  cli_opts = OPTIONS.map(&:dup)
  logger = Logger.new($stderr)
  logger.level = Logger::WARN
  logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
  opts = options.options = {logger: logger}

  # Pre-load commands
  load_commands

  # Add options for the specified command(s)
  cmds, args = argv.partition {|e| COMMANDS.include?(e.to_sym)}
  cmds.each do |cmd|
    Array(RDF::CLI::COMMANDS[cmd.to_sym][:options]).each do |option|
      # Replace any existing option with the same symbol
      cli_opts.delete_if {|cli_opt| cli_opt.symbol == option.symbol}

      # Command options go first; their `use` is not consulted here.
      cli_opts.unshift(option)
    end

    # Update usage of options for this command
    RDF::CLI::COMMANDS[cmd.to_sym].fetch(:option_use, {}).each do |sym, use|
      if opt = cli_opts.find {|cli_opt| cli_opt.symbol == sym}
        opt.use = use
      end
    end
  end

  cli_opts.each do |cli_opt|
    next if opts.key?(cli_opt.symbol)
    # Build a fresh argument list. `dup` on an option is shallow, so appending the
    # description to the option's own array would grow the shared list on every call.
    on_args = Array(cli_opt.on).dup
    on_args << cli_opt.description if cli_opt.description
    options.on(*on_args) do |arg|
      opts[cli_opt.symbol] = cli_opt.call(arg, options)
    end
  end

  if format == :json
    # Return options
    cli_opts.map(&:to_hash)
  else
    options.banner = "Usage: #{self.basename} command+ [options] [args...]"

    options.on_tail('-V', '--version', 'Display the RDF.rb version and exit.') do
      puts RDF::VERSION; exit(0)
    end

    show_help = false
    options.on_tail("-h", "--help", "Show this message") do
      show_help = true
    end

    begin
      args = options.parse!(args)
    rescue OptionParser::InvalidOption, OptionParser::InvalidArgument, ArgumentError => e
      abort e
    end

    # Make sure options are processed first
    if show_help
      self.usage(options); exit(0)
    end

    options.args = cmds + args
    options
  end
end
##
# Output usage message
#
# Writes to `$stdout`: the option summary, a note about format-dependent options,
# the commands available under the options already parsed, and the known formats.
#
# @param [OptionParser] options
# @param [Hash] cmd_opts accepted but not used by this method
# @param [String] banner replaces the parser's banner when given
# @return [nil]
def self.usage(options, cmd_opts: {}, banner: nil)
  options.banner = banner if banner

  $stdout.puts options
  $stdout.puts "Note: available commands and options may be different depending on selected --input-format and/or --output-format."

  command_lines = self.commands(**options.options).join("\n\t")
  $stdout.puts "Available commands:\n\t#{command_lines}"

  format_lines = (self.formats).join("\n\t")
  $stdout.puts "Available formats:\n\t#{format_lines}"
end
##
# Execute one or more commands, parsing input as necessary
#
# Commands are the elements of `args` that are keys of `COMMANDS`; everything else is
# treated as input arguments. `help` as first command only prints usage. Otherwise each
# command's `filter` is checked against `options`, input is parsed into the repository
# when any command asks for it, and the commands run in the order given.
#
# @param [Array<String>] args
# @param [IO] output
# @param [OptionParser] option_parser
# @param [Hash{Symbol => Hash{Symbol => Array[String]}}] messages used for conveying non primary-output which is structured.
# @param [Hash{Symbol => Object}] options
# @return [Object] not meaningful; `nil` after `help`
# @raise [ArgumentError] if no command is given, or a command's filter rejects the options
def self.exec(args, output: $stdout, option_parser: nil, messages: {}, **options)
  option_parser ||= self.options(args)
  options[:logger] ||= option_parser.options[:logger]
  output.set_encoding(Encoding::UTF_8) if output.respond_to?(:set_encoding) && RUBY_PLATFORM == "java"

  # Readers are recorded per invocation; without this reset, readers from earlier
  # invocations would show up in the log line and in `options[:statistics][:reader]`.
  @readers = []

  # Separate commands from file options; arguments already extracted
  cmds, args = args.partition {|e| COMMANDS.include?(e.to_sym)}

  if cmds.empty?
    usage(option_parser)
    raise ArgumentError, "No command given"
  end

  if cmds.first == 'help'
    on_cmd = cmds[1]
    cmd_opts = COMMANDS.fetch(on_cmd.to_s.to_sym, {})
    if on_cmd && cmd_opts[:help]
      usage(option_parser, cmd_opts: cmd_opts, banner: "Usage: #{self.basename.split('/').last} #{COMMANDS[on_cmd.to_sym][:help]}")
    elsif on_cmd
      usage(option_parser, cmd_opts: cmd_opts)
    else
      usage(option_parser)
    end
    return
  end

  # Make sure any selected command isn't filtered out
  cmds.each do |c|
    COMMANDS[c.to_sym].fetch(:filter, {}).each do |opt, val|
      case val
      when Array
        unless val.map(&:to_s).include?(options[opt].to_s)
          usage(option_parser, banner: "Command #{c.inspect} requires #{opt} in #{val.map(&:to_s).inspect}, not #{options.fetch(opt, 'null')}")
          raise ArgumentError, "Incompatible command #{c} used with option #{opt}=#{options[opt]}"
        end
      when Proc
        unless val.call(options[opt])
          usage(option_parser, banner: "Command #{c.inspect} #{opt} inconsistent with #{options.fetch(opt, 'null')}")
          raise ArgumentError, "Incompatible command #{c} used with option #{opt}=#{options[opt]}"
        end
      else
        unless val.to_s == options[opt].to_s
          usage(option_parser, banner: "Command #{c.inspect} requires compatible value for #{opt}, not #{options.fetch(opt, 'null')}")
          raise ArgumentError, "Incompatible command #{c} used with option #{opt}=#{options[opt]}"
        end
      end
    end

    # The command may specify a repository instance to use
    options[:repository] ||= COMMANDS[c.to_sym][:repository]
  end

  # Hacks for specific options
  options[:logger].level = Logger::INFO if options[:verbose]
  options[:logger].level = Logger::DEBUG if options[:debug]
  options[:format] = options[:format].to_sym if options[:format]
  options[:output_format] = options[:output_format].to_sym if options[:output_format]

  # Allow repository to be set via option.
  # NOTE(review): as written, RDF::OrderedRepo is chosen whenever it is loaded, regardless
  # of the `ordered` option; the extended Array is only used when it is not loaded and
  # `ordered` is set. Confirm this precedence is intended.
  @repository = options[:repository] || case
    when RDF.const_defined?(:OrderedRepo) then RDF::OrderedRepo.new
    when options[:ordered] then [].extend(RDF::Enumerable, RDF::Queryable)
    else RDF::Repository.new
  end

  # Parse input files if any command requires it
  if cmds.any? {|c| COMMANDS[c.to_sym][:parse]}
    start = Time.new
    self.parse(args, **options) do |reader|
      reader.each_statement {|st| @repository << st}
      # Remember prefixes from reading
      options[:prefixes] ||= reader.prefixes
    end
    secs = Time.new - start
    # Rate is based on what ended up in the repository (previously a counter that was never incremented).
    parsed = repository.count
    options[:logger].info "Parsed #{parsed} statements with #{@readers.join(', ')} in #{secs} seconds @ #{parsed/secs} statements/second."
  end

  # Run each command in sequence
  cmds.each do |command|
    COMMANDS[command.to_sym][:lambda].call(args,
      output: output,
      messages: messages,
      **options.merge(repository: repository))
  end

  # Normalize messages: anything that is not already a Hash is wrapped as `{result: [...]}`
  messages.each do |kind, term_messages|
    case term_messages
    when Hash
    when Array
      messages[kind] = {result: term_messages}
    else
      messages[kind] = {result: [term_messages]}
    end
  end

  if options[:statistics]
    options[:statistics][:reader] = @readers.first unless (@readers || []).empty?
    options[:statistics][:count] = @repository.count
  end
end
##
# List the available commands.
#
# @overload commands(**options)
# @param [Hash{Symbol => Object}] options already set
# @return [Array<String>] list of executable commands, one "name: description" line each,
#   limited to commands whose `filter` accepts `options`
# @overload commands(format: :json, **options)
# Returns commands as JSON, for API usage.
# @param [:json] format
# @param [Hash{Symbol => Object}] options already set
# @return [Array{Object}]
# Returns an array of commands including the command symbol; commands whose `control` is `:none` are left out
def self.commands(format: nil, **options)
  # First, load commands from other formats
  load_commands

  if format == :json
    COMMANDS.each_with_object([]) do |(sym, spec), listing|
      option_hashes = spec.fetch(:options, []).map(&:to_hash)
      entry = spec.merge(symbol: sym, options: option_hashes)
      # Code and help text are not part of the description
      [:lambda, :help].each {|key| entry.delete(key)}
      entry.delete(:options) if entry[:options].empty?
      listing << entry unless entry[:control] == :none
    end
  else
    # Keep only the commands whose every filter entry accepts the current option value
    usable = COMMANDS.select do |_sym, spec|
      spec.fetch(:filter, {}).all? do |opt, expected|
        actual = options[opt]
        case expected
        when Array then expected.map(&:to_s).include?(actual.to_s)
        when Proc then expected.call(actual)
        else expected.to_s == actual.to_s
        end
      end
    end

    # Right-align command names to the longest one
    width = usable.keys.map {|sym| sym.to_s.length}.max
    usable.keys.sort.map do |sym|
      "%*s: %s" % [width, sym, usable[sym][:description]]
    end
  end
end
##
# Load commands from formats
#
# On the first call, every command offered by a format's `cli_commands` is registered
# through {add_command}; a value that is not a Hash is registered as the command's `lambda`.
# Later calls only return the table.
# @return [Hash{Symbol => Hash{Symbol => Object}}]
def self.load_commands
  return COMMANDS if @commands_loaded

  RDF::Format.each do |format|
    format.cli_commands.each do |command, spec|
      spec = spec.is_a?(Hash) ? spec : {lambda: spec}
      add_command(command, **spec)
    end
  end

  @commands_loaded = true
  COMMANDS
end
##
# Add a command.
#
# A command that is already registered under the same name is left as it is.
#
# @param [#to_sym] command
# @param [Hash{Symbol => String}] options
# @option options [String] description
# @option options [String] help string to display for help
# @option options [Boolean] parse parse input files in to Repository, or not.
# @option options [Array<RDF::CLI::Option>] options specific to this command
# @yield argv, opts
# @yieldparam [Array<String>] argv
# @yieldparam [Hash] opts
# @yieldreturn [void]
# @return [Hash] the definition registered under the command name
def self.add_command(command, **options, &block)
  # A block, when given, is the code to run for the command
  options[:lambda] = block if block

  name = command.to_sym
  registered = COMMANDS[name]
  return registered if registered

  COMMANDS[name] = options
end
##
# List formats as right-aligned "symbol: name" lines, ordered by symbol.
#
# @param [Boolean] reader only formats having a reader (takes precedence over `writer`)
# @param [Boolean] writer only formats having a writer
# @return [Array<String>] list of available formats; with neither flag, formats having a reader or a writer
def self.formats(reader: false, writer: false)
  selected = RDF::Format.sort_by(&:to_sym).select do |ft|
    if reader
      ft.reader
    elsif writer
      ft.writer
    else
      ft.reader || ft.writer
    end
  end

  names = selected.each_with_object({}) {|ft, table| table[ft.to_sym] = ft.name}
  width = names.keys.map {|sym| sym.to_s.length}.max
  names.map {|sym, name| "%*s: %s" % [width, sym, name]}
end
##
# Parse each file, $stdin or specified string in `evaluate`
# yielding a reader
#
# With no files, input is the `evaluate` string or `$stdin`; when no `format` is given,
# the whole input is read once as a sample to select the reader. With files, each file is
# opened through `RDF::Reader.open`. The reader used is recorded for later reporting.
#
# @param [Array<String>] files
# @param [String] evaluate from command-line, rather than referenced file
# @param [Symbol] format Reader symbol for finding reader; detected when not given
# @param [Encoding] encoding set on the input
# @param [Hash{Symbol => Object}] options sent to reader
# @yield [reader]
# @yieldparam [RDF::Reader]
# @raise [ArgumentError] if no reader can be found for evaluated/standard input
# @return [nil]
def self.parse(files, evaluate: nil, format: nil, encoding: Encoding::UTF_8, **options, &block)
  if files.empty?
    # Without files, read the evaluated string, or standard input
    input = evaluate ? StringIO.new(evaluate) : $stdin
    input.set_encoding(encoding)
    unless format
      sample = input.read
      # Hand the reader a buffer over what was just read: pipes and terminals cannot
      # be rewound (IO#rewind raises Errno::ESPIPE).
      input = StringIO.new(sample)
    end
    r = RDF::Reader.for(format || {sample: sample})
    raise ArgumentError, "Unknown format for evaluated input" unless r
    # NOTE(review): the reader class is recorded here, its name (a String) in the files branch below.
    (@readers ||= []) << r
    r.new(input, **options) do |reader|
      yield(reader)
    end
  else
    options[:format] = format if format
    files.each do |file|
      RDF::Reader.open(file, **options) do |reader|
        (@readers ||= []) << reader.class.to_s
        yield(reader)
      end
    end
  end
end
##
# Terminate the program through `Kernel.abort`, prefixing the message with the program name.
#
# @param [String] msg
# @return [void]
def self.abort(msg)
  prefixed = "#{basename}: #{msg}"
  Kernel.abort(prefixed)
end
end
end