bin/wayback_archiver
#!/usr/bin/env ruby
require 'optparse'
require 'wayback_archiver'
# Settings used when the matching command-line option is not supplied.

# Placeholder; the positional arguments are assigned to it after parsing.
urls = nil

# Crawl/archive behaviour.
strategy = 'auto'
hosts = []
concurrency = WaybackArchiver.concurrency
limit = WaybackArchiver.max_limit

# Logging destination and verbosity.
log = STDOUT
log_level = Logger::INFO
# Command-line option definitions. Each handler stores its value in the
# matching local variable (strategy, hosts, concurrency, limit, log,
# log_level) that the rest of the script reads after parsing.
optparse = OptionParser.new do |parser|
  parser.banner = 'Usage: wayback_archiver [<url>] [options]'

  # Strategy switches take no argument; each one simply overwrites
  # `strategy`, so the last switch given on the command line wins.
  parser.on('--auto', 'Auto (default)') do
    strategy = 'auto'
  end

  parser.on('--crawl', 'Crawl') do
    strategy = 'crawl'
  end

  parser.on('--sitemap', 'Sitemap') do
    strategy = 'sitemap'
  end

  parser.on('--urls', '--url', 'URL(s)') do
    strategy = 'urls'
  end

  # The argument is optional, so `value` is nil when the switch is given bare;
  # in that case the current `hosts` value is left untouched.
  parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts') do |value|
    hosts = value.map { |host| Regexp.new(host) } if value
  end

  parser.on('--concurrency=1', Integer, 'Concurrency') do |value|
    concurrency = value
  end

  parser.on('--limit=5', Integer, 'Max number of URLs to archive') do |value|
    limit = value
  end

  parser.on('--log=output.log', String, 'Path to desired log file (if no argument is given it defaults to STDOUT)') do |path|
    log = path
  end

  # --verbose selects DEBUG, --no-verbose selects WARN.
  parser.on('--[no-]verbose', 'Verbose logs') do |value|
    log_level = value ? Logger::DEBUG : Logger::WARN
  end

  # Registered once, at the tail, so -h/--help is listed a single time at the
  # end of the options summary.
  parser.on_tail('-h', '--help', 'Show this message') do
    puts parser
    exit
  end

  parser.on_tail('--version', 'Show version') do
    puts "WaybackArchiver version #{WaybackArchiver::VERSION}"
    exit
  end
end
# Strip the recognised switches out of ARGV; whatever is left over is treated
# as the URLs to archive.
optparse.parse!

# Trim each leftover argument and keep only the non-blank ones.
urls = ARGV.each_with_object([]) do |raw_arg, kept|
  trimmed = raw_arg.strip
  kept << trimmed unless trimmed.empty?
end

# At least one URL is mandatory: show the usage text, then fail.
unless urls.any?
  puts optparse.help
  raise ArgumentError, "[<url>] is required"
end
# Build the logger from the selected destination and verbosity, then hand it
# to WaybackArchiver.
cli_logger = Logger.new(log)
cli_logger.progname = 'WaybackArchiver'
cli_logger.level = log_level
WaybackArchiver.logger = cli_logger
# Archive every URL given on the command line using the selected settings.
# No fallback for `strategy` is needed here: it starts out as 'auto' and every
# strategy switch assigns a non-nil string, so it can never be nil.
urls.each do |url|
  WaybackArchiver.archive(
    url,
    hosts: hosts,
    strategy: strategy,
    concurrency: concurrency,
    limit: limit
  )
end