lib/ronin/web/cli/commands/new/spider.rb
# frozen_string_literal: true
#
# Copyright (c) 2006-2023 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-web is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-web is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
#
require 'ronin/web/cli/command'
require 'ronin/web/root'
require 'ronin/core/cli/generator'
module Ronin
module Web
class CLI
module Commands
class New < Command
#
# Creates a new [ronin-web-spider] script.
#
# [ronin-web-spider]: https://github.com/ronin-rb/ronin-web-spider#readme
#
# ## Usage
#
# ronin-web spider [options] {--host[=HOST] | --domain[=DOMAIN] | --site[=URL]} FILE
#
# ## Options
#
# --host[=[HOST]] Spiders a host
# --domain[=[DOMAIN]] Spiders a domain
# --site[=[URL]] Spiders a site
# --every-link Adds a callback for every link
# --every-url Adds a callback for every URL
# --every-failed-url Adds a callback for every failed URL
# --every-url-like /REGEXP/ Adds a callback for every URL that matches the regexp
# --all-headers Adds a callback for all HTTP Headers
# --every-page Adds a callback for every page
# --every-ok-page Adds a callback for every HTTP 200 page
# --every-redirect-page Adds a callback for every redirect page
# --every-timedout-page Adds a callback for every timedout page
# --every-bad-request-page Adds a callback for every bad request page
# --every-unauthorized-page Adds a callback for every unauthorized page
# --every-forbidden-page Adds a callback for every forbidden page
# --every-missing-page Adds a callback for every missing page
# --every-internal-server-error-page
# Adds a callback for every internal server error page
# --every-txt-page Adds a callback for every TXT page
# --every-html-page Adds a callback for every HTML page
# --every-xml-page Adds a callback for every XML page
# --every-xsl-page Adds a callback for every XSL page
# --every-javascript-page Adds a callback for every JavaScript page
# --every-css-page Adds a callback for every CSS page
# --every-rss-page Adds a callback for every RSS page
# --every-atom-page Adds a callback for every Atom page
# --every-ms-word-page Adds a callback for every MS Wod page
# --every-pdf-page Adds a callback for every PDF page
# --every-zip-page Adds a callback for every ZIP page
# --every-doc Adds a callback for every HTML/XML document
# --every-html-doc Adds a callback for every HTML document
# --every-xml-doc Adds a callback for every XML document
# --every-xsl-doc Adds a callback for every XSL document
# --every-rss-doc Adds a callback for every RSS document
# --every-atom-doc Adds a callback for every Atom document
# -h, --help Print help information
#
# ## Arguments
#
# FILE The file to create
#
class Spider < Command
include Core::CLI::Generator
template_dir File.join(ROOT,'data','new')
usage '[options] {--host[=HOST] | --domain[=DOMAIN] | --site[=URL]} FILE'
option :host, equals: true,
value: {
type: String,
usage: 'HOST',
required: false
},
desc: 'Spiders a host' do |value|
@entry_point_method = :host
@entry_point_argument = value
end
option :domain, equals: true,
value: {
type: String,
usage: 'DOMAIN',
required: false
},
desc: 'Spiders a domain' do |value|
@entry_point_method = :domain
@entry_point_argument = value
end
option :site, equals: true,
value: {
type: String,
usage: 'URL',
required: false
},
desc: 'Spiders a site' do |value|
@entry_point_method = :site
@entry_point_argument = value
end
option :every_link, desc: 'Adds a callback for every link' do
@callbacks << [:every_link, [], :link]
end
option :every_url, desc: 'Adds a callback for every URL' do
@callbacks << [:every_url, [], :url]
end
option :every_failed_url, desc: 'Adds a callback for every failed URL' do
@callbacks << [:every_failed_url, [], :url]
end
option :every_url_like, value: {
type: Regexp,
usage: '/REGEXP/'
},
desc: 'Adds a callback for every URL that matches the regexp' do |regexp|
@callbacks << [:every_url_like, [regexp], :url]
end
option :all_headers, desc: 'Adds a callback for all HTTP Headers' do
@callbacks << [:all_headers, [], :headers]
end
option :every_page, desc: 'Adds a callback for every page' do
@callbacks << [:every_page, [], :page]
end
option :every_ok_page, desc: 'Adds a callback for every HTTP 200 page' do
@callbacks << [:every_ok_page, [], :page]
end
option :every_redirect_page, desc: 'Adds a callback for every redirect page' do
@callbacks << [:every_redirect_page, [], :page]
end
option :every_timedout_page, desc: 'Adds a callback for every timedout page' do
@callbacks << [:every_timedout_page, [], :page]
end
option :every_bad_request_page, desc: 'Adds a callback for every bad request page' do
@callbacks << [:every_bad_request_page, [], :page]
end
option :every_unauthorized_page, desc: 'Adds a callback for every unauthorized page' do
@callbacks << [:every_unauthorized_page, [], :page]
end
option :every_forbidden_page, desc: 'Adds a callback for every forbidden page' do
@callbacks << [:every_forbidden_page, [], :page]
end
option :every_missing_page, desc: 'Adds a callback for every missing page' do
@callbacks << [:every_missing_page, [], :page]
end
option :every_internal_server_error_page, desc: 'Adds a callback for every internal server error page' do
@callbacks << [:every_internal_server_error_page, [], :page]
end
option :every_txt_page, desc: 'Adds a callback for every TXT page' do
@callbacks << [:every_txt_page, [], :page]
end
option :every_html_page, desc: 'Adds a callback for every HTML page' do
@callbacks << [:every_html_page, [], :page]
end
option :every_xml_page, desc: 'Adds a callback for every XML page' do
@callbacks << [:every_xml_page, [], :page]
end
option :every_xsl_page, desc: 'Adds a callback for every XSL page' do
@callbacks << [:every_xsl_page, [], :page]
end
option :every_javascript_page, desc: 'Adds a callback for every JavaScript page' do
@callbacks << [:every_javascript_page, [], :page]
end
option :every_css_page, desc: 'Adds a callback for every CSS page' do
@callbacks << [:every_css_page, [], :page]
end
option :every_rss_page, desc: 'Adds a callback for every RSS page' do
@callbacks << [:every_rss_page, [], :page]
end
option :every_atom_page, desc: 'Adds a callback for every Atom page' do
@callbacks << [:every_atom_page, [], :page]
end
option :every_ms_word_page, desc: 'Adds a callback for every MS Wod page' do
@callbacks << [:every_ms_wod_page, [], :page]
end
option :every_pdf_page, desc: 'Adds a callback for every PDF page' do
@callbacks << [:every_pdf_page, [], :page]
end
option :every_zip_page, desc: 'Adds a callback for every ZIP page' do
@callbacks << [:every_zip_page, [], :page]
end
option :every_doc, desc: 'Adds a callback for every HTML/XML document' do
@callbacks << [:every_doc, [], :doc]
end
option :every_html_doc, desc: 'Adds a callback for every HTML document' do
@callbacks << [:every_html_doc, [], :doc]
end
option :every_xml_doc, desc: 'Adds a callback for every XML document' do
@callbacks << [:every_xml_doc, [], :doc]
end
option :every_xsl_doc, desc: 'Adds a callback for every XSL document' do
@callbacks << [:every_xsl_doc, [], :doc]
end
option :every_rss_doc, desc: 'Adds a callback for every RSS document' do
@callbacks << [:every_rss_doc, [], :doc]
end
option :every_atom_doc, desc: 'Adds a callback for every Atom document' do
@callbacks << [:every_atom_doc, [], :doc]
end
argument :file, required: true,
desc: 'The file to create'
description 'Creates a new ronin-web-spider script'
man_page 'ronin-web-new-spider.1'
# The entry point method for the web spider agent.
#
# @return [Symbol, nil]
attr_reader :entry_point_method
# The callbacks for the web spider agent.
#
# @return [Array<(Symbol, Array, Symbol)>]
attr_reader :callbacks
#
# Initializes the command.
#
# @param [Hash{Symbol => Object}] kwargs
# Additional keyword arguments for the command.
#
def initialize(**kwargs)
super(**kwargs)
@callbacks = []
end
#
# Runs the `ronin-web new spider` command.
#
# @param [String] path
# The path to the new script file to create.
#
def run(path)
unless @entry_point_method
print_error "must specify --host, --domain, or --site"
exit(-1)
end
erb 'spider.rb.erb', path
chmod '+x', path
end
#
# The entry method's argument.
#
# @return [String]
# The escaped entry point method argument or `ARGV[0]`.
#
def entry_point_argument
if @entry_point_argument
@entry_point_argument.inspect
else
'ARGV[0]'
end
end
end
end
end
end
end
end