# lib/nokogiri/xml/sax/document.rb
# frozen_string_literal: true
module Nokogiri
module XML
###
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
# dealing with XML. If you want to do SAX style parsing using HTML, check out
# Nokogiri::HTML4::SAX.
#
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
# events we're interested in, then giving the parser some XML to process. The parser will notify
# you when it encounters events you said you would like to know about.
#
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
# methods for which you would like notification.
#
# For example, if I want to be notified when a document ends, and when an element starts, I
# would write a class like this:
#
# class MyDocument < Nokogiri::XML::SAX::Document
# def end_document
# puts "the document has ended"
# end
#
# def start_element name, attributes = []
# puts "#{name} started"
# end
# end
#
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
#
# # Create a new parser
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
#
# # Feed the parser some XML
# parser.parse(File.open(ARGV[0]))
#
# Now my document handler will be called when each node starts, and when the document ends. To
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
#
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
module SAX
###
# This class is used for registering types of events you are interested in handling. All of
# the methods on this class are available as possible events while parsing an XML document. To
# register for any particular event, just subclass this class and implement the methods you
# are interested in knowing about.
#
# To only be notified about start and end element events, write a class like this:
#
# class MyDocument < Nokogiri::XML::SAX::Document
# def start_element name, attrs = []
# puts "#{name} started!"
# end
#
# def end_element name
# puts "#{name} ended"
# end
# end
#
# You can use this event handler for any SAX style parser included with Nokogiri. See
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
class Document
  ###
  # Event fired once an XML declaration has been parsed.
  # Does nothing by default; override it in a subclass to receive the event.
  def xmldecl(version, encoding, standalone)
  end

  ###
  # Event fired when parsing of the document begins.
  # Does nothing by default.
  def start_document
  end

  ###
  # Event fired when parsing of the document finishes.
  # Does nothing by default.
  def end_document
  end

  ###
  # Event fired when an element opens.
  # * +name+ is the tag name
  # * +attrs+ is an assoc list holding both namespaces and attributes, e.g.:
  #     [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
  #
  # Does nothing by default.
  def start_element(name, attrs = [])
  end

  ###
  # Event fired when an element closes.
  # * +name+ is the tag name
  #
  # Does nothing by default.
  def end_element(name)
  end

  ###
  # Namespace-aware event fired when an element opens.
  # * +name+ is the element name
  # * +attrs+ is a list of attribute objects; each one is asked for its
  #   +prefix+, +localname+ and +value+
  # * +prefix+ is the element's namespace prefix (may be nil)
  # * +uri+ is the namespace URI that goes with it (not used here)
  # * +ns+ is a collection of (prefix, uri) namespace pairs declared on the element
  #
  # The default implementation bridges to the SAX v1 interface: it builds
  # the qualified element name plus a flat list of [name, value] pairs
  # (namespace declarations first, then attributes) and passes both on to
  # #start_element, returning whatever that call returns.
  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
    qualified_name = [prefix, name].compact.join(":")

    # A nil prefix denotes the default namespace, which becomes a bare "xmlns".
    namespace_pairs = ns.map do |ns_prefix, ns_uri|
      [["xmlns", ns_prefix].compact.join(":"), ns_uri]
    end

    # Attributes without a prefix keep just their local name.
    attribute_pairs = attrs.map do |attr|
      [[attr.prefix, attr.localname].compact.join(":"), attr.value]
    end

    start_element(qualified_name, namespace_pairs + attribute_pairs)
  end

  ###
  # Namespace-aware event fired when an element closes.
  # * +name+ is the element name
  # * +prefix+ is the element's namespace prefix (may be nil)
  # * +uri+ is the namespace URI that goes with it (not used here)
  #
  # The default implementation bridges to the SAX v1 interface by passing
  # the qualified element name on to #end_element.
  def end_element_namespace(name, prefix = nil, uri = nil)
    qualified_name = [prefix, name].compact.join(":")
    end_element(qualified_name)
  end

  ###
  # Event fired for character data found between tags. A single contiguous
  # run of characters may be delivered across several calls.
  # * +string+ holds the character data
  #
  # Does nothing by default.
  def characters(string)
  end

  ###
  # Event fired when a comment is encountered.
  # * +string+ holds the comment data
  #
  # Does nothing by default.
  def comment(string)
  end

  ###
  # Event fired for document warnings.
  # * +string+ holds the warning
  #
  # Does nothing by default.
  def warning(string)
  end

  ###
  # Event fired for document errors.
  # * +string+ holds the error
  #
  # Does nothing by default.
  def error(string)
  end

  ###
  # Event fired when a CDATA block is found.
  # * +string+ holds the CDATA content
  #
  # Does nothing by default.
  def cdata_block(string)
  end

  ###
  # Event fired when a processing instruction is found.
  # * +name+ is the target of the instruction
  # * +content+ is the value of the instruction
  #
  # Does nothing by default.
  def processing_instruction(name, content)
  end
end
end
end
end