lib/roadie/document.rb
# frozen_string_literal: true
module Roadie
# The main entry point for Roadie. A document represents a working unit and
# is built with the input HTML and the configuration options you need.
#
# A Document must never be used from two threads at the same time. Reusing
# Documents is discouraged.
#
# Stylesheets are added to the HTML from three different sources:
# 1. Stylesheets inside the document ( +<style>+ elements)
# 2. Stylesheets referenced by the DOM ( +<link>+ elements)
# 3. The internal stylesheet (see {#add_css})
#
# The internal stylesheet is used last and gets the highest priority. The
# rest is used in the same order as browsers are supposed to use them.
#
# The execution methods are {#transform} and {#transform_partial}.
#
# @attr [#call] before_transformation Callback to call just before {#transform}ation begins. Will be called with the parsed DOM tree and the {Document} instance.
# @attr [#call] after_transformation Callback to call just before {#transform}ation is completed. Will be called with the current DOM tree and the {Document} instance.
class Document
# The input HTML this document was built with.
attr_reader :html
# Provider lists handed to the AssetScanner when CSS is extracted from the
# DOM. Assign through {#asset_providers=} and {#external_asset_providers=}.
attr_reader :asset_providers
attr_reader :external_asset_providers
# URL options. If none are given no URL rewriting will take place.
# @see UrlGenerator#initialize
attr_accessor :url_options
# Callbacks invoked with the DOM tree and this {Document} at the start and
# at the end of a transformation. A value that does not respond to +#call+
# is skipped.
attr_accessor :before_transformation
attr_accessor :after_transformation
# Should CSS that cannot be inlined be kept in a new `<style>` element in `<head>`?
# Enabled by default.
attr_accessor :keep_uninlinable_css
# Merge media queries to increase performance and reduce email size if enabled
# (which it is by default).
# Merging can change specificity in some cases. For example:
#   @media(max-width: 600px) { .col-6 { display: block; } }
#   @media(max-width: 400px) { .col-12 { display: inline-block; } }
#   @media(max-width: 600px) { .col-12 { display: block; } }
# will become
#   @media(max-width: 600px) { .col-6 { display: block; } .col-12 { display: block; } }
#   @media(max-width: 400px) { .col-12 { display: inline-block; } }
# which would change the styling on the page.
attr_accessor :merge_media_queries
# Integer representing a bitmap set of options used by Nokogiri during serialization.
# For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
attr_reader :serialization_options
# The mode to generate markup in. Valid values are `:html` (default),
# `:xhtml` and `:xml`.
attr_reader :mode
# Builds a document for the given markup with the default configuration:
# uninlinable CSS is kept, media queries are merged, assets are looked up
# through a FilesystemProvider, no external asset providers are set, the
# internal stylesheet is empty and the output mode is +:html+.
#
# @param [String] html the input HTML
def initialize(html)
  save_options = Nokogiri::XML::Node::SaveOptions

  @html = html
  @mode = :html
  # Unary plus yields an unfrozen string despite the frozen_string_literal
  # magic comment; #add_css appends to it in place.
  @css = +""
  @keep_uninlinable_css = true
  @merge_media_queries = true
  @serialization_options = save_options::NO_DECLARATION | save_options::NO_EMPTY_TAGS
  @asset_providers = ProviderList.wrap(FilesystemProvider.new)
  @external_asset_providers = ProviderList.empty
end
# Add more CSS to the end of the document's internal stylesheet. Each
# addition is preceded by a blank line so consecutive chunks never run
# together.
#
# @param [String] new_css
# @return [String] the accumulated internal stylesheet
def add_css(new_css)
  @css << "\n\n"
  @css << new_css
end
# Process the input HTML as a complete document and return the resulting
# markup.
#
# The {#before_transformation} callback receives the freshly parsed tree
# and this {Document} before anything is changed; the
# {#after_transformation} callback receives them again once the markup has
# been improved, styles inlined and URLs rewritten. Afterwards the
# +data-roadie-ignore+ markers are removed and the tree is serialized.
#
# Most of the work is delegated to other classes. A list of them can be
# seen below.
#
# @see MarkupImprover MarkupImprover (improves the markup of the DOM)
# @see Inliner Inliner (inlines the stylesheets)
# @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
# @see #transform_partial Transforms partial documents (fragments)
#
# @return [String] the transformed HTML
def transform
  document = Nokogiri::HTML.parse(html)

  callback(before_transformation, document)

  improve(document)
  inline(document, keep_uninlinable_in: :head)
  rewrite_urls(document)

  callback(after_transformation, document)

  remove_ignore_markers(document)
  serialize_document(document)
end
# Transform the input HTML as an HTML fragment/partial and return the
# processed HTML.
#
# Before the transformation begins, the {#before_transformation} callback
# will be called with the parsed HTML tree and the {Document} instance, and
# after all work is complete the {#after_transformation} callback will be
# invoked in the same way.
#
# The main difference between this and {#transform} is that this does not
# treat the HTML as a full document and does not try to fix it by adding
# doctypes, +<head>+ elements, etc. The inliner is told to keep uninlinable
# CSS in +:root+ rather than +:head+.
#
# As in {#transform}, +data-roadie-ignore+ markers are removed right before
# serialization so that they never end up in the returned markup.
#
# Most of the work is delegated to other classes. A list of them can be
# seen below.
#
# @see Inliner Inliner (inlines the stylesheets)
# @see UrlRewriter UrlRewriter (rewrites URLs and makes them absolute)
# @see #transform Transforms full documents
#
# @return [String] the transformed HTML
def transform_partial
  dom = Nokogiri::HTML.fragment html

  callback before_transformation, dom

  inline dom, keep_uninlinable_in: :root
  rewrite_urls dom

  callback after_transformation, dom

  # Markers are only needed while inlining; strip them after the last
  # callback so user code can still see them, mirroring #transform.
  remove_ignore_markers dom
  serialize_document dom
end
# Replace the normal asset providers. Whatever is supplied is passed
# through {ProviderList.wrap} before it is stored.
def asset_providers=(list)
  wrapped = ProviderList.wrap(list)
  @asset_providers = wrapped
end
# Replace the external asset providers. Whatever is supplied is passed
# through {ProviderList.wrap} before it is stored.
def external_asset_providers=(list)
  wrapped = ProviderList.wrap(list)
  @external_asset_providers = wrapped
end
# Set the bitmap of Nokogiri save options used during serialization.
# For the complete set of available options look into +Nokogiri::XML::Node::SaveOptions+.
# (To change the mode in which the document is generated use {#mode=} however.)
#
# @param [Integer, nil] options the flags; +nil+ is stored as +0+ (no flags)
def serialization_options=(options)
  # A missing value clears every flag instead of being stored as-is.
  @serialization_options = options ? options : 0
end
# Select the mode the resulting markup is generated in.
#
# Valid modes:
#   `:html` (default)
#   `:xhtml`
#   `:xml`
#
# @raise [ArgumentError] when the given mode is not one of the valid modes;
#   the current mode is left untouched in that case
def mode=(mode)
  unless VALID_MODES.include?(mode)
    raise ArgumentError, "Invalid mode #{mode.inspect}. Valid modes are: #{VALID_MODES.inspect}"
  end

  @mode = mode
end
private
# Modes accepted by #mode=. A bare +private+ does not apply to constants,
# so the constant is hidden explicitly with +private_constant+.
VALID_MODES = [:html, :xhtml, :xml].freeze
private_constant :VALID_MODES
# Wraps the CSS collected through #add_css in a new Stylesheet named
# "(Document styles)". A fresh object is built on every call.
def stylesheet
  Stylesheet.new("(Document styles)", @css)
end
# Runs a MarkupImprover over the DOM, giving it the original input HTML
# as well, and returns whatever the improver returns.
def improve(dom)
  improver = MarkupImprover.new(dom, html)
  improver.improve
end
# Inlines styles into the DOM. The stylesheets extracted from the DOM by the
# AssetScanner are used first and the document's internal stylesheet is
# appended last.
#
# +options+ must contain +:keep_uninlinable_in+ (a KeyError is raised
# otherwise); it is forwarded to the Inliner together with the document's
# +keep_uninlinable_css+ and +merge_media_queries+ settings.
def inline(dom, options = {})
  destination = options.fetch(:keep_uninlinable_in)

  scanner = AssetScanner.new(dom, asset_providers, external_asset_providers)
  all_stylesheets = scanner.extract_css + [stylesheet]

  inliner = Inliner.new(all_stylesheets, dom)
  inliner.inline(
    keep_uninlinable_css: keep_uninlinable_css,
    keep_uninlinable_in: destination,
    merge_media_queries: merge_media_queries
  )
end
# Passes the DOM to the URL rewriter chosen by #make_url_rewriter.
def rewrite_urls(dom)
  rewriter = make_url_rewriter
  rewriter.transform_dom(dom)
end
# Serializes the DOM with the configured serialization options combined
# with the save flag that corresponds to the current mode.
def serialize_document(dom)
  flags = Nokogiri::XML::Node::SaveOptions
  mode_flag = {
    html: flags::AS_HTML,
    xhtml: flags::AS_XHTML,
    xml: flags::AS_XML
  }.fetch(mode)

  # Serialize a copy: calling #dup fixed a few segfaults in certain
  # versions of Nokogiri.
  copy = dom.dup
  copy.to_html(save_with: serialization_options | mode_flag)
end
# Picks the URL rewriter for this document: a NullUrlRewriter when no URL
# options are set, otherwise a UrlRewriter backed by a UrlGenerator built
# from those options.
def make_url_rewriter
  return NullUrlRewriter.new unless url_options

  generator = UrlGenerator.new(url_options)
  UrlRewriter.new(generator)
end
# Invokes +callable+ with the DOM and this document. Anything that does not
# respond to +#call+ (for example +nil+ when no callback was configured) is
# ignored and +nil+ is returned.
def callback(callable, dom)
  return unless callable.respond_to?(:call)

  callable.call(dom, self)
end
# Strips the +data-roadie-ignore+ attribute from every node in the DOM that
# carries it.
def remove_ignore_markers(dom)
  marker = "data-roadie-ignore"
  dom.css("[#{marker}]").each { |marked| marked.remove_attribute(marker) }
end
end
end