ddfreyne/nanoc

View on GitHub
lib/nanoc/filters/colorize_syntax.rb

Summary

Maintainability
C
7 hrs
Test Coverage
module Nanoc::Filters
  # @api private
  class ColorizeSyntax < Nanoc::Filter
    identifier :colorize_syntax

    requires 'nokogiri', 'stringio', 'open3'

    # The colorizer used for every language that has no explicit entry in the
    # `:colorizers` option. {#run} falls back to this value when the
    # `:default_colorizer` option is not given.
    DEFAULT_COLORIZER = :coderay

    # Syntax-highlights code blocks in the given content. Code blocks should
    # be enclosed in `pre` elements that contain a `code` element. The code
    # element should have an indication of the language the code is in. There
    # are two possible ways of adding such an indication:
    #
    # 1. A HTML class starting with `language-` and followed by the
    # code language, as specified by HTML5. For example, `<code class="language-ruby">`.
    #
    # 2. A comment on the very first line of the code block in the format
    # `#!language` where `language` is the language the code is in. For
    # example, `#!ruby`.
    #
    # Options for individual colorizers will be taken from the {#run}
    # options’ value for the given colorizer. For example, if the filter is
    # invoked with a `:coderay => coderay_options_hash` option, the
    # `coderay_options_hash` hash will be passed to the CodeRay colorizer.
    #
    # Currently, the following colorizers are supported:
    #
    # * `:coderay` for [Coderay](http://coderay.rubychan.de/)
    # * `:pygmentize` for [pygmentize](http://pygments.org/docs/cmdline/), the
    #   command-line frontend for [Pygments](http://pygments.org/)
    # * `:pygmentsrb` for [pygments.rb](https://github.com/tmm1/pygments.rb),
    #   a Ruby interface for [Pygments](http://pygments.org/)
    # * `:simon_highlight` for [Highlight](http://www.andre-simon.de/doku/highlight/en/highlight.html)
    # * `:rouge` for [Rouge](https://github.com/jayferd/rouge/)
    #
    # Additional colorizer implementations are welcome!
    #
    # @example Using a class to indicate the type of code to be highlighted
    #
    #     <pre><code class="language-ruby">
    #     def foo
    #       "asdf"
    #     end
    #     </code></pre>
    #
    # @example Using a comment to indicate the type of code to be highlighted
    #
    #     <pre><code>
    #     #!ruby
    #     def foo
    #       "asdf"
    #     end
    #     </code></pre>
    #
    # @example Invoking the filter with custom parameters
    #
    #     filter :colorize_syntax,
    #            :colorizers => { :ruby => :coderay },
    #            :coderay    => { :line_numbers => :list }
    #
    # @param [String] content The content to filter
    #
    # @option params [Symbol] :default_colorizer (DEFAULT_COLORIZER) The
    #   default colorizer, i.e. the colorizer that will be used when the
    #   colorizer is not overridden for a specific language.
    #
    # @option params [Symbol] :syntax (:html) The syntax to use, which can be
    #   `:html`, `:xml` or `:xhtml`, the latter two being the same.
    #
    # @option params [Hash] :colorizers ({}) A hash containing
    #   a mapping of programming languages (symbols, not strings) onto
    #   colorizers (symbols).
    #
    # @option params [Boolean] :outside_pre (false) `true` if the colorizer
    #   should be applied on `code` elements outside `pre` elements, `false`
    #   if only `code` elements inside `pre` elements should be colorized.
    #
    # @option params [Boolean] :is_fullpage (false) Whether to treat the input
    #   as a full HTML page or a page fragment. When true, HTML boilerplate
    #   such as the doctype, `html`, `head` and `body` elements will be added.
    #
    # @return [String] The filtered content
    def run(content, params = {})
      Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

      # Take colorizers from parameters. Languages without an explicit entry
      # fall back to the hash default.
      @colorizers = Hash.new(params[:default_colorizer] || DEFAULT_COLORIZER)
      (params[:colorizers] || {}).each_pair do |language, colorizer|
        @colorizers[language] = colorizer
      end

      # Determine syntax (HTML or XML)
      syntax = params[:syntax] || :html
      parser_class =
        case syntax
        when :html
          Nokogiri::HTML
        when :xml, :xhtml
          Nokogiri::XML
        else
          raise "unknown syntax: #{syntax.inspect} (expected :html or :xml)"
        end

      # Colorize
      doc = parse(content, parser_class, params.fetch(:is_fullpage, false))
      selector = params[:outside_pre] ? 'code' : 'pre > code'
      doc.css(selector).each do |element|
        # Get language
        has_class = false
        language = nil
        if element['class']
          # Get language from class
          match = element['class'].match(/(^| )language-([^ ]+)/)
          language = match[2] if match
          has_class = true if language
        else
          # Get language from comment line. An empty or whitespace-only element
          # has no first word at all, so guard against nil before matching.
          first_word = element.inner_text.strip.split[0]
          match = first_word && first_word.match(/^#!([^\/][^\n]*)$/)
          language = match[1] if match
          element.content = element.content.sub(/^#!([^\/][^\n]*)$\n/, '') if language
        end

        # Give up if there is no hope left
        next if language.nil?

        # Highlight
        raw = strip(element.inner_text)
        highlighted_code = highlight(raw, language, params)
        element.children = Nokogiri::HTML.fragment(strip(highlighted_code), 'utf-8')

        # Add language-something class (built as a new string rather than by
        # appending to the attribute value in place)
        unless has_class
          existing = element['class'].to_s
          separator = existing.empty? || existing.end_with?(' ') ? '' : ' '
          element['class'] = "#{existing}#{separator}language-#{language}"
        end

        highlight_postprocess(language, element.parent)
      end

      # Serialize with the method matching the requested syntax
      # (to_html, to_xml or to_xhtml).
      doc.send("to_#{syntax}".to_sym, encoding: 'UTF-8')
    end

    # Parses the given content using the given class. This method also handles
    # an issue with Nokogiri on JRuby causing “can't modify frozen string”
    # errors.
    #
    # @param [String] content The content to parse
    #
    # @param [Class] klass The Nokogiri parser class (either Nokogiri::HTML
    #   or Nokogiri::XML)
    #
    # @param [Boolean] is_fullpage true if the given content is a full page,
    #   false if it is a fragment
    def parse(content, klass, is_fullpage)
      if is_fullpage
        klass.parse(content, nil, 'UTF-8')
      else
        klass.fragment(content)
      end
    rescue => e
      # Retry once with an unfrozen copy when the parser complained about a
      # frozen string. The retry only happens when the input really was
      # frozen, so a persistent error is re-raised instead of recursing
      # forever. The match ignores case so differently capitalised variants
      # of the message are recognised too.
      raise unless content.frozen? && e.message =~ /can't modify frozen string/i

      parse(content.dup, klass, is_fullpage)
    end

    # Runs the code through [CodeRay](http://coderay.rubychan.de/).
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @param [Hash] params Parameters to pass on to CodeRay
    #
    # @return [String] The colorized output
    def coderay(code, language, params = {})
      # Loaded on demand, so the gem is only required when this colorizer runs.
      require 'coderay'

      # Tokenize first, then render the tokens as HTML with the given options.
      tokens = ::CodeRay.scan(code, language)
      tokens.html(params)
    end

    # Returns the input itself, not performing any code highlighting.
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in (unused)
    #
    # @return [String] The colorized output, which is identical to the input
    #   in this case
    def dummy(code, language, params = {}) # rubocop:disable Lint/UnusedMethodArgument
      # Pass-through colorizer: `language` and `params` are accepted only so
      # the signature lines up with the other colorizers.
      code
    end

    # Runs the content through [pygmentize](http://pygments.org/docs/cmdline/),
    # the command-line frontend for [Pygments](http://pygments.org/).
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @option params [String, Symbol] :encoding The encoding of the code block
    #
    # @return [String] The colorized output
    def pygmentize(code, language, params = {})
      check_availability('pygmentize', '-V')

      # Work on a copy: the hash belongs to the caller, so it must not pick up
      # our defaults, and a frozen hash must not make the filter fail.
      options = params.dup
      options[:encoding] ||= 'utf-8'
      options[:nowrap] ||= 'True'

      # The options are never empty once the defaults are in, so `-O` is
      # always passed.
      cmd = ['pygmentize', '-l', language, '-f', 'html']
      cmd << '-O' << options.map { |key, value| "#{key}=#{value}" }.join(',')

      # Capture the command's stdout; its stderr goes to our own stderr.
      stdout = StringIO.new
      piper = Nanoc::Extra::Piper.new(stdout: stdout, stderr: $stderr)
      piper.run(cmd, code)

      stdout.string
    end

    # Runs the content through [Pygments](http://pygments.org/) via
    # [pygments.rb](https://github.com/tmm1/pygments.rb).
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @return [String] The colorized output
    def pygmentsrb(code, language, params = {})
      require 'pygments'

      args = params.dup
      args[:lexer] ||= language
      # `dup` is shallow, so copy the nested options hash as well; otherwise
      # the defaults below would be written into the caller's own hash.
      args[:options] = (args[:options] || {}).dup
      args[:options][:encoding] ||= 'utf-8'
      args[:options][:nowrap] ||= 'True'

      Pygments.highlight(code, args)
    end

    # Maps option keys accepted by {#simon_highlight} onto the switch that is
    # appended to the `highlight` command line for that option.
    SIMON_HIGHLIGHT_OPT_MAP = {
      wrap: '-W',
      include_style: '-I',
      line_numbers: '-l',
    }.freeze

    # Runs the content through [Highlight](http://www.andre-simon.de/doku/highlight/en/highlight.html).
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @option params [String] :style The style to use
    #
    # @return [String] The colorized output
    def simon_highlight(code, language, params = {})
      check_availability('highlight', '--version')

      cmd = ['highlight', '--syntax', language, '--fragment']
      params.each do |key, value|
        # An option explicitly set to nil/false means "off": add neither a
        # switch nor a nil argument to the command line.
        next unless value

        switch = SIMON_HIGHLIGHT_OPT_MAP[key]
        if switch
          cmd << switch
        elsif key == :style
          # TODO: allow passing other options
          cmd << '--style' << value
        end
      end

      # Capture the command's stdout; its stderr goes to our own stderr.
      stdout = StringIO.new
      piper = Nanoc::Extra::Piper.new(stdout: stdout, stderr: $stderr)
      piper.run(cmd, code)

      stdout.string
    end

    # Wraps the element in <div class="CodeRay"><div class="code">
    def coderay_postprocess(_language, element)
      # A free-standing element (no parent) has nothing to be wrapped in.
      return unless element.parent

      document = element.document

      # Inner wrapper: <div class="code"> holding a copy of the element
      inner = Nokogiri::XML::Node.new('div', document).tap do |div|
        div['class'] = 'code'
        div.children = element.dup
      end

      # Outer wrapper: <div class="CodeRay"> holding the inner wrapper
      outer = Nokogiri::XML::Node.new('div', document).tap do |div|
        div['class'] = 'CodeRay'
        div.children = inner
      end

      # Put the wrapped copy where the original element was
      element.swap(outer)
    end

    # Runs the content through [Rouge](https://github.com/jayferd/rouge/).
    #
    # @param [String] code The code to colorize
    #
    # @param [String] language The language the code is written in
    #
    # @return [String] The colorized output
    def rouge(code, language, params = {})
      require 'rouge'

      # Compare parsed version numbers; comparing the raw strings would sort
      # "10.0" before "2".
      rouge_1x = Gem::Version.new(Rouge.version) < Gem::Version.new('2')

      if rouge_1x || params.fetch(:legacy, false)
        # Rouge 1.x or Rouge 2.x legacy options
        formatter_options = {
          css_class: params.fetch(:css_class, 'highlight'),
          inline_theme: params.fetch(:inline_theme, nil),
          line_numbers: params.fetch(:line_numbers, false),
          start_line: params.fetch(:start_line, 1),
          wrap: params.fetch(:wrap, false),
        }
        formatter_cls = Rouge::Formatters.const_get(rouge_1x ? 'HTML' : 'HTMLLegacy')
        formatter = formatter_cls.new(formatter_options)
      else
        # Block form, so the default formatter is only built when no
        # `:formatter` was supplied.
        formatter = params.fetch(:formatter) { Rouge::Formatters::HTML.new }
      end

      # Fall back to the plain-text lexer when no lexer is found for the language
      lexer = Rouge::Lexer.find_fancy(language, code) || Rouge::Lexers::PlainText
      formatter.format(lexer.lex(code))
    end

    # Removes the double wrapping.
    #
    # Before:
    #
    #   <pre><code class="language-ruby"><pre class="highlight"><code>
    #
    # After:
    #
    #   <pre><code class="language-ruby highlight">
    def rouge_postprocess(_language, element)
      # Only a <pre> wrapper can contain the doubled structure
      return unless element.name == 'pre'

      # Walk down pre > code > pre > code; stop as soon as a level is missing
      outer_code = element.xpath('code').first
      return unless outer_code

      inner_pre = outer_code.xpath('pre').first
      return unless inner_pre

      inner_code = inner_pre.xpath('code').first
      return unless inner_code

      # Hoist the innermost markup into the outer <code> and merge the classes
      outer_code.inner_html = inner_code.inner_html
      merged_classes = [outer_code['class'], inner_pre['class']].compact
      outer_code['class'] = merged_classes.join(' ')
    end

    protected

    # Colorizer names that may be dispatched to; anything else is rejected.
    KNOWN_COLORIZERS = %i[coderay dummy pygmentize pygmentsrb simon_highlight rouge].freeze

    # Removes the first blank lines and any whitespace at the end.
    def strip(s)
      # Skip lines until the first one that has non-blank content ...
      kept_lines = s.each_line.drop_while { |line| line.strip.empty? }
      # ... then glue the rest back together and trim the tail.
      kept_lines.join.rstrip
    end

    def highlight(code, language, params = {})
      colorizer = @colorizers[language.to_sym]

      # Refuse anything that is not one of the known colorizers
      unless KNOWN_COLORIZERS.include?(colorizer)
        raise "I don’t know how to highlight code using the “#{colorizer}” colorizer"
      end

      # Each colorizer gets only its own options (the value stored under its
      # name in the params), or an empty hash when none were given.
      colorizer_options = params[colorizer] || {}
      send(colorizer, code, language, colorizer_options)
    end

    def highlight_postprocess(language, element)
      colorizer = @colorizers[language.to_sym]

      # Refuse anything that is not one of the known colorizers
      unless KNOWN_COLORIZERS.include?(colorizer)
        raise "I don’t know how to highlight code using the “#{colorizer}” colorizer"
      end

      # A colorizer may define an optional `<name>_postprocess` hook; call it
      # only when this object responds to it.
      hook = :"#{colorizer}_postprocess"
      send(hook, language, element) if respond_to?(hook)
    end

    def check_availability(*cmd)
      # Run the command with no input, collecting both of its output streams
      # in throwaway buffers.
      discarded_out = StringIO.new
      discarded_err = StringIO.new
      Nanoc::Extra::Piper.new(stdout: discarded_out, stderr: discarded_err).run(cmd, nil)
    end
  end
end