rwz/normalize_url

View on GitHub
lib/normalize_url/normalizer.rb

Summary

Maintainability
A
1 hr
Test Coverage
require "set"
require "addressable/uri"

module NormalizeUrl
  # Normalizes a single absolute HTTP/HTTPS URL.
  #
  # The URL is parsed and normalized by Addressable on construction; calling
  # {#normalize} then applies a series of optional steps (each enabled unless
  # the matching option is set to a falsy value) and returns the result as a
  # String.
  #
  # Recognized option keys (all read with a default of +true+):
  #   :remove_trailing_slash    - drop one trailing "/" from the path
  #   :remove_repeating_slashes - collapse runs of "/" in the path
  #   :remove_hash              - drop the fragment
  #   :remove_tracking          - drop the params in TRACKING_QUERY_PARAMS
  #   :remove_params            - a name or list of names to drop from the query
  #   :sort_query               - when falsy, query values are re-assigned as an
  #                               array of pairs instead of a Hash
  class Normalizer
    attr_reader :uri, :options

    # Query parameter names removed by the :remove_tracking step.
    TRACKING_QUERY_PARAMS = %w[
      utm_source
      utm_medium
      utm_term
      utm_content
      utm_campaign
      sms_ss
      awesm
      xtor
      PHPSESSID
    ].to_set.freeze

    # Schemes accepted by the normalizer. Compared for exact equality so that
    # look-alike schemes such as "shttp" or "httpx" are rejected.
    SUPPORTED_SCHEMES = %w[http https].freeze

    # @param original_uri [String] the URL to normalize
    # @param options [Hash] per-step switches, see the class documentation
    # @raise [InvalidURIError] when the input is nil, cannot be parsed, is not
    #   absolute, or does not use the http/https scheme
    def initialize(original_uri, options = {})
      parsed = Addressable::URI.parse(original_uri)
      # Addressable returns nil for nil input; report it as an invalid URL
      # instead of failing later with NoMethodError.
      fail_uri "#{original_uri.inspect} is not a URL" if parsed.nil?

      @uri = parsed.normalize
      @options = options
      fail_uri "only absolute URLs can be normalized" unless uri.absolute?
      fail_uri "only HTTP/HTTPS URLs can be normalized" unless SUPPORTED_SCHEMES.include?(uri.scheme)
    rescue Addressable::URI::InvalidURIError
      fail_uri "#{original_uri.inspect} is not a URL"
    end

    # Applies every enabled normalization step to the parsed URI.
    #
    # Note that the steps mutate the URI held by this instance.
    #
    # @return [String] the normalized URL
    def normalize
      # Collapse duplicate slashes before stripping the trailing one, so that
      # a path such as "/foo//" ends up as "/foo" in a single pass.
      process :remove_repeating_slashes
      process :remove_trailing_slash
      process :remove_hash
      process_query
      uri.to_s
    end

    private

    # Runs the "process_<step>" method unless the step is disabled in options.
    def process(step)
      send "process_#{step}" if process?(step)
    end

    # Filters the query string and writes it back to the URI.
    # Does nothing when the URI has no query values.
    def process_query
      query_values = uri.query_values
      return if query_values.nil?

      query_values = remove_params(query_values, TRACKING_QUERY_PARAMS) if process?(:remove_tracking)
      query_values = remove_params(query_values, params_to_remove) if process?(:remove_params)
      # NOTE(review): presumably Addressable sorts Hash input but keeps the
      # order of an array of pairs - confirm against its query_values= docs.
      query_values = query_values.to_a unless process?(:sort_query)

      # An empty query is removed entirely rather than left as a bare "?".
      uri.query_values = query_values.empty? ? nil : query_values
    end

    # @return [Object] the option value for +step+, or true when it is not set
    def process?(step)
      options.fetch(step, true)
    end

    # Drops a single trailing slash, leaving the root path "/" untouched.
    def process_remove_trailing_slash
      uri.path = uri.path.chomp("/") unless uri.path == "/"
    end

    # Removes the fragment part of the URI.
    def process_remove_hash
      uri.fragment = nil
    end

    # Collapses consecutive slashes in the path; skipped when there is no host.
    def process_remove_repeating_slashes
      uri.path = uri.path.squeeze("/") if uri.host
    end

    # Returns +query_values+ without the entries whose key is listed in +params+.
    def remove_params(query_values, params)
      query_values.reject { |key, _| params.include?(key) }
    end

    # Raises InvalidURIError (resolved from the enclosing namespace; its
    # definition is outside this file) with the given message.
    def fail_uri(message)
      fail InvalidURIError, message
    end

    # Names given via the :remove_params option, coerced to an array of strings.
    def params_to_remove
      Array(options.fetch(:remove_params, nil)).map(&:to_s)
    end
  end
end