pivotal/LicenseFinder

View on GitHub
lib/license_finder/license/text.rb

Summary

Maintainability
A
0 mins
Test Coverage
# frozen_string_literal: true

module LicenseFinder
  class License
    # Text helpers that canonicalize punctuation and whitespace, and that turn
    # a piece of text containing <placeholders> into a tolerant Regexp.
    module Text
      # Any run of whitespace (this includes newlines).
      SPACES = /\s+/.freeze
      # One or two consecutive ASCII quote-like characters.
      QUOTES = /['`"]{1,2}/.freeze
      # The literal "<year>" placeholder.
      YEAR_PLACEHOLDERS = /<year>/.freeze
      # Any other "<...>" placeholder.
      PLACEHOLDERS = /<[^<>]+>/.freeze
      SPECIAL_SINGLE_QUOTES = /[‘’]/.freeze
      SPECIAL_DOUBLE_QUOTES = /[“”„«»]/.freeze
      # An "(a) "-style list marker as it appears AFTER Regexp.escape, i.e.
      # with a backslash in front of each paren and of the trailing whitespace.
      ALPHABET_ORDERED_LIST = /\\\([a-z]\\\)\\\s/.freeze
      # Regex source spliced in for such a marker, making it optional.
      ALPHABET_ORDERED_LIST_OPTIONAL = '(\([a-z]\)\s)?'
      # A "1. ", "* " or "- " bullet as it appears AFTER Regexp.escape.
      LIST_BULLETS = /(\d{1,2}\\\.|\\\*|\\-)\\\s/.freeze
      # Regex source spliced in for such a bullet, making it optional.
      # NOTE(review): the "." after the digits is unescaped, so it accepts any
      # character there, not only a literal dot -- confirm this is intended.
      LIST_BULLETS_OPTIONAL = '(\d{1,2}.|\*|\-)?\s*'
      NEWLINE_CHARACTER = /\n+/.freeze
      # Leading ">" characters (with optional indentation) at a line start.
      QUOTE_COMMENT_CHARACTER = /^\s*>+/.freeze
      # A backslash immediately followed by a double quote.
      ESCAPED_QUOTES = /\\"/.freeze
      SPECIAL_CHARACTERS = /§/.freeze
      SPECIAL_DASHES = /–/.freeze

      # Ordered [pattern, replacement] pairs applied by normalize_punctuation.
      # Order matters: e.g. typographic quotes are mapped to ASCII quotes
      # before QUOTES folds every ASCII quote variant into a double quote.
      NORMALIZATION_STEPS = [
        [SPECIAL_DOUBLE_QUOTES, '"'],
        [SPECIAL_SINGLE_QUOTES, "'"],
        [QUOTE_COMMENT_CHARACTER, ''],
        [SPACES, ' '],
        [NEWLINE_CHARACTER, ' '],
        [ESCAPED_QUOTES, '"'],
        [QUOTES, '"'],
        [SPECIAL_CHARACTERS, '?'],
        [SPECIAL_DASHES, '-']
      ].freeze
      private_constant :NORMALIZATION_STEPS

      # Ordered [pattern, replacement] pairs applied by compile_to_regex to the
      # already-escaped text; each replacement is raw regex source.
      REGEX_RELAXATIONS = [
        [YEAR_PLACEHOLDERS, '(\S*)'],
        [PLACEHOLDERS, '(.*)'],
        [',', '(,)?'],
        ['HOLDER', '(HOLDER|OWNER)'],
        [ALPHABET_ORDERED_LIST, ALPHABET_ORDERED_LIST_OPTIONAL],
        [LIST_BULLETS, LIST_BULLETS_OPTIONAL]
      ].freeze
      private_constant :REGEX_RELAXATIONS

      class << self
        # Returns a canonicalized copy of +text+: typographic quotes, the
        # section sign and the en dash are mapped to ASCII, leading ">" quote
        # markers are removed, whitespace runs collapse to one space, every
        # quote variant becomes a double quote, and the result is stripped.
        #
        # The argument itself is never mutated (a dup is re-tagged as UTF-8).
        # If any step raises ArgumentError -- which Ruby does when a regexp is
        # matched against bytes that are not valid UTF-8 -- the original
        # +text+ object is returned untouched.
        def normalize_punctuation(text)
          utf8_copy = text.dup.force_encoding('UTF-8')
          cleaned = NORMALIZATION_STEPS.reduce(utf8_copy) do |current, (pattern, replacement)|
            current.gsub(pattern, replacement)
          end
          cleaned.strip
        rescue ArgumentError
          text
        end

        # Builds a Regexp from +text+: the text is normalized, regex-escaped,
        # and then loosened so that "<year>" matches any non-space run, other
        # "<...>" placeholders match anything, commas are optional, "HOLDER"
        # also accepts "OWNER", and list markers/bullets are optional.
        def compile_to_regex(text)
          escaped = Regexp.escape(normalize_punctuation(text))
          pattern_source = REGEX_RELAXATIONS.reduce(escaped) do |current, (pattern, replacement)|
            current.gsub(pattern, replacement)
          end
          Regexp.new(pattern_source)
        end
      end
    end
  end
end