lifull-dev/bucky-core

View on GitHub
lib/bucky/test_equipment/verifications/status_checker.rb

Summary

Maintainability
A
0 mins
Test Coverage
A
98%
# frozen_string_literal: true

require 'test/unit'
require 'nokogiri'
require 'parallel'
require_relative '../../utils/requests'
require_relative '../../utils/config'

# Upper bound for the redirect counter used by http_status_check: once
# redirect_count exceeds this value a "[Redirect Error]" result is returned
# instead of sending another request.
REDIRECT_LIMIT = 5

# Check http status code from response
#   2xx -> OK
#   3xx -> request again
#   4xx~5xx -> NG

module Bucky
  module TestEquipment
    module Verifications
      module StatusChecker
        include Test::Unit::Assertions
        include Bucky::Utils::Requests

        # Check the http status code of a url, following 3xx responses.
        # @param [Hash] args
        #   :url                  [String]  url to check
        #   :device               passed through to check_log_and_get_response
        #   :link_check_max_times [Integer] passed through to check_log_and_get_response
        #   :url_log              [Hash]    per-url log (url => { code:, entity:, error_message:, count: })
        #   :redirect_count       [Integer] number of redirects followed so far
        #   :redirect_url_list    [Array<String>] urls visited so far (this url is appended)
        # @return [Hash] { entity: ... } for 2xx, { error_message: ... } for errors,
        #   or the hash from check_log_and_get_response when it carries no :response
        def http_status_check(args)
          url = args[:url]
          device = args[:device]
          link_check_max_times = args[:link_check_max_times]
          url_log = args[:url_log]
          redirect_count = args[:redirect_count]
          redirect_url_list = args[:redirect_url_list]

          # If number of requests is over redirect limit
          return { error_message: "\n[Redirect Error] #{url} is redirected more than #{REDIRECT_LIMIT}" } if redirect_count > REDIRECT_LIMIT

          begin
            check_result = check_log_and_get_response(url, device, link_check_max_times, url_log)
          rescue Net::OpenTimeout, Net::ReadTimeout => e
            # Both connect and read timeouts are reported as a failure of this url
            # instead of aborting the whole check.
            return { error_message: "#{e.message} Please check this url: #{url}" }
          end
          # Without a :response the log already holds the answer (entity or error)
          return check_result unless check_result.key?(:response)

          response = check_result[:response]

          # Store original url
          redirect_url_list << url
          case response.code
          when /2[0-9]{2}/
            url_log[url][:entity] = response.entity
            puts "  #{url} ...  [#{response.code}:OK]"
            { entity: response.entity }
          when /3[0-9]{2}/
            # A fresh hash is built so the caller's args are never modified
            next_args = {
              url: url,
              device: device,
              link_check_max_times: link_check_max_times,
              url_log: url_log,
              redirect_count: redirect_count + 1,
              redirect_url_list: redirect_url_list
            }
            redirect_and_http_status_check(response, next_args)
          when /(4|5)[0-9]{2}/
            url_log[url][:error_message] = "[Status Error] http status returned #{response.code}.\ncheck this url: #{redirect_url_list.join(' -> ')}"
            puts "  #{url} ...  [#{response.code}:NG]"
            { error_message: url_log[url][:error_message] }
          else
            url_log[url][:error_message] = "[Status Code Invalid Error] Status Code is Invalid. \n Status:#{response.code}"
            { error_message: url_log[url][:error_message] }
          end
        end

        # Check the status of a page and of every link collected from it.
        # @param [Hash] args
        #   :url                  [String] page url; must start with http(s)://host
        #   :device               passed through to http_status_check
        #   :exclude_urls         [Array<String>, nil] rules handed to #exclude
        #   :link_check_max_times [Integer] passed through to http_status_check
        #   :url_log              [Hash] shared per-url log
        #   :only_same_fqdn       [Boolean] defaults to true when absent or nil
        # @raise [RuntimeError] when :url is not an http(s) url
        # @raise [Test::Unit::AssertionFailedError] when the base url or any link reports an error
        def linkstatus_check(args)
          url = args[:url]
          device = args[:device]
          exclude_urls = args[:exclude_urls]
          link_check_max_times = args[:link_check_max_times]
          url_log = args[:url_log]
          # Default only when the option is missing: `||=` would turn an explicit
          # false into true and write the default back into the caller's hash.
          only_same_fqdn = args[:only_same_fqdn]
          only_same_fqdn = true if only_same_fqdn.nil?

          # Extract base url and check if it is valid.
          # Group 1 = scheme + host (+ optional port), group 2 = host only, so
          # relative links keep a non-default port while host comparison is unchanged.
          url_reg = %r{^(https?://([a-zA-Z0-9\-_.]+)(?::\d+)?)}
          url_obj = url.match(url_reg)
          raise "Invalid URL #{url}" unless url_obj

          base_url  = url_obj[1]
          base_fqdn = url_obj[2]

          # Check base url
          http_status_check_args = { url: url, device: device, link_check_max_times: link_check_max_times, url_log: url_log, redirect_count: 0, redirect_url_list: [] }
          base_response = http_status_check(http_status_check_args)
          raise Test::Unit::AssertionFailedError, "Response of base URL is incorrect.\n#{base_response[:error_message]}" unless base_response[:error_message].nil?

          # Collect links
          links_args = { base_url: base_url, base_fqdn: base_fqdn, url_reg: url_reg, only_same_fqdn: only_same_fqdn, entity: base_response[:entity] }
          links = make_target_links(links_args)
          links = exclude(links, exclude_urls)

          errors = []
          Parallel.each(links.uniq, in_threads: Bucky::Utils::Config.instance[:linkstatus_thread_num]) do |link|
            # Each worker gets its own args hash; mutating one shared hash from
            # several threads lets a thread check another thread's url.
            link_args = http_status_check_args.merge(url: link, redirect_url_list: [])
            link_response = http_status_check(link_args)
            errors << link_response[:error_message] if link_response[:error_message]
          end
          raise Test::Unit::AssertionFailedError, errors.join("\n") unless errors.empty?
        end

        # Build the list of urls to check from the <a> elements of a page.
        # @param [Hash] args
        #   :base_url  [String] prefix handed to format_href for hrefs that do not match :url_reg
        #   :base_fqdn [String] host that absolute hrefs must have to be kept
        #   :url_reg   [Regexp] pattern whose 2nd capture group is the host of an absolute url
        #   :entity    page body parsed with Nokogiri::HTML
        # @return [Array<String>] collected links (may contain duplicates)
        # TODO: Add an option that can handle the check if href's fqdn is not same
        #       with base fqdn (args[:only_same_fqdn] is not used yet)
        def make_target_links(args)
          site_root = args[:base_url]
          own_host = args[:base_fqdn]
          absolute_url = args[:url_reg]
          anchors = Nokogiri::HTML.parse(args[:entity]).xpath('//a')

          anchors.each_with_object([]) do |anchor, collected|
            # Only the first whitespace-separated token is used
            # (workaround kept from the original author for a nokogiri issue)
            target = anchor.attr('href')&.split(' ')&.first
            next if exclude_href?(target)

            parsed = absolute_url.match(target)
            if parsed.nil?
              # href has no scheme/host part: prepend the base url
              collected << format_href(site_root, target)
            elsif own_host == parsed[2]
              # absolute href: keep it only when it points at the same host
              collected << target
            end
          end
        end

        # Exclude non test target url
        # @param [Array<String>] links candidate urls
        # @param [Array<String>, nil] exclude_urls rules; each rule is one of
        #   - an exact url,
        #   - a prefix ending with "*" (every link starting with the prefix is dropped),
        #   - a regular expression written as "/pattern/"
        # @return [Array<String>] links without the excluded ones
        #   (links itself is returned when exclude_urls is nil)
        def exclude(links, exclude_urls)
          return links if exclude_urls.nil?

          remaining = links - exclude_urls

          exclude_urls.each do |ex_url|
            if ex_url.end_with?('*')
              # Exclude url if it has "*" in the last of it
              prefix = ex_url.delete('*')
              remaining.delete_if { |link| link.start_with?(prefix) }
            elsif ex_url.start_with?('/') && ex_url.end_with?('/')
              # Non-destructive strip: the rule strings belong to the caller and
              # must still read "/pattern/" the next time this method is called.
              pattern = Regexp.new(ex_url.delete_prefix('/').delete_suffix('/'))
              remaining.delete_if { |link| link.match?(pattern) }
            end
          end

          remaining
        end

        private

        # Decide whether an href must be skipped by the link check.
        # @param [String, nil] href value of an anchor's href attribute
        # @return [Boolean] true for a missing href and for javascript:, tel:
        #   and mailto: uris; false for everything else
        def exclude_href?(href)
          return true if href.nil?

          # Match on the scheme itself: "tel:+81-..." has no leading digit, and a
          # relative path such as "javascripts/app.html" is a real page to check.
          href.match?(/\A(?:javascript|tel|mailto):/i)
        end

        # Check the url log and either reuse the logged result or submit a request.
        # @param [String] url
        # @param device passed through to get_response
        # @param [Integer] link_check_max_times maximum number of requests per url
        # @param [Hash] url_log url => { code:, entity:, error_message:, count: }
        # @return [Hash] one of
        #   { response: response }   a request was submitted,
        #   { entity: ... }          the logged code is 2xx,
        #   { error_message: ... }   the url reached link_check_max_times
        def check_log_and_get_response(url, device, link_check_max_times, url_log)
          entry = url_log[url] if url_log.key?(url)

          if entry
            if entry[:code].match?(/2[0-9]{2}/)
              puts "  #{url} is already [#{entry[:code]}:OK]"
              return { entity: entry[:entity] }
            end
            if entry[:count] >= link_check_max_times
              puts "  #{url} reach maximum check times [#{entry[:code]}:NG]"
              return { error_message: entry[:error_message] }
            end
          end

          config = Bucky::Utils::Config.instance
          response = get_response(url, device, config[:linkstatus_open_timeout], config[:linkstatus_read_timeout])

          if entry
            # Record the latest status as well: otherwise a url that recovered to
            # 2xx on retry keeps its old code and is later reported with its old error.
            entry[:code] = response.code
            entry[:count] += 1
          else
            url_log[url] = { code: response.code, entity: nil, error_message: nil, count: 1 }
          end
          { response: response }
        end

        # Join base_url and an href that has no scheme/host part.
        # A "/" is inserted unless href already starts with "/" or "#".
        # @param [String] base_url
        # @param [String] href left untouched (also works with frozen strings)
        # @return [String] new string: base_url followed by the (slash-prefixed) href
        def format_href(base_url, href)
          path = href.match?(%r{^/|^#}) ? href : "/#{href}"
          base_url + path
        end

        # Follow a redirect response and check the status of its target.
        # @param response object answering ['location'] and #code
        # @param [Hash] http_status_check_args args for http_status_check; :url is the
        #   url that returned the redirect. The hash itself is not modified.
        # @return [Hash] result of http_status_check for the redirect target, or
        #   { error_message: ... } when the response has no Location header
        def redirect_and_http_status_check(response, http_status_check_args)
          current_url = http_status_check_args[:url]
          location = response['location']
          if location.nil?
            return { error_message: "[Redirect Error] #{current_url} returned #{response.code} without Location header" }
          end

          # Only a location that starts with a scheme is absolute; a relative one
          # may still contain "http" somewhere (e.g. "/docs/http-basics").
          redirect_url = if location.match?(%r{\Ahttps?://}i)
                           location
                         else
                           # Prepend scheme + host (+ port, if any) of the current url
                           "#{current_url[%r{\Ahttps?://[a-zA-Z0-9\-_.]+(?::\d+)?}]}#{location}"
                         end
          puts "  #{current_url} ... redirect to #{redirect_url} [#{response.code}:RD]"
          http_status_check(http_status_check_args.merge(url: redirect_url))
        end
      end
    end
  end
end