rubocop-hq/rubocop

View on GitHub
lib/rubocop/cop/style/select_by_regexp.rb

Summary

Maintainability
A
1 hr
Test Coverage
A
100%
# frozen_string_literal: true

module RuboCop
  module Cop
    module Style
      # Looks for places where a subset of an Enumerable (array,
      # range, set, etc.; see note below) is calculated based on a `Regexp`
      # match, and suggests `grep` or `grep_v` instead.
      #
      # NOTE: Hashes do not behave as you may expect with `grep`, which
      # means that `hash.grep` is not equivalent to `hash.select`. Although
      # RuboCop is limited by static analysis, this cop attempts to avoid
      # registering an offense when the receiver is a hash (hash literal,
      # `Hash.new`, `Hash#[]`, or `to_h`/`to_hash`).
      #
      # NOTE: `grep` and `grep_v` were optimized when used without a block
      # in Ruby 3.0, but may be slower in previous versions.
      # See https://bugs.ruby-lang.org/issues/17030
      #
      # @safety
      #   Autocorrection is marked as unsafe because `MatchData` will
      #   not be created by `grep`, but may have previously been relied
      #   upon after the `match?` or `=~` call.
      #
      #   Additionally, the cop cannot guarantee that the receiver of
      #   `select` or `reject` is actually an array by static analysis,
      #   so the correction may not be actually equivalent.
      #
      # @example
      #   # bad (select or find_all)
      #   array.select { |x| x.match? /regexp/ }
      #   array.select { |x| /regexp/.match?(x) }
      #   array.select { |x| x =~ /regexp/ }
      #   array.select { |x| /regexp/ =~ x }
      #
      #   # bad (reject)
      #   array.reject { |x| x.match? /regexp/ }
      #   array.reject { |x| /regexp/.match?(x) }
      #   array.reject { |x| x =~ /regexp/ }
      #   array.reject { |x| /regexp/ =~ x }
      #
      #   # good
      #   array.grep(regexp)
      #   array.grep_v(regexp)
      class SelectByRegexp < Base
        extend AutoCorrector
        include RangeHelp

        # Offense message template; the `replacement` and `original_method`
        # placeholders are filled in by `register_offense`.
        MSG = 'Prefer `%<replacement>s` to `%<original_method>s` with a regexp match.'
        # Method names this cop reacts to.
        # NOTE(review): presumably read by the cop framework to limit which
        # sends are dispatched to `on_send` -- confirm against RuboCop's docs.
        RESTRICT_ON_SEND = %i[select find_all reject].freeze
        # Method to suggest, keyed by the original method name, when the match
        # in the block is not negated (see `replacement`).
        REPLACEMENTS = { select: 'grep', find_all: 'grep', reject: 'grep_v' }.freeze
        # Method to suggest, keyed by the original method name, when the block
        # uses `!~` (see `opposite?`), which inverts the selection.
        OPPOSITE_REPLACEMENTS = { select: 'grep_v', find_all: 'grep_v', reject: 'grep' }.freeze
        # Method names accepted as a regexp match inside the block; interpolated
        # into the `regexp_match?` node pattern as `%REGEXP_METHODS`.
        REGEXP_METHODS = %i[match? =~ !~].to_set.freeze

        # Matches a block with exactly one plain argument, or a
        # numbered-parameter block, whose whole body is a regexp match: either
        # a send of one of `REGEXP_METHODS` with exactly one argument, or a
        # `match-with-lvasgn` node.
        #
        # Captures two values: the block argument name (for a numblock, the
        # literal `1`, which `extract_send_node` turns into `_1`) and the body
        # node that performs the match.
        # NOTE(review): the `1` is presumably the numblock's parameter count,
        # so only blocks using just `_1` match -- confirm.
        # @!method regexp_match?(node)
        def_node_matcher :regexp_match?, <<~PATTERN
          {
            (block call (args (arg $_)) ${(send _ %REGEXP_METHODS _) match-with-lvasgn})
            (numblock call $1 ${(send _ %REGEXP_METHODS _) match-with-lvasgn})
          }
        PATTERN

        # Returns true if a node appears to return a hash. Three shapes are
        # recognised:
        #   * a `new` or `[]` call on a constant named `Hash` (in any namespace)
        #   * a block attached to such a `Hash.new` call
        #   * any `to_h` or `to_hash` call, whatever the receiver
        # @!method creates_hash?(node)
        def_node_matcher :creates_hash?, <<~PATTERN
          {
            (call (const _ :Hash) {:new :[]} ...)
            (block (call (const _ :Hash) :new ...) ...)
            (call _ { :to_h :to_hash } ...)
          }
        PATTERN

        # Matches the `ENV` constant, written either bare (`ENV`) or with a
        # leading top-level scope (`::ENV`). Used by `receiver_allowed?` to skip
        # such receivers.
        # NOTE(review): presumably skipped because `ENV` is hash-like, so `grep`
        # would not be equivalent to `select` -- confirm.
        # @!method env_const?(node)
        def_node_matcher :env_const?, <<~PATTERN
          (const {nil? cbase} :ENV)
        PATTERN

        # Checks that the match node actually involves the local variable
        # `name`. It matches when that variable is:
        #   * the receiver of a send,
        #   * the last argument of a send, or
        #   * the right-hand side of a `match-with-lvasgn` whose left-hand side
        #     is a regexp literal.
        # @!method calls_lvar?(node, name)
        def_node_matcher :calls_lvar?, <<~PATTERN
          {
            (send (lvar %1) ...)
            (send ... (lvar %1))
            (match-with-lvasgn regexp (lvar %1))
          }
        PATTERN

        # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
        # Inspects a call that has a block attached and registers an offense
        # when the block is a single regexp match on its argument.
        #
        # Nothing is registered when:
        #   * the call has no block, or the block body is a `begin` node
        #   * the receiver is recognised by `receiver_allowed?`
        #   * the body is not a regexp match on the block argument
        #   * the match is a receiver-less `match?` call
        #   * the suggestion would be `grep_v` and the target Ruby is <= 2.2
        #
        # @param node the send (or csend, via the `on_csend` alias) node
        def on_send(node)
          block_node = node.block_node
          return unless block_node
          return if block_node.body&.begin_type?
          return if receiver_allowed?(block_node.receiver)

          matcher_node = extract_send_node(block_node)
          return unless matcher_node
          return if match_predicate_without_receiver?(matcher_node)

          suggestion = replacement(matcher_node, node)
          return if target_ruby_version <= 2.2 && suggestion == 'grep_v'

          register_offense(node, block_node, find_regexp(matcher_node, block_node), suggestion)
        end
        # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
        alias on_csend on_send

        private

        # Tells whether the receiver of the inspected call should be left
        # alone because it looks like a hash or is `ENV`.
        #
        # @param node the receiver node, or nil when the call has no receiver
        # @return false for a missing receiver, otherwise a truthy value when
        #   the receiver is a hash literal, matches `creates_hash?`, or matches
        #   `env_const?`
        def receiver_allowed?(node)
          return false unless node
          return true if node.hash_type?

          creates_hash?(node) || env_const?(node)
        end

        # Looks up the method name to suggest for the inspected call.
        #
        # @param regexp_method_send_node the match node from the block body
        # @param node the inspected call; its method name is the lookup key
        # @return the entry from `OPPOSITE_REPLACEMENTS` when the match is
        #   negated (see `opposite?`), otherwise the entry from `REPLACEMENTS`
        def replacement(regexp_method_send_node, node)
          lookup = opposite?(regexp_method_send_node) ? OPPOSITE_REPLACEMENTS : REPLACEMENTS

          lookup[node.method_name]
        end

        # Adds the offense on the block and, when the regexp operand is known,
        # rewrites everything from the method selector through the end of the
        # block into `replacement(regexp)`.
        #
        # @param node the inspected call (provides the method name and selector)
        # @param block_node the block attached to the call; the offense is
        #   reported on it
        # @param regexp the regexp operand node, or nil when it is unknown
        # @param replacement the method name to suggest
        def register_offense(node, block_node, regexp, replacement)
          original_method = node.method_name
          message = format(MSG, replacement: replacement, original_method: original_method)

          add_offense(block_node, message: message) do |corrector|
            # Only correct if it can be determined what the regexp is
            next unless regexp

            start_pos = node.loc.selector.begin_pos
            end_pos = block_node.loc.end.end_pos
            corrector.replace(range_between(start_pos, end_pos),
                              "#{replacement}(#{regexp.source})")
          end
        end

        # Returns the node in the block body that performs the regexp match,
        # provided that match involves the block's own argument.
        #
        # @param block_node the block (or numblock) attached to the call
        # @return the match node, or nil when the block does not match
        #   `regexp_match?` or the match does not use the block argument
        def extract_send_node(block_node)
          captures = regexp_match?(block_node)
          return unless captures

          arg_name, matcher_node = captures
          # A numblock captures a bare number; the variable itself is `_<number>`.
          arg_name = :"_#{arg_name}" if block_node.numblock_type?

          matcher_node if calls_lvar?(matcher_node, arg_name)
        end

        # Tells whether the match is negated, i.e. it is a send of `!~`.
        #
        # @param regexp_method_send_node the match node from the block body
        # @return [Boolean]
        def opposite?(regexp_method_send_node)
          return false unless regexp_method_send_node.send_type?

          regexp_method_send_node.method?(:!~)
        end

        # Picks the operand of the match that is NOT the block argument, i.e.
        # the expression to hand to `grep`/`grep_v`.
        #
        # @param node the match node from the block body; a send is expected
        #   to have a receiver (receiver-less `match?` is filtered out by
        #   `on_send` before this is called)
        # @param block the enclosing block or numblock
        # @return the regexp operand node, or nil when neither side of the
        #   match can be identified as the block argument
        def find_regexp(node, block)
          # For `/re/ =~ x` the first child is the regexp literal.
          return node.child_nodes.first if node.match_with_lvasgn_type?

          # A numblock has no explicit argument list; `regexp_match?` only
          # accepts numblocks whose parameter is `_1`. Comparing against that
          # name keeps an unrelated local-variable receiver (e.g.
          # `re.match?(_1)`) from being mistaken for the block argument.
          block_arg_source = block.numblock_type? ? '_1' : block.first_argument.source
          receiver = node.receiver
          argument = node.first_argument

          if receiver.lvar_type? && receiver.source == block_arg_source
            argument
          elsif argument.lvar_type?
            receiver
          end
        end

        # Tells whether the node is a `match?` call written without an explicit
        # receiver (e.g. `match?(x)`).
        #
        # @param node the match node from the block body
        # @return [Boolean]
        def match_predicate_without_receiver?(node)
          return false unless node.send_type?
          return false unless node.method?(:match?)

          node.receiver.nil?
        end
      end
    end
  end
end