rubocop-hq/rubocop

View on GitHub
lib/rubocop/cop/lint/out_of_range_regexp_ref.rb

Summary

Maintainability
A
25 mins
Test Coverage
A
98%
# frozen_string_literal: true

module RuboCop
  module Cop
    module Lint
      # Looks for references of Regexp captures that are out of range
      # and thus always returns nil.
      #
      # @safety
      #   This cop is unsafe because it is naive in how it determines what
      #   references are available based on the last encountered regexp, but
      #   it cannot handle some cases, such as conditional regexp matches, which
      #   leads to false positives, such as:
      #
      #   [source,ruby]
      #   ----
      #   foo ? /(c)(b)/ =~ str : /(b)/ =~ str
      #   do_something if $2
      #   # $2 is defined for the first condition but not the second, however
      #   # the cop will mark this as an offense.
      #   ----
      #
      #   This might be a good indication of code that should be refactored,
      #   however.
      #
      # @example
      #
      #   /(foo)bar/ =~ 'foobar'
      #
      #   # bad - always returns nil
      #
      #   puts $2 # => nil
      #
      #   # good
      #
      #   puts $1 # => foo
      #
      class OutOfRangeRegexpRef < Base
        MSG = '$%<backref>s is out of range (%<count>s regexp capture %<group>s detected).'

        REGEXP_RECEIVER_METHODS = %i[=~ === match].to_set.freeze
        REGEXP_ARGUMENT_METHODS = %i[=~ match grep gsub gsub! sub sub! [] slice slice! index rindex
                                     scan partition rpartition start_with? end_with?].to_set.freeze
        REGEXP_CAPTURE_METHODS = (REGEXP_RECEIVER_METHODS + REGEXP_ARGUMENT_METHODS).freeze
        RESTRICT_ON_SEND = REGEXP_CAPTURE_METHODS

        def on_new_investigation
          @valid_ref = 0
        end

        def on_match_with_lvasgn(node)
          check_regexp(node.children.first)
        end

        def after_send(node)
          @valid_ref = nil

          if regexp_first_argument?(node)
            check_regexp(node.first_argument)
          elsif regexp_receiver?(node)
            check_regexp(node.receiver)
          end
        end

        def on_when(node)
          regexp_conditions = node.conditions.select(&:regexp_type?)

          @valid_ref = regexp_conditions.filter_map { |condition| check_regexp(condition) }.max
        end

        def on_in_pattern(node)
          regexp_patterns = regexp_patterns(node)

          @valid_ref = regexp_patterns.filter_map { |pattern| check_regexp(pattern) }.max
        end

        def on_nth_ref(node)
          backref, = *node
          return if @valid_ref.nil? || backref <= @valid_ref

          message = format(
            MSG,
            backref: backref,
            count: @valid_ref.zero? ? 'no' : @valid_ref,
            group: @valid_ref == 1 ? 'group' : 'groups'
          )

          add_offense(node, message: message)
        end

        private

        def regexp_patterns(in_node)
          pattern = in_node.pattern
          if pattern.regexp_type?
            [pattern]
          else
            pattern.each_descendant(:regexp).to_a
          end
        end

        def check_regexp(node)
          return if node.interpolation?

          named_capture = node.each_capture(named: true).count
          @valid_ref = if named_capture.positive?
                         named_capture
                       else
                         node.each_capture(named: false).count
                       end
        end

        def regexp_first_argument?(send_node)
          send_node.first_argument&.regexp_type? \
            && REGEXP_ARGUMENT_METHODS.include?(send_node.method_name)
        end

        def regexp_receiver?(send_node)
          send_node.receiver&.regexp_type?
        end

        def nth_ref_receiver?(send_node)
          send_node.receiver&.nth_ref_type?
        end
      end
    end
  end
end