lib/fast.rb
# frozen_string_literal: true
require 'fileutils'
require 'astrolabe/builder'
require_relative 'fast/rewriter'
# suppress output to avoid parser gem warnings'
def suppress_output
original_stdout = $stdout.clone
original_stderr = $stderr.clone
$stderr.reopen File.new('/dev/null', 'w')
$stdout.reopen File.new('/dev/null', 'w')
yield
ensure
$stdout.reopen original_stdout
$stderr.reopen original_stderr
end
suppress_output do
require 'parser'
require 'parser/current'
end
# Fast is a tool to help you search in the code through the Abstract Syntax Tree
module Fast
# Literals are shortcuts allowed inside {ExpressionParser}
LITERAL = {
'...' => ->(node) { node&.children&.any? },
'_' => ->(node) { !node.nil? },
'nil' => nil
}.freeze
# Allowed tokens in the node pattern domain
TOKENIZER = %r/
[\+\-\/\*\\!] # operators or negation
|
===? # == or ===
|
\d+\.\d* # decimals and floats
|
"[^"]+" # strings
|
_ # something not nil: match
|
\.{3} # a node with children: ...
|
\[|\] # square brackets `[` and `]` for all
|
\^ # node has children with
|
\? # maybe expression
|
[\d\w_]+[=\\!\?]? # method names or numbers
|
\(|\) # parens `(` and `)` for tuples
|
\{|\} # curly brackets `{` and `}` for any
|
\$ # capture
|
\#\w[\d\w_]+[\\!\?]? # custom method call
|
\.\w[\d\w_]+\? # instance method call
|
\\\d # find using captured expression
|
%\d # bind extra arguments to the expression
/x.freeze
# Set some convention methods from file.
class Node < Astrolabe::Node
# @return [String] with path of the file or simply buffer name.
def buffer_name
expression.source_buffer.name
end
# @return [Parser::Source::Range] from the expression
def expression
location.expression
end
# @return [String] with the content of the #expression
def source
expression.source
end
# @return [Boolean] true if a file exists with the #buffer_name
def from_file?
File.exist?(buffer_name)
end
# @return [Array<String>] with authors from the current expression range
def blame_authors
`git blame -L #{expression.first_line},#{expression.last_line} #{buffer_name}`.lines.map do |line|
line.split('(')[1].split(/\d+/).first.strip
end
end
# @return [String] with the first element from #blame_authors
def author
blame_authors.first
end
# Search recursively into a node and its children using a pattern.
# @param [String] pattern
# @param [Array] *args extra arguments to interpolate in the pattern.
# @return [Array<Fast::Node>>] with files and results
def search(pattern, *args)
Fast.search(pattern, self, *args)
end
# Captures elements from search recursively
# @param [String] pattern
# @param [Array] *args extra arguments to interpolate in the pattern.
# @return [Array<Fast::Node>>] with files and results
def capture(pattern, *args)
Fast.capture(pattern, self, *args)
end
end
# Custom builder allow us to set a buffer name for each Node
class Builder < Astrolabe::Builder
attr_writer :buffer_name
# Generates {Node} from the given information.
#
# @return [Node] the generated node
def n(type, children, source_map)
Node.new(type, children, location: source_map, buffer_name: @buffer_name)
end
end
class << self
# @return [Fast::Node] from the parsed content
# @example
# Fast.ast("1") # => s(:int, 1)
# Fast.ast("a.b") # => s(:send, s(:send, nil, :a), :b)
def ast(content, buffer_name: '(string)')
buffer = Parser::Source::Buffer.new(buffer_name)
buffer.source = content
Parser::CurrentRuby.new(builder_for(buffer_name)).parse(buffer)
end
def builder_for(buffer_name)
builder = Builder.new
builder.buffer_name = buffer_name
builder
end
# @return [Fast::Node] parsed from file content
# caches the content based on the filename.
# Also, it can parse SQL files.
# @example
# Fast.ast_from_file("example.rb") # => s(...)
def ast_from_file(file)
@cache ||= {}
@cache[file] ||=
begin
method =
if file.end_with?('.sql')
require_relative 'fast/sql' unless respond_to?(:parse_sql)
:parse_sql
else
:ast
end
Fast.public_send(method, IO.read(file), buffer_name: file)
end
end
# Verify if a given AST matches with a specific pattern
# @return [Boolean] case matches ast with the current expression
# @example
# Fast.match?("int", Fast.ast("1")) # => true
def match?(pattern, ast, *args)
Matcher.new(pattern, ast, *args).match?
end
# Search with pattern directly on file
# @return [Array<Fast::Node>] that matches the pattern
def search_file(pattern, file)
node = ast_from_file(file)
return [] unless node
case node
when Array
node.map { |n| search(pattern, n) }.flatten.compact
else
search pattern, node
end
end
# Search with pattern on a directory or multiple files
# @param [String] pattern
# @param [Array<String>] *locations where to search. Default is '.'
# @return [Hash<String,Array<Fast::Node>>] with files and results
def search_all(pattern, locations = ['.'], parallel: true, on_result: nil)
group_results(build_grouped_search(:search_file, pattern, on_result),
locations, parallel: parallel)
end
# Capture with pattern on a directory or multiple files
# @param [String] pattern
# @param [Array<String>] locations where to search. Default is '.'
# @return [Hash<String,Object>] with files and captures
def capture_all(pattern, locations = ['.'], parallel: true, on_result: nil)
group_results(build_grouped_search(:capture_file, pattern, on_result),
locations, parallel: parallel)
end
# @return [Proc] binding `pattern` argument from a given `method_name`.
# @param [Symbol] method_name with `:capture_file` or `:search_file`
# @param [String] pattern to match in a search to any file
# @param [Proc] on_result is a callback that can be notified soon it matches
def build_grouped_search(method_name, pattern, on_result)
search_pattern = method(method_name).curry.call(pattern)
proc do |file|
results = search_pattern.call(file)
next if results.nil? || results.empty?
on_result&.(file, results)
{ file => results }
end
end
# Compact grouped results by file allowing parallel processing.
# @param [Proc] group_files allows to define a search that can be executed
# parallel or not.
# @param [Proc] on_result allows to define a callback for fast feedback
# while it process several locations in parallel.
# @param [Boolean] parallel runs the `group_files` in parallel
# @return [Hash[String, Array]] with files and results
def group_results(group_files, locations, parallel: true)
files = ruby_files_from(*locations)
if parallel
require 'parallel' unless defined?(Parallel)
Parallel.map(files, &group_files)
else
files.map(&group_files)
end.compact.inject(&:merge!)
end
# Capture elements from searches in files. Keep in mind you need to use `$`
# in the pattern to make it work.
# @return [Array<Object>] captured from the pattern matched in the file
def capture_file(pattern, file)
node = ast_from_file(file)
return [] unless node
case node
when Array
node.map { |n| capture(pattern, n) }.flatten.compact
else
capture pattern, node
end
end
# Search recursively into a node and its children.
# If the node matches with the pattern it returns the node,
# otherwise it recursively collect possible children nodes
# @yield node and capture if block given
def search(pattern, node, *args)
if (match = match?(pattern, node, *args))
yield node, match if block_given?
match != true ? [node, match] : [node]
else
case node
when Array
node.flat_map { |child| search(pattern, child, *args) }
else
node.each_child_node
.flat_map { |child| search(pattern, child, *args) }
.compact.flatten
end
end
end
# Only captures from a search
# @return [Array<Object>] with all captured elements.
def capture(pattern, node)
if (match = match?(pattern, node))
match == true ? node : match
else
node.each_child_node
.flat_map { |child| capture(pattern, child) }
.compact.flatten
end
end
def expression(string)
ExpressionParser.new(string).parse
end
attr_accessor :debugging
# Utility function to inspect search details using debug block.
#
# It prints output of all matching cases.
#
# @example
# Fast.debug do
# Fast.match?([:int, 1], s(:int, 1))
# end
# int == (int 1) # => true
# 1 == 1 # => true
def debug
return yield if debugging
self.debugging = true
result = nil
Find.class_eval do
alias_method :original_match_recursive, :match_recursive
alias_method :match_recursive, :debug_match_recursive
result = yield
alias_method :match_recursive, :original_match_recursive # rubocop:disable Lint/DuplicateMethods
end
self.debugging = false
result
end
# @return [Array<String>] with all ruby files from arguments.
# @param files can be file paths or directories.
# When the argument is a folder, it recursively fetches all `.rb` files from it.
def ruby_files_from(*files)
dir_filter = File.method(:directory?)
directories = files.select(&dir_filter)
if directories.any?
files -= directories
files |= directories.flat_map { |dir| Dir["#{dir}/**/*.rb"] }
files.uniq!
end
files.reject(&dir_filter)
end
# Extracts a node pattern expression from a given node supressing identifiers and primitive types.
# Useful to index abstract patterns or similar code structure.
# @see https://jonatas.github.io/fast/similarity_tutorial/
# @return [String] with an pattern to search from it.
# @param node [Fast::Node]
# @example
# Fast.expression_from(Fast.ast('1')) # => '(int _)'
# Fast.expression_from(Fast.ast('a = 1')) # => '(lvasgn _ (int _))'
# Fast.expression_from(Fast.ast('def name; person.name end')) # => '(def _ (args) (send (send nil _) _))'
def expression_from(node)
case node
when Parser::AST::Node
children_expression = node.children.map(&method(:expression_from)).join(' ')
"(#{node.type}#{" #{children_expression}" if node.children.any?})"
when nil, 'nil'
'nil'
when Symbol, String, Numeric
'_'
end
end
end
# ExpressionParser empowers the AST search in Ruby.
# All classes inheriting Fast::Find have a grammar shortcut that is processed here.
#
# @example find a simple int node
# Fast.expression("int")
# # => #<Fast::Find:0x00007ffae39274e0 @token="int">
# @example parens make the expression an array of Fast::Find and children classes
# Fast.expression("(int _)")
# # => [#<Fast::Find:0x00007ffae3a860e8 @token="int">, #<Fast::Find:0x00007ffae3a86098 @token="_">]
# @example not int token
# Fast.expression("!int")
# # => #<Fast::Not:0x00007ffae43f35b8 @token=#<Fast::Find:0x00007ffae43f35e0 @token="int">>
# @example int or float token
# Fast.expression("{int float}")
# # => #<Fast::Any:0x00007ffae43bbf00 @token=[
# # #<Fast::Find:0x00007ffae43bbfa0 @token="int">,
# # #<Fast::Find:0x00007ffae43bbf50 @token="float">
# # #]>
# @example capture something not nil
# Fast.expression("$_")
# # => #<Fast::Capture:0x00007ffae433a860 @captures=[], @token=#<Fast::Find:0x00007ffae433a888 @token="_">>
# @example capture a hash with keys that all are not string and not symbols
# Fast.expression("(hash (pair ([!sym !str] _))")
# # => [#<Fast::Find:0x00007ffae3b45010 @token="hash">,
# # [#<Fast::Find:0x00007ffae3b44f70 @token="pair">,
# # [#<Fast::All:0x00007ffae3b44cf0 @token=[
# # #<Fast::Not:0x00007ffae3b44e30 @token=#<Fast::Find:0x00007ffae3b44e80 @token="sym">>,
# # #<Fast::Not:0x00007ffae3b44d40 @token=#<Fast::Find:0x00007ffae3b44d68 @token="str">>]>,
# # #<Fast::Find:0x00007ffae3b44ca0 @token="_">]]]")")
# @example of match using string expression
# Fast.match?(Fast.ast("{1 => 1}"),"(hash (pair ([!sym !str] _))") => true")")
class ExpressionParser
# @param expression [String]
def initialize(expression)
@tokens = expression.scan TOKENIZER
end
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/MethodLength
def parse
case (token = next_token)
when '(' then parse_until_peek(')')
when '{' then Any.new(parse_until_peek('}'))
when '[' then All.new(parse_until_peek(']'))
when /^"/ then FindString.new(token[1..-2])
when /^#\w/ then MethodCall.new(token[1..])
when /^\.\w[\w\d_]+\?/ then InstanceMethodCall.new(token[1..])
when '$' then Capture.new(parse)
when '!' then (@tokens.any? ? Not.new(parse) : Find.new(token))
when '?' then Maybe.new(parse)
when '^' then Parent.new(parse)
when '\\' then FindWithCapture.new(parse)
when /^%\d/ then FindFromArgument.new(token[1..])
else Find.new(token)
end
end
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
private
def next_token
@tokens.shift
end
def parse_until_peek(token)
list = []
list << parse until @tokens.empty? || @tokens.first == token
next_token
list
end
end
# Find is the top level class that respond to #match?(node) interface.
# It matches recurively and check deeply depends of the token type.
class Find
attr_accessor :token
def initialize(token)
self.token = token
end
def match?(node)
match_recursive(valuate(token), node)
end
def match_recursive(expression, node)
case expression
when Proc then expression.call(node)
when Find then expression.match?(node)
when Symbol then compare_symbol_or_head(expression, node)
when Enumerable
expression.each_with_index.all? do |exp, i|
match_recursive(exp, i.zero? ? node : node.children[i - 1])
end
else
node == expression
end
end
def compare_symbol_or_head(expression, node)
case node
when Parser::AST::Node
node.type == expression.to_sym
when String
node == expression.to_s
when TrueClass
expression == :true
when FalseClass
expression == :false
else
node == expression
end
end
def debug_match_recursive(expression, node)
match = original_match_recursive(expression, node)
debug(expression, node, match)
match
end
def debug(expression, node, match)
puts "#{expression} == #{node} # => #{match}"
end
def to_s
"f[#{[*token].map(&:to_s).join(', ')}]"
end
def ==(other)
return false if other.nil? || !other.respond_to?(:token)
token == other.token
end
private
def valuate(token)
if token.is_a?(String)
return valuate(LITERAL[token]) if LITERAL.key?(token)
typecast_value(token)
else
token
end
end
def typecast_value(token)
case token
when /^\d+\.\d*/ then token.to_f
when /^\d+/ then token.to_i
else token.to_sym
end
end
end
# Find literal strings using double quotes
class FindString < Find
def initialize(token)
@token = token
end
def match?(node)
node == token
end
end
# Find using custom methods
class MethodCall < Find
def initialize(method_name)
@method_name = method_name
end
def match?(node)
Kernel.send(@method_name, node)
end
end
# Search using custom instance methods
class InstanceMethodCall < Find
def initialize(method_name)
@method_name = method_name
end
def match?(node)
node.send(@method_name)
end
end
# Allow use previous captures while searching in the AST.
# Use `\\1` to point the match to the first captured element
# or sequential numbers considering the order of the captures.
#
# @example check comparision of integers that will always return true
# ast = Fast.ast("1 == 1") => s(:send, s(:int, 1), :==, s(:int, 1))
# Fast.match?("(send $(int _) == \1)", ast) # => [s(:int, 1)]
class FindWithCapture < Find
attr_writer :previous_captures
def initialize(token)
token = token.token if token.respond_to?(:token)
raise 'You must use captures!' unless token
@capture_index = token.to_i
end
def match?(node)
node == @previous_captures[@capture_index - 1]
end
def to_s
"fc[\\#{@capture_index}]"
end
end
# Allow the user to interpolate expressions from external stuff.
# Use `%1` in the expression and the Matcher#prepare_arguments will
# interpolate the argument in the expression.
# @example interpolate the node value 1
# Fast.match?("(int %1)", Fast.ast("1"), 1) # => true
# Fast.match?("(int %1)", Fast.ast("1"), 2) # => false
# @example interpolate multiple arguments
# Fast.match?("(%1 %2)", Fast.ast("1"), :int, 1) # => true
class FindFromArgument < Find
attr_writer :arguments
def initialize(token)
token = token.token if token.respond_to?(:token)
raise 'You must define index' unless token
@capture_argument = token.to_i - 1
raise 'Arguments start in one' if @capture_argument.negative?
end
def match?(node)
raise 'You must define arguments to match' unless @arguments
compare_symbol_or_head @arguments[@capture_argument], node
end
def to_s
"find_with_arg[\\#{@capture_argument}]"
end
end
# Capture some expression while searching for it.
#
# The captures behaves exactly like Fast::Find and the only difference is that
# when it {#match?} stores #captures for future usage.
#
# @example capture int node
# capture = Fast::Capture.new("int") => #<Fast::Capture:0x00...e0 @captures=[], @token="int">
# capture.match?(Fast.ast("1")) # => [s(:int, 1)]
#
# @example binding directly in the Fast.expression
# Fast.match?(Fast.ast("1"), "(int $_)") # => [1]
#
# @example capture the value of a local variable assignment
# (${int float} _)
# @example expression to capture only the node type
# (${int float} _)
# @example expression to capture entire node
# $({int float} _)
# @example expression to capture only the node value of int or float nodes
# ({int float} $_)
# @example expression to capture both node type and value
# ($_ $_)
#
# You can capture stuff in multiple levels and
# build expressions that reference captures with Fast::FindWithCapture.
class Capture < Find
# Stores nodes that matches with the current expression.
attr_reader :captures
def initialize(token)
super
@captures = []
end
# Append the matching node to {#captures} if it matches
def match?(node)
@captures << node if super
end
def to_s
"c[#{token}]"
end
end
# Sometimes you want to check some children but get the parent element,
# for such cases, parent can be useful.
# Example: You're searching for `int` usages in your code.
# But you don't want to check the integer itself, but who is using it:
# `^^(int _)` will give you the variable being assigned or the expression being used.
class Parent < Find
alias match_node match?
def match?(node)
node.each_child_node.any?(&method(:match_node))
end
def to_s
"^#{token}"
end
end
# Matches any of the internal expressions. Works like a **OR** condition.
# @example Matchig int or float
# Fast.expression("{int float}")
class Any < Find
def match?(node)
token.any? { |expression| Fast.match?(expression, node) }
end
def to_s
"any[#{token.map(&:to_s).join(', ')}]"
end
end
# Intersect expressions. Works like a **AND** operator.
class All < Find
def match?(node)
token.all? { |expression| expression.match?(node) }
end
def to_s
"all[#{token}]"
end
end
# Negates the current expression
# `!int` is equilvalent to "not int"
class Not < Find
def match?(node)
!super
end
end
# True if the node does not exist
# When exists, it should match.
class Maybe < Find
def match?(node)
node.nil? || super
end
end
# Joins the AST and the search expression to create a complete matcher that
# recusively check if the node pattern expression matches with the given AST.
#
### Using captures
#
# One of the most important features of the matcher is find captures and also
# bind them on demand in case the expression is using previous captures.
#
# @example simple match
# ast = Fast.ast("a = 1")
# expression = Fast.expression("(lvasgn _ (int _))")
# Matcher.new(expression, ast).match? # true
#
# @example simple capture
# ast = Fast.ast("a = 1")
# expression = Fast.expression("(lvasgn _ (int $_))")
# Matcher.new(expression, ast).match? # => [1]
#
class Matcher
def initialize(pattern, ast, *args)
@ast = ast
@expression = if pattern.is_a?(String)
Fast.expression(pattern)
else
[*pattern].map(&Find.method(:new))
end
@captures = []
prepare_arguments(@expression, args) if args.any?
end
# @return [true] if the @param ast recursively matches with expression.
# @return #find_captures case matches
def match?(expression = @expression, ast = @ast)
head, *tail_expression = expression
return false unless head.match?(ast)
return find_captures if tail_expression.empty?
match_tail?(tail_expression, ast.children)
end
# @return [true] if all children matches with tail
def match_tail?(tail, child)
tail.each_with_index.all? do |token, i|
prepare_token(token)
token.is_a?(Array) ? match?(token, child[i]) : token.match?(child[i])
end && find_captures
end
# Look recursively into @param expression to check if the expression is have
# captures.
# @return [true] if any sub expression have captures.
def captures?(expression = @expression)
case expression
when Capture then true
when Array then expression.any?(&method(:captures?))
when Find then captures?(expression.token)
end
end
# Find search captures recursively.
#
# @return [Array<Object>] of captures from the expression
# @return [true] in case of no captures in the expression
# @see Fast::Capture
# @see Fast::FindFromArgument
def find_captures(expression = @expression)
return true if expression == @expression && !captures?(expression)
case expression
when Capture then expression.captures
when Array then expression.flat_map(&method(:find_captures)).compact
when Find then find_captures(expression.token)
end
end
private
# Prepare arguments case the expression needs to bind extra arguments.
# @return [void]
def prepare_arguments(expression, arguments)
case expression
when Array
expression.each do |item|
prepare_arguments(item, arguments)
end
when Fast::FindFromArgument
expression.arguments = arguments
when Fast::Find
prepare_arguments expression.token, arguments
end
end
# Prepare token with previous captures
# @param [FindWithCapture] token set the current captures
# @return [void]
# @see [FindWithCapture#previous_captures]
def prepare_token(token)
case token
when Fast::FindWithCapture
token.previous_captures = find_captures
end
end
end
end