pogodevorg/docs.pogodev.org

View on GitHub
bin/match-auto

Summary

Maintainability
Test Coverage
#!/usr/bin/env ruby
# Try to generate a mapping between the AeonLucid repo and the bare exported proto

require 'optparse'
require 'yaml'
require 'json'
require 'colorize'

# Command-line handling: one optional -v/--[no-]verbose switch and one
# mandatory positional argument (the path to the proto repository).
USAGE = "Usage: #{__FILE__} path_to_aeonlucid_repo"
options = {}

parser = OptionParser.new
parser.banner = USAGE
# The flag value is stored in options[:verbose]; nothing in the visible
# script reads it afterwards.
parser.on("-v", "--[no-]verbose", "Run verbosely") { |flag| options[:verbose] = flag }
# parse! strips the recognised switches out of ARGV, leaving positionals.
parser.parse!

file_arg = ARGV.first
if file_arg.nil?
  # No repository path given: show the usage line and exit with status 1.
  puts USAGE
  exit 1
end

require 'tmpdir'
# The rest of the script runs inside this block; Dir.mktmpdir removes the
# temporary directory once the block exits.
Dir.mktmpdir do |dir|
  # INLINEDIR is set before fuzzystringmatch is loaded. Presumably the native
  # matcher compiles its extension into that directory — TODO confirm against
  # the gem's documentation.
  ENV['INLINEDIR'] = dir
  require 'fuzzystringmatch'

  # Shared Jaro-Winkler matcher (native implementation) used by String#distance.
  JAROW = FuzzyStringMatch::JaroWinkler.create( :native )
  # Absolute path of src/POGOProtos/ inside the repository given on the command line.
  API_SOURCE = File.expand_path(File.join(file_arg, 'src/POGOProtos/'))
  # String helpers used to compare proto definitions and names.
  class String
    # Jaro-Winkler based dissimilarity between the receiver and +t+, scaled
    # by 100. Presumably JAROW.getDistance returns a similarity in 0..1, so
    # identical strings yield 0 — confirm against the fuzzystringmatch docs.
    def distance(t)
      similarity = JAROW.getDistance(self, t)
      (1 - similarity) * 100
    end

    # Levenshtein edit distance between the receiver and +t+ (unit cost for
    # insertion, deletion and substitution). Not called by the visible script.
    def _distance(t)
      rows = length
      cols = t.length
      return rows if cols == 0
      return cols if rows == 0

      # table[r][c] holds the distance between the first r characters of the
      # receiver and the first c characters of t.
      table = Array.new(rows + 1) { Array.new(cols + 1) }
      0.upto(rows) { |r| table[r][0] = r }
      0.upto(cols) { |c| table[0][c] = c }

      1.upto(cols) do |c|
        1.upto(rows) do |r|
          if self[r - 1] == t[c - 1]
            # Characters agree: carry the diagonal value over unchanged.
            table[r][c] = table[r - 1][c - 1]
          else
            # Cheapest of deletion, insertion and substitution, plus one edit.
            table[r][c] = [table[r - 1][c], table[r][c - 1], table[r - 1][c - 1]].min + 1
          end
        end
      end

      table[rows][cols]
    end
  end

  # Renders +score+ as a rounded percentage tag such as "[12%]" and colours it.
  #
  # score      - numeric score; above 50 is red, above 15 yellow, above 0
  #              light blue, anything else green.
  # name_score - optional; when it equals 0 the tag is green whatever the score.
  #
  # Returns the result of String#colorize on the tag.
  def colored_score(score, name_score = -1)
    # Limits are listed from highest to lowest; the first one exceeded wins.
    band = [[50, :red], [15, :yellow], [0, :light_blue]].find { |limit, _tint| score > limit }
    tint = band ? band.last : :green
    tint = :green if name_score == 0
    "[#{score.round}%]".colorize(tint)
  end

  # Reads the .proto file at +source+ and flattens it into a single-line
  # string so it can be compared with the strings built from the YAML data.
  #
  # Returns the normalised text.
  def strip_proto_file(source)
    # Applied strictly in this order; later rules rely on earlier ones.
    rewrites = [
      [/[\t]/, ' '],                                        # tabs become spaces
      [/^(syntax|package|import).*/, ''],                   # drop header statements
      [' [packed=true]', ''],                               # drop the packed option
      [/\/\/.*$/, ''],                                      # drop line comments
      [/[ ]*oneof Action {/, ''],                           # drop the "oneof Action" opener
      [/[ ]+enum[ ]+[a-zA-Z0-9_-]+[ \n]*{.+?}/m, ''],       # drop indented (nested) enums
      [/\.[^ ]*\./, ''],                                    # drop dotted qualifiers on type names
      [/[\n{}]/, ''],                                       # drop newlines and braces
      [/[ ]+/, ' '],                                        # squeeze runs of spaces
      [' ;', ';']                                           # no space before a semicolon
    ]

    text = File.read(source)
    rewrites.each { |pattern, replacement| text = text.gsub(pattern, replacement) }
    text.strip
  end

  # Builds the one-line "enum <name> <value> = <tag>; ..." text for +enum+,
  # an object responding to #name and #values (each value responding to
  # #name and #tag). Runs of spaces and tabs are squeezed to one space.
  def strip_proto_yaml_enum(enum)
    entries = enum.values.map { |value| "#{value.name} = #{value.tag};" }
    flattened = "enum #{enum.name} #{entries.join(' ')}"
    flattened.gsub(/[ \t]+/, ' ')
  end

  # Builds the one-line "message <name> <modifier> <type> <field> = <tag>; ..."
  # text for +message+, an object responding to #name and #attributes (each
  # attribute responding to #modifier, #type, #name and #tag). Runs of spaces
  # and tabs are squeezed to one space, which also hides a blank modifier.
  def strip_proto_yaml_message(message)
    fields = message.attributes.map do |field|
      "#{field.modifier} #{field.type} #{field.name} = #{field.tag};"
    end
    flattened = "message #{message.name} #{fields.join(' ')}"
    flattened.gsub(/[ \t]+/, ' ')
  end

  # Produces a compact label for a proto path: the API_SOURCE prefix is
  # removed from the directory part, every remaining directory name is cut to
  # its first three characters, and the file name loses its ".proto" suffix.
  def shorten_path(file)
    root = File.expand_path(API_SOURCE)
    relative_dir = File.dirname(file).gsub(root, '')
    abbreviated = relative_dir.split('/').map { |segment| segment[0, 3] }
    "#{abbreviated.join('/')}/#{File.basename(file, '.proto')}"
  end

  # Map every .proto file below API_SOURCE to its normalised one-line text.
  stripped_files = {}
  source = API_SOURCE
  # Sorted explicitly: the matching passes below depend on the order in which
  # files are visited (they stop scoring once a close match is found and they
  # record exact matches as they go), and Dir[] only guarantees a sorted
  # result from Ruby 3.0 on.
  proto_files = Dir["#{source}/**/*.proto"].sort
  proto_files.each { |path| stripped_files[path] = strip_proto_file(path) }

  # Build the normalised one-line text of every enum and message described in
  # _data/api_latest.yml, keyed by name.
  stripped_enums = {}
  stripped_messages = {}
  # OpenStruct is used below; load it explicitly instead of relying on another
  # library happening to require it.
  require 'ostruct'
  hash = YAML.load_file(File.expand_path('../../_data/api_latest.yml', __FILE__))
  # Round-trip through JSON so that nested hashes come back as OpenStruct
  # objects with method-style accessors (proto.enums, enum.name, ...).
  json = hash.to_json
  proto = JSON.parse(json, object_class: OpenStruct)
  proto.enums.each { |e| stripped_enums[e.name] = strip_proto_yaml_enum(e) }
  proto.messages.each { |m| stripped_messages[m.name] = strip_proto_yaml_message(m) }

  # name => candidate list ({score:, name_score:, file:}) sorted by score;
  # dumped to YAML at the end of the script.
  sorted_matches = {}
  # Files whose content matched some name exactly (score 0); they are never
  # offered as candidates again.
  matched_files = []

  # Enum pass: score every enum definition against every enum .proto file and
  # print the three closest files for names without a perfect match.
  puts "ENUMS: "
  stripped_enums.each do |name, content|
    # Lowest content score seen so far for this name.
    best = 100
    sorted_matches[name] = stripped_files.map do |file, file_content|
      next if matched_files.include?(file)
      # Once a candidate scored below 10, the remaining files are not scored.
      next if best < 10
      # Only files whose stripped text starts with "enum" are candidates.
      next if file_content !~ /^enum/

      score = content.distance(file_content)
      name_score = name.gsub(/Proto/, '').distance(File.basename(file, '.proto'))

      best = score if score < best
      matched_files << file if score == 0

      {
        score: score,
        name_score: name_score,
        file: file.gsub(source, '')
      }
    end.compact.sort_by { |m| m[:score] }

    bests = sorted_matches[name].take(3)
    if bests.empty?
      # Every file was filtered out (already matched, or not an enum file).
      # Report it instead of crashing on bests.first below.
      puts " - #{name} -> (no candidate files)"
      next
    end
    if bests.first[:score] == 0
      next # Comment to show it anyway
      bests.reject!{|b| b[:score] > 0}
    end
    if bests.first[:name_score] == 0
      next # Comment to show it anyway
      bests.reject!{|b| b[:name_score] > 0}
    end
    # Block parameter deliberately not called "best" so it does not shadow the
    # running minimum above.
    bests.map! { |match| "#{shorten_path(match[:file])} #{colored_score(match[:score], match[:name_score])}" }
    puts " - #{name} -> #{bests.join(", ")}"
  end

  # Message pass: score every message definition against every message .proto
  # file that has not already been matched exactly, and print the three
  # closest files for names without a perfect match.
  puts "MESSAGES: "
  stripped_messages.each do |name, content|
    # Lowest content score seen so far for this name.
    best = 1000
    sorted_matches[name] = stripped_files.map do |file, file_content|
      next if matched_files.include?(file)
      # Once a candidate scored below 10, the remaining files are not scored.
      next if best < 10
      # Only files whose stripped text starts with "message" are candidates.
      next if file_content !~ /^message/

      score = content.distance(file_content)
      name_score = name.gsub(/Proto/, '').distance(File.basename(file, '.proto'))

      best = score if score < best
      matched_files << file if score == 0

      {
        score: score,
        name_score: name_score,
        file: file.gsub(source, '')
      }
    end.compact.sort_by { |m| m[:score] }

    bests = sorted_matches[name].take(3)
    if bests.empty?
      # Every file was filtered out (already matched, or not a message file).
      # Report it instead of crashing on bests.first below.
      puts "  - #{name} -> (no candidate files)"
      next
    end
    if bests.first[:score] == 0
      next # Comment to show it anyway
      bests.reject!{|b| b[:score] > 0}
    end
    if bests.first[:name_score] == 0
      next # Comment to show it anyway
      bests.reject!{|b| b[:name_score] > 0}
    end
    # Block parameter deliberately not called "best" so it does not shadow the
    # running minimum above.
    bests.map! { |match| "#{shorten_path(match[:file])} #{colored_score(match[:score], match[:name_score])}" }
    puts "  - #{name} -> #{bests.join(", ")}"
  end

  # Persist every candidate list next to the API data and tell the user what
  # to run next.
  puts "Generating YML file..."
  output_path = File.expand_path('../../_data/sorted_matches.yml', __FILE__)
  serialized = YAML.dump(sorted_matches)
  File.write(output_path, serialized)
  puts "done.".green
  puts "You should run ./bin/match-review to generate the mappings file now."

  # sorted_matches = {}
  # mappings = {}
  # matched_names = []
  # matched_files = []

  # puts 'First pass, look & sort matches...'
  # enums.each do |name|
  #   sorted_matches[name] = enum_files.map do |file|
  #     {
  #       score: name.gsub(/Proto/, '').distance(File.basename(file).gsub('.proto', '')),
  #       src: file.gsub(source, '')
  #     }
  #   end.sort_by{|m| m[:score]}
  # end
  #
  # messages.each do |name|
  #   sorted_matches[name] = message_files.map do |file|
  #     {
  #       score: name.gsub(/Proto/, '').distance(File.basename(file).gsub('.proto', '')),
  #       src: file.gsub(source, '')
  #     }
  #   end.sort_by{|m| m[:score]}
  # end
  #
  # puts 'Second pass, looking for perfect matches...'
  # sorted_matches.each do |name, matches|
  #   if (best = matches.first)[:score] == 0
  #     mappings[name] = best
  #     matched_names << name
  #     matched_files << best[:src]
  #   end
  # end
  #
  # puts 'Third pass, find the best matches'
  # sorted_matches.each do |name, matches|
  #   next if matched_names.include?(name)
  #   best = sorted_matches[name].find do |partial_match|
  #     !matched_files.include?(partial_match[:src])
  #     # Don't mark partially matched files
  #     # matched_names << name
  #     # matched_files << partial_match[:src]
  #   end
  #   mappings[name] = best
  # end
  #
  # puts "# ENUMS:"
  # enums.each do |name|
  #   best = mappings[name]
  #   puts "  - #{name} -> #{best[:src]} #{colored_score(best[:score])}"
  # end
  #
  # puts "\n# MESSAGES:"
  # messages.each do |name|
  #   best = mappings[name]
  #   puts "  - #{name} -> #{best[:src]} #{colored_score(best[:score])}"
  # end
end