18F/identity-idp

View on GitHub
scripts/changelog_check.rb

Summary

Maintainability
A
0 mins
Test Coverage
C
74%
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'open3'
require 'optparse'

# Matches a commit message line of the form "changelog: CATEGORY, SUBCATEGORY, CHANGE",
# optionally prefixed with "* ". Matching is case-insensitive and exposes the named captures
# category, subcategory and change.
CHANGELOG_REGEX =
  %r{^(?:\* )?changelog: ?(?<category>[\w -]{2,}), ?(?<subcategory>[^,]{2,}), ?(?<change>.+)$}i
# Recognized changelog categories, in the order format_changelog prints them.
CATEGORIES = [
  'User-Facing Improvements',
  'Bug Fixes',
  'Internal',
  'Upcoming Features',
].freeze
# Largest Levenshtein distance at which a written category is still mapped to one of CATEGORIES.
MAX_CATEGORY_DISTANCE = 3
# Marker text that exempts a commit from the changelog requirement.
SKIP_CHANGELOG_MESSAGE = '[skip changelog]'
# Commit message line that build_changelog maps to SECURITY_CHANGELOG.
DEPENDABOT_COMMIT_MESSAGE = 'Signed-off-by: dependabot[bot] <support@github.com>'
# Captures the commit id from a revert message line.
# NOTE(review): the trailing "." is unescaped, so it matches any single character.
REVERT_COMMIT_MESSAGE = /This reverts commit ([a-z\d]+)./
# Changelog entry returned for a line equal to DEPENDABOT_COMMIT_MESSAGE.
SECURITY_CHANGELOG = {
  category: 'Internal',
  subcategory: 'Dependencies',
  change: 'Update dependencies to latest versions',
}.freeze
# Changelog entry template for revert commits; "%s" is replaced with the reverted commit id.
REVERT_CHANGELOG = {
  category: 'Bug Fixes',
  subcategory: 'Code Revert',
  change: 'Revert changes introduced in %s',
}.freeze

# A commit's subject line (title) together with the non-blank lines of its body.
SquashedCommit = Struct.new(:title, :commit_messages, keyword_init: true)
# One changelog entry; pr_number is taken from a "(#123)" suffix in the commit title when present.
ChangelogEntry = Struct.new(:category, :subcategory, :change, :pr_number, keyword_init: true)
# Pairs a known category with its edit distance from the category text an author wrote.
CategoryDistance = Struct.new(:category, :distance)

# Turns a single commit message line into changelog data, or nil when the line carries none.
#
# A regular entry is a line in the form of:
# changelog: CATEGORY, SUBCATEGORY, CHANGE_DESCRIPTION
#
# Two special cases are recognized first: the Dependabot sign-off line yields the canned
# dependency entry, and (only when find_revert is true) a "This reverts commit ..." line yields
# a revert entry naming the reverted commit. The result responds to [:category], [:subcategory]
# and [:change] whether it is a Hash or a MatchData.
def build_changelog(line, find_revert: false)
  return SECURITY_CHANGELOG if line == DEPENDABOT_COMMIT_MESSAGE

  reverted_commit = find_revert ? REVERT_COMMIT_MESSAGE.match(line)&.captures&.first : nil
  return CHANGELOG_REGEX.match(line) unless reverted_commit

  REVERT_CHANGELOG.merge(change: format(REVERT_CHANGELOG[:change], reverted_commit))
end

# Whether the commit's title begins with "Revert " (note the trailing space).
def revert_commit?(commit)
  subject = commit.title
  subject.start_with?('Revert ')
end

# Returns the first changelog data found in a commit, or nil if there is none.
# Body lines are checked in order, followed by the title. Revert lines are only recognized
# when the commit title marks it as a revert.
def build_changelog_from_commit(commit)
  is_revert = revert_commit?(commit)

  [*commit.commit_messages, commit.title].each do |text|
    entry = build_changelog(text, find_revert: is_revert)
    return entry if entry
  end

  nil
end

# Runs `git log` over the commits in base_branch..source_branch and returns its output.
# Each commit is printed as a "title: " line, a "body:" section and a DELIMITER line.
# Raises a RuntimeError when git exits unsuccessfully.
def get_git_log(base_branch, source_branch)
  revision_range = "#{base_branch}..#{source_branch}"
  pretty_format = '--pretty=title: %s%nbody:%b%nDELIMITER'

  output, exit_status = Open3.capture2('git', 'log', pretty_format, revision_range)
  return output if exit_status.success?

  raise 'git log failed'
end

# Transforms a formatted git log into a list of SquashedCommit objects.
# The git format ends up printing a single commit as:
#
# title: Remove unused IdV controller view (#5922)
# body:**Why**: Because it's unused.
# * Add changelog, change constant name
# DELIMITER
#
# The string is first split on DELIMITER, each commit is split once into its title and body
# sections, and the body is split into individual non-blank lines.
# Commits with an empty body get an empty commit_messages array.
def build_structured_git_log(git_log)
  non_blank_lines = ->(text) { text.to_s.split(%r{[\r\n]}).reject(&:empty?) }

  commits = git_log.strip.split('DELIMITER').map do |commit|
    # Split only at the first marker so that a body line which itself starts with "body:"
    # stays part of the body instead of cutting off the remaining commit messages.
    title_section, body_section = commit.split("\nbody:", 2)

    title_line = non_blank_lines.call(title_section).first
    # A chunk without any text holds no commit; skip it rather than fail on a missing title.
    next unless title_line

    SquashedCommit.new(
      title: title_line.delete_prefix('title: '),
      commit_messages: non_blank_lines.call(body_section),
    )
  end

  commits.compact
end

# Whether any commit message in base_branch..source_branch contains the skip-changelog marker.
# Raises a RuntimeError when git exits unsuccessfully.
def commit_messages_contain_skip_changelog?(base_branch, source_branch)
  revision_range = "#{base_branch}..#{source_branch}"
  raw_messages, exit_status = Open3.capture2('git', 'log', '--pretty=\'%B\'', revision_range)

  raise 'git log failed' unless exit_status.success?

  raw_messages.include?(SKIP_CHANGELOG_MESSAGE)
end

# Returns the titles of commits that neither opt out with the skip-changelog marker
# (in the title or any body line) nor contain a recognizable changelog line.
def generate_invalid_changes(git_log)
  build_structured_git_log(git_log).each_with_object([]) do |commit, invalid_titles|
    next if commit.title.include?(SKIP_CHANGELOG_MESSAGE)
    next if commit.commit_messages.any? { |message| message.include?(SKIP_CHANGELOG_MESSAGE) }
    next if build_changelog_from_commit(commit)

    invalid_titles << commit.title
  end
end

# Maps the category text an author wrote to the closest entry in CATEGORIES, tolerating small
# typos. Returns nil when no category is within MAX_CATEGORY_DISTANCE edits.
#
# change must respond to [:category] (a Hash or the MatchData from CHANGELOG_REGEX).
def closest_change_category(change)
  written_category = change[:category]

  CATEGORIES.
    map do |category|
      CategoryDistance.new(
        category,
        DidYouMean::Levenshtein.distance(written_category, category),
      )
    end.
    select { |candidate| candidate.distance <= MAX_CATEGORY_DISTANCE }.
    # Pick the smallest distance. `max` with a one-argument block treats the block result as a
    # comparator value and does not select the nearest candidate.
    min_by(&:distance)&.
    category
end

# Builds one ChangelogEntry per Pull Request from the first valid changelog line found in it.
# Each PR should be squashed, which results in every PR being one commit. The commit messages
# in a squashed PR are concatenated with a leading "*" for each commit. Example:
#
# commit b7cc1cdaf697decb9908cb125538e75bddc46489
# Author: IDP Committer <idp.committer@gsa.gov>
# Date:   Wed Feb 2 09:14:29 2022 -0500
#
#     LG-9998: Update Authentication (#9999)
#
#     * Update Authentication commit #1
#
#     changelog: Internal, Authentication, Updating Authentication (LG-9998)
#
#     * Authentication commit #2
#
# Commits are left out when they carry the skip-changelog marker, have no changelog line, or
# name a category that is not close enough to any known category. The PR number is read from
# a "(#1234)" fragment in the commit title, and the first character of the change
# description is upcased.
def generate_changelog(git_log)
  build_structured_git_log(git_log).each_with_object([]) do |commit, entries|
    skipped = commit.title.include?(SKIP_CHANGELOG_MESSAGE) ||
              commit.commit_messages.any? { |message| message.include?(SKIP_CHANGELOG_MESSAGE) }
    next if skipped

    change = build_changelog_from_commit(commit)
    category = change && closest_change_category(change)
    next unless category

    pr_match = %r{\(#(?<pr>\d+)\)}.match(commit.title)

    entries << ChangelogEntry.new(
      category: category,
      subcategory: change[:subcategory],
      pr_number: pr_match && pr_match[:pr],
      change: change[:change].sub(/./, &:upcase),
    )
  end
end

# Turns a list of ChangelogEntry objects into a formatted string that is fit to be pasted
# directly into release notes.
# Entries with the same category and change are grouped into one changelog line so that we can
# support multi-PR changes; the line uses the subcategory of the first entry in the group and
# links every PR number in ascending numeric order.
# Categories are printed in CATEGORIES order and categories without entries are omitted.
# Returns an empty string when there are no entries.
def format_changelog(changelog_entries)
  grouped_entries = changelog_entries.
    sort_by(&:subcategory).
    group_by { |entry| [entry.category, entry.change] }

  changelog = +''
  CATEGORIES.each do |category|
    category_groups = grouped_entries.
      select { |(entry_category, _change), _entries| entry_category == category }
    next if category_groups.empty?

    changelog << "## #{category}\n"
    category_groups.each_value do |entries|
      first_entry = entries.first
      # PR numbers are captured as strings; order them by numeric value so that "#9999" is
      # listed before "#10000" instead of sorting character by character.
      pr_links = entries.map(&:pr_number).compact.sort_by(&:to_i).map do |number|
        "[##{number}](https://github.com/18F/identity-idp/pull/#{number})"
      end
      pr_suffix = pr_links.empty? ? '' : " (#{pr_links.join(', ')})"

      changelog << "- #{first_entry.subcategory}: #{first_entry.change}#{pr_suffix}\n"
    end

    changelog << "\n"
  end

  changelog.strip
end

# Parses command line arguments into an options hash with :base_branch (default "main") and
# :source_branch (default "HEAD"). Recognized switches are removed from args in place.
# -h/--help prints the usage text to stderr and exits.
def parsed_options(args)
  settings = { base_branch: 'main', source_branch: 'HEAD' }
  script_name = File.basename($0)

  parser = OptionParser.new
  parser.banner = <<-EOM
      usage: #{script_name} -s my-feature-branch [OPTIONS]

    EOM

  parser.on('-h', '--help', 'Display this message') do
    warn parser
    exit
  end

  parser.on('-b', '--base_branch BASE_BRANCH', 'Name of base branch, defaults to main') do |name|
    settings[:base_branch] = name
  end

  parser.on(
    '-s',
    '--source_branch SOURCE_BRANCH',
    'Name of source branch, defaults to HEAD',
  ) do |name|
    settings[:source_branch] = name
  end

  parser.parse!(args)
  settings
end

# Script entry point: checks the commits between the base and source branches for changelog
# lines and prints the formatted changelog.
#
# Exits 0 when at least one valid changelog entry exists or any commit message contains the
# skip-changelog marker; commits lacking a valid changelog line are then listed as invalid.
# Otherwise prints guidance to stderr and exits 1.
def main(args)
  options = parsed_options(args)

  # The option parser is local to parsed_options, so the usage line is spelled out here
  # instead of referencing a variable that does not exist in this scope.
  if options[:source_branch].nil?
    abort("usage: #{File.basename($0)} -s my-feature-branch [OPTIONS]")
  end

  git_log = get_git_log(options[:base_branch], options[:source_branch])
  changelog_entries = generate_changelog(git_log)
  invalid_changelog_entries = generate_invalid_changes(git_log)

  skip_check = commit_messages_contain_skip_changelog?(
    options[:base_branch],
    options[:source_branch],
  )

  if skip_check || changelog_entries.count > 0
    # Format once and reuse the result for both the emptiness check and the output.
    formatted_changelog = format_changelog(changelog_entries)
    puts formatted_changelog unless formatted_changelog.empty?
    unless invalid_changelog_entries.empty?
      puts "\n!!! Invalid Changelog Entries !!!"
      puts invalid_changelog_entries.join("\n")
    end

    exit 0
  else
    warn(
      <<~ERROR,
        A valid changelog line was not found.
        A commit message should contain a line in the form of:

        changelog: CATEGORY, SUBCATEGORY, CHANGE_DESCRIPTION

        example:
        changelog: User-Facing Improvements, WebAuthn, Improve error flow for WebAuthn (LG-5515)

        categories:
        #{CATEGORIES.map { |category| "- #{category}" }.join("\n")}

        Include "[skip changelog]" in a commit message to bypass this check.

        Note: the changelog message must be separated from any other commit message by a blank line.
      ERROR
    )

    exit 1
  end
end

# Run the check only when this file is executed directly, not when it is loaded by another file.
main(ARGV) if __FILE__ == $0