Myoldmopar/decent_ci

View on GitHub
cleanup.rb

Summary

Maintainability
D
2 days
Test Coverage
# encoding: UTF-8 

require 'octokit'
require 'json'
require 'open3'
require 'pathname'
require 'active_support/core_ext/hash'
require 'find'
require 'logger'
require 'fileutils'
require 'ostruct'
require 'yaml'
require 'base64'


# Recursively count the result files under +results_path+ in the results
# repository, descending into sub-directories via the GitHub contents API.
#
# @param client             [Octokit::Client] authenticated GitHub client
# @param results_repository [String] "owner/name" of the results repository
# @param results_path       [String] directory to count from
# @return [Integer] total number of files (entries of type "file") in the tree
def count_files(client, results_repository, results_path)
  entries = github_query(client) { client.contents(results_repository, :path => results_path) }

  entries.reduce(0) do |total, entry|
    case entry.type
    when "dir"
      # Recurse into the sub-folder and add its file count.
      total + count_files(client, results_repository, entry.path)
    when "file"
      total + 1
    else
      # Other entry types (e.g. symlinks) are not counted.
      total
    end
  end
end

# Entry point for results-repository cleanup: measures the total file count
# against the configured limit, fetches the current branches, releases and
# open pull requests from the source repository, and delegates the actual
# scan-and-delete work to clean_up_impl.
#
# @param client             [Octokit::Client] authenticated GitHub client
# @param repository         [String] source repository ("owner/name")
# @param results_repository [String] repository holding the result files
# @param results_path       [String] root directory of results to clean
# @param age_limit          [Numeric] baseline file-age limit, in days
# @param limits             [Hash] cleanup configuration (history limits, branch names)
def clean_up(client, repository, results_repository, results_path, age_limit, limits)
  # Reuse the globally configured logger when present; otherwise log to STDOUT.
  logger = $logger.nil? ? Logger.new(STDOUT) : $logger

  total_files = count_files(client, results_repository, results_path)
  total_limit = limits["history_total_file_limit"]
  limit_reached = (total_files >= total_limit)

  logger.info("File limits: total files found: #{total_files}, limits set to: #{total_limit}, history file limit hit: #{limit_reached}")

  long_running_branch_names = limits["history_long_running_branch_names"]
  feature_limit = limits["history_feature_branch_file_limit"]
  long_running_limit = limits["history_long_running_branch_file_limit"]

  if limit_reached
    logger.info("Total file limits reached, long running branch names: '#{long_running_branch_names}', feature branch file limit: '#{feature_limit}', long running branch file limit: '#{long_running_limit}'")
  end

  # TODO: properly handle paginated results from github
  repo_branches = github_query(client) { client.branches(repository, :per_page => 200) }
  repo_releases = github_query(client) { client.releases(repository, :per_page => 200) }
  open_pull_requests = github_query(client) { client.pull_requests(repository, :state => "open", :per_page => 50) }

  clean_up_impl(client, repository, results_repository, results_path, age_limit,
                limit_reached, long_running_branch_names, feature_limit, long_running_limit,
                repo_branches, repo_releases, open_pull_requests)
end

def clean_up_impl(client, repository, results_repository, results_path, age_limit,
                 limit_reached, long_running_branches, feature_branch_limit, long_running_branch_limit, branches, releases, pull_requests)
  if $logger.nil?
    logger = Logger.new(STDOUT)
  else
    logger = $logger
  end

  # be sure to get files first and branches second so we don't race
  # to delete the results for a branch that doesn't yet exist
  files = github_query(client) { client.contents(results_repository, :path=>results_path) }

  folder_contains_files = false

  files.each{ |file|
    if file.type == "dir"
      # Scan sub-folder
      clean_up_impl(client, repository, results_repository, file.path, age_limit, 
                    limit_reached, long_running_branches, feature_branch_limit, long_running_branch_limit, branches, releases, pull_requests)
    elsif file.type == "file"
      folder_contains_files = true
    end
  }

  unless folder_contains_files
    # No reason to continue from here if no files are found
    return
  end

  branch_history_limit = 20
  file_age_limit = 9000

  if limit_reached
    if results_path.end_with?(*long_running_branches)
      branch_history_limit = long_running_branch_limit
      logger.info("Long running branch limit reached: #{branch_history_limit}")
    else
      branch_history_limit = feature_branch_limit
      logger.info("Feature branch limit reached: #{branch_history_limit}")
    end
  end

  # These are the absolute failsafe limits required by github
  # if we are approaching 1000 files in a single directory
  if files.size > 800
    if files.size > 999
      branch_history_limit = 1
      file_age_limit = age_limit + 1
    else
      branch_history_limit = [5, branch_history_limit].min
      file_age_limit = 60
    end
    logger.info("Hitting directory size limit #{files.size}, reducing history to #{branch_history_limit} data points")
  end

  files_for_deletion = []
  branches_deleted = Set.new
  prs_deleted = Set.new
  file_branch = Hash.new
  branch_files = Hash.new

  releases.each { |release|
    logger.debug("Loaded release: '#{release.tag_name}'")
  }

  files.each { |file| 
    if file.type == "file"
      logger.debug("Examining file #{file.sha} #{file.path}")

      file_data = nil

      begin
        file_content = Base64.decode64(github_query(client) { client.blob(results_repository, file.sha).content })
        #      file_content = Base64.decode64(github_query(client) { client.contents(results_repository, :path=>file.path) })
        file_data = YAML.load(file_content)
      rescue Psych::SyntaxError
        logger.info("Results file has bad data, deleting. #{file.path}")
        files_for_deletion << file
        next
      end

      branch_name = file_data["branch_name"]


      days_old = (DateTime.now - file_data["date"].to_datetime).to_f
      if days_old > file_age_limit
        logger.debug("Results file has been around for #{days_old} days. Deleting.")
        files_for_deletion << file
        next
      end

      if file.path =~ /DailyTaskRun$/
        logger.debug("DailyTaskRun created on: #{file_data["date"]}")
        days_since_run = (DateTime.now - file_data["date"].to_datetime).to_f
        if days_since_run > 5
          logger.debug("Deleting old DailyTaskRun file #{file.path}")
          files_for_deletion << file
        end

      elsif !file_data["pending"]
        if !file_data["pull_request_issue_id"].nil? && file_data["pull_request_issue_id"] != ""
          pr_found = false
          pull_requests.each{ |pr|
#            logger.debug("Comparing '#{pr.number}' to '#{file_data["pull_request_issue_id"]}'");
            if pr.number.to_s == file_data["pull_request_issue_id"].to_s
              # matching open pr found
              pr_found = true
              break
            end
          }
          unless pr_found
            logger.info("PR #{file_data["pull_request_issue_id"]} not found, queuing results file for deletion: #{file_data["title"]}")
            files_for_deletion << file
            prs_deleted << file_data["pull_request_issue_id"]
          end

#        elsif branch_name.nil? && file_data["pull_request_issue_id"].nil? && file_data["tag_name"].nil?
#          logger.error("Found file with no valid tracking data... deleting #{file_data["title"]}")
#          files_for_deletion << file

        elsif !branch_name.nil? && branch_name != "" && (file_data["pull_request_issue_id"].nil? || file_data["pull_request_issue_id"] == "")
          logger.debug("Examining branch #{branch_name} commit #{file_data["commit_sha"]}")

          file_key = {:device_id => file_data["device_id"], :branch_name => branch_name}
          file_data = {:date => file_data["date"], :file => file}

          if branch_files[file_key].nil?
            branch_files[file_key] = []
          end
          branch_files[file_key] << file_data

          branch_found = false
          branches.each{ |b|
            if b.name == branch_name
              branch_found = true
              break
            end
          }

          unless branch_found
            logger.debug("Branch not found, queuing results file for deletion: #{file_data["title"]}")
            files_for_deletion << file
            file_branch[file.path] = branch_name
            branches_deleted << branch_name
          end
        end

      else 
        # is pending
        logger.debug("Pending build was created on: #{file_data["date"]}")
        days_pending = (DateTime.now - file_data["date"].to_datetime).to_f
        if days_pending > 1
          logger.debug("Build has been pending for > 1 day, deleting pending file to try again: #{file_data["title"]}")
          files_for_deletion << file
        end

      end

    end
  }

  logger.info("#{files.size} files found. #{branches.size} active branches found. #{branches_deleted.size} deleted branches found (#{branches_deleted}). #{prs_deleted.size} deleted pull requests found (#{prs_deleted}). #{files_for_deletion.size} files queued for deletion")

  branch_files.each { |key, file_data|
    logger.info("Examining branch data: #{key}")
    file_data.sort_by! { |i| i[:date] }

    # allow at most branch_history_limit results for each device_id / branch_name combination. The newest, specifically
    if file_data.size > branch_history_limit
      file_data[0..file_data.size - (branch_history_limit + 1)].each { |file|
        logger.debug("Marking old branch results file for deletion #{file[:file].path}")
        files_for_deletion << file[:file]
      }
    end
  }


  files_for_deletion.each { |file|
    logger.info("Deleting results file: #{file.path}. Source branch #{file_branch[file.path]} removed, or file too old")
    begin
      github_query(client) { client.delete_contents(results_repository, file.path, "Source branch #{file_branch[file.path]} removed. Deleting results.", file.sha) }
    rescue => e
      logger.error("Error deleting file: #{file.path} for branch #{file_branch[file.path]} message: #{e}")
    end
  }

  true
end