lib/backup/syncer/cloud/local_file.rb
require "digest/md5"
module Backup
module Syncer
module Cloud
# A file on the local filesystem, identified by its (sanitized) path and
# carrying the MD5 hex digest of its contents once it has been discovered
# through LocalFile.find.
class LocalFile
  # The file's path as given to the constructor, with any invalid UTF-8
  # bytes replaced by U+FFFD.
  attr_reader :path
  # MD5 hex digest of the file's contents; assigned by LocalFile.find.
  attr_accessor :md5

  class << self
    # Returns a Hash of LocalFile objects for each file within +dir+,
    # except those matching any of the +excludes+.
    # Hash keys are the file's path relative to +dir+.
    #
    # +excludes+ may hold Strings (used as File.fnmatch? patterns) and
    # Regexps; both are matched against the expanded (absolute) path.
    def find(dir, excludes = [])
      dir = File.expand_path(dir)
      # An expanded root directory (e.g. "/") already ends with the
      # separator. Appending another one would give "//", which never
      # occurs in the joined paths and would leave a leading "/" on every
      # key, so only add the separator when it is missing.
      prefix = dir.end_with?("/") ? dir : "#{dir}/"
      find_md5(dir, excludes).each_with_object({}) do |file, files|
        files[file.path.sub(prefix, "")] = file
      end
    end

    # Return a new LocalFile object if it's valid.
    # Otherwise, log a warning and return nil.
    def new(*args)
      file = super
      return file unless file.invalid?

      Logger.warn("\s\s[skipping] #{file.path}\n" \
                  "\s\sPath Contains Invalid UTF-8 byte sequences")
      nil
    end

    private

    # Recursively walks +dir+ and returns an Array of LocalFile objects,
    # each with its md5 set. Excluded directories are not descended into;
    # files that are excluded or whose path is invalid are left out.
    # Entries that are neither a directory nor a regular file are ignored.
    def find_md5(dir, excludes)
      found = []
      (Dir.entries(dir) - %w[. ..]).each do |entry|
        path = File.join(dir, entry)
        if File.directory?(path)
          # concat appends in place instead of building a new Array for
          # every subdirectory.
          found.concat(find_md5(path, excludes)) unless exclude?(excludes, path)
        elsif File.file?(path)
          # new returns nil (after logging) for paths with invalid UTF-8,
          # so exclude? is only ever given a valid path here.
          file = new(path)
          next if file.nil? || exclude?(excludes, file.path)

          file.md5 = Digest::MD5.file(file.path).hexdigest
          found << file
        end
      end
      found
    end

    # Returns true if +path+ matches any of the +excludes+.
    # String excludes are File.fnmatch? patterns, Regexp excludes are
    # matched directly; any other kind of exclude never matches.
    # Note this can not be called if +path+ includes invalid UTF-8.
    def exclude?(excludes, path)
      excludes.any? do |ex|
        case ex
        when String then File.fnmatch?(ex, path)
        when Regexp then ex.match(path)
        end
      end
    end
  end

  # If +path+ contains invalid UTF-8, it will be sanitized
  # and the LocalFile object will be flagged as invalid.
  # This is done so @file.path may be logged.
  def initialize(path)
    @path = sanitize(path)
  end

  # Returns true if the path given to the constructor had to be sanitized.
  def invalid?
    !!@invalid
  end

  private

  # Returns a copy of +str+ in which every character that can not be
  # decoded as UTF-8 is replaced by U+FFFD, flagging this object as
  # invalid when at least one replacement was made.
  def sanitize(str)
    str.each_char.map do |char|
      begin
        # unpack("U") raises for a malformed UTF-8 character.
        char.unpack("U")
        char
      rescue
        @invalid = true
        "\xEF\xBF\xBD" # => "\uFFFD"
      end
    end.join
  end
end
end
end
end