lib/anon/text.rb
# encoding: utf-8
require 'anon/base'
module Anon
# Anonymises any detected e-mail address in a text stream.
#
# Lines are read from the input one at a time, every substring matching
# EMAIL_REGEX is replaced, and the resulting line is written to the output.
class Text < Base
  # Pattern used to locate e-mail addresses inside a line of text.
  #
  # From the email regex research: http://fightingforalostcause.net/misc/2006/compare-email-regex.php
  # Authors: James Watts and Francisco Jose Martin Moreno
  #
  # The first character class describes the dot-separated leading segments
  # of the local part and must list `$` and `^` as literal characters.
  # Anchors do not exist inside a character class: written there, `\z` is
  # just the letter "z" and `\\A` is a backslash followed by "A". Spelling
  # them that way dropped `$` and `^` from the class, so an address such as
  # "a$b.c@example.com" was only matched from "b.c@..." onwards and the
  # leading "a$" was left in the output.
  EMAIL_REGEX = /([\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\+-]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)/i # rubocop:disable Metrics/LineLength

  # Returns a new instance of the Text anonymiser
  # @param input [IO, #gets] the stream to read from
  # @param output [IO, #puts] the stream to write to
  def initialize(input, output)
    @input = input
    @output = output
  end

  # Anonymises any e-mail addresses found in the text.
  #
  # Every line read from the input is anonymised and written to the output,
  # with the progress counter advanced once per line.
  # NOTE(review): start_progress, increment_progress and complete_progress
  # are not defined in this class; presumably inherited from Base - confirm.
  def anonymise!
    start_progress
    map_lines do |line|
      line = anonymise_line(line)
      increment_progress
      line
    end
    complete_progress
  end

  private

  attr_reader :input, :output

  # Reads each line from the incoming stream, processes it using the block
  # and writes the return value of the block to the outgoing stream.
  # Stops as soon as `input.gets` returns nil or false.
  def map_lines
    while (inline = input.gets)
      output.puts yield(inline)
    end
  end

  # Replaces every EMAIL_REGEX match in the line with the value returned by
  # anonymous_email for that match and returns the new string.
  # NOTE(review): anonymous_email is not defined in this class; presumably
  # provided by Base - confirm.
  # @param line [String] a single line of input text
  # @return [String] the line with each matched address substituted
  def anonymise_line(line)
    line.gsub(EMAIL_REGEX) { |email| anonymous_email(email) }
  end
end
end