# lib/well_read_faker/source.rb
require "mutex_m"
module WellReadFaker
  # Serves paragraphs taken from a text. The text is read lazily, on first
  # use, either from an object responding to +read+ or from a file path. It is
  # then split into paragraphs which are handed out one at a time, cycling
  # back to the first one once all of them have been returned.
  #
  # Recognized options (all optional):
  # [:begin]     pattern; everything before its first match is discarded
  #              (the matched text itself is kept).
  # [:end]       pattern; its first match (searched after the +:begin+ trim)
  #              and everything following it are discarded.
  # [:min_words] paragraphs with fewer words than this are dropped.
  class Source
    include Mutex_m

    attr_reader :path_to_book_or_io, :options

    # path_to_book_or_io:: object responding to +read+, or a path for File.read
    # options::            see the class description
    def initialize path_to_book_or_io, options = {}
      super()
      @path_to_book_or_io = path_to_book_or_io
      @options = options
      @loaded = false
    end

    # Returns the next paragraph, loading the text first if necessary.
    # Raises ArgumentError when a +:begin+ or +:end+ pattern does not match.
    def paragraph
      ensure_loaded
      @paragraphs_arr[inc_paragraphs]
    end

    # Loads and processes the text unless that has been done already. The
    # flag is checked again under the lock so that the text is processed only
    # once even if several threads get here at the same time.
    def ensure_loaded
      @loaded or mu_synchronize{ @loaded or load }
    end

    private

    # Reads the raw text and prepares the paragraph list.
    def load
      if path_to_book_or_io.respond_to? :read
        raw = path_to_book_or_io.read
      else
        raw = File.read path_to_book_or_io
      end
      process_raw_text raw
      @loaded = true
    end

    # Trims +raw+ according to the options, splits it on blank lines, joins
    # each paragraph into a single line, removes duplicates and too short
    # paragraphs, shuffles the result and resets the paragraph cursor.
    def process_raw_text raw
      trimmed = trim_text_by_regexps raw, options[:begin], options[:end]
      @paragraphs_arr = trimmed.split(/\n\s*\n/)
      @paragraphs_arr.map!{ |m| m.gsub(/\s*\n\s*/, " ") }
      @paragraphs_arr.uniq!
      if options[:min_words]
        # Built once here rather than once per paragraph inside the block.
        min_words_rx = /(\w+\b\W*){#{options[:min_words]}}/
        @paragraphs_arr.select!{ |m| min_words_rx =~ m }
      end
      shuffle_array @paragraphs_arr
      # -1 so that the first increment yields index 0.
      @paragraphs_idx = -1
    end

    # Returns a stripped copy of +source_text+ which starts where +begin_rx+
    # first matches and ends right before the first match of +end_rx+ in the
    # remaining text. A nil pattern disables the respective trim.
    # Raises ArgumentError when a given pattern does not match.
    def trim_text_by_regexps source_text, begin_rx, end_rx
      retval = source_text.dup
      if begin_rx
        match_data = match_or_raise begin_rx, retval
        # Exclusive range: when the match starts at offset 0 there is nothing
        # to cut. An inclusive 0..(offset - 1) would turn into 0..-1 in that
        # case and wipe out the whole text.
        retval[0...match_data.begin(0)] = ""
      end
      if end_rx
        match_data = match_or_raise end_rx, retval
        retval[match_data.begin(0)..-1] = ""
      end
      retval.strip!
      retval
    end

    # Returns the match data of +rx+ against +text+, or raises ArgumentError
    # when there is no match.
    def match_or_raise rx, text
      rx.match(text) or raise(
        ArgumentError,
        "Regular expression #{rx.inspect} not found in text."
      )
    end

    # Advances the paragraph cursor under the lock, wrapping around at the
    # end of the list, and returns the new index.
    # Raises ZeroDivisionError when the paragraph list is empty.
    def inc_paragraphs
      mu_synchronize do
        @paragraphs_idx = (@paragraphs_idx + 1) % @paragraphs_arr.size
      end
    end

    # Extracted to a separate method to make stubbing easy (sometimes we want
    # to preserve natural order in specs)
    def shuffle_array array
      array.shuffle!
    end
  end
end