# lib/regexp-examples/unicode_char_ranges.rb
require 'pstore'
require 'singleton'
module RegexpExamples
# Interface to retrieve the character sets that match a named regex property.
# E.g. `/\p{Alpha}/`
# These matching values are stored, compressed, in a PStore. Each store file is
# named after the Unicode version it holds (see STORE_FILENAME).
class UnicodeCharRanges
  include Singleton

  # These values were generated by: scripts/unicode_lister.rb
  # Note: Only the first 128 results are listed, for performance.
  # Also, some groups seem to have no matches (weird!)
  STORE_FILENAME = "unicode_ranges_#{RbConfig::CONFIG['UNICODE_VERSION']}.pstore".freeze

  attr_reader :range_store

  # Opens (lazily, via PStore) the store file picked by #unicode_ranges_file.
  def initialize
    @range_store = PStore.new(unicode_ranges_file)
  end

  # Looks up +key+ in the store inside a read-only transaction and expands
  # the stored entry into an array of single-character strings.
  #
  # Raises PStore::Error (from PStore#fetch) when the store has no entry for
  # +key+, rather than failing later with a NoMethodError on nil.
  def get(key)
    range_store.transaction(true) do
      ranges_to_unicode(range_store.fetch(key))
    end
  end
  alias [] get

  private

  # Path of the store file to load from the gem's db directory.
  #
  # The method is written like this to future-proof it a little,
  # i.e. the gem won't completely break for a new ruby version release
  def unicode_ranges_file
    db_path = File.join(__dir__, '../../db')
    select_store_file(Dir["#{db_path}/*.pstore"], "#{db_path}/#{STORE_FILENAME}")
  end

  # Picks, from +paths+, the store whose version is the highest one that is
  # not newer than the version embedded in +wanted_path+. If every store is
  # newer, the oldest one is used instead. Returns nil when +paths+ is empty.
  #
  # Versions are compared numerically, component by component, because a
  # plain string comparison would sort "10.0.0" before "9.0.0".
  def select_store_file(paths, wanted_path)
    wanted = version_key(wanted_path)
    # The path is a secondary sort key only to keep the order deterministic.
    oldest_first = paths.sort_by { |path| [version_key(path), path] }
    not_newer = oldest_first.select { |path| (version_key(path) <=> wanted) <= 0 }
    not_newer.last || oldest_first.first
  end

  # Extracts the numeric components of the version in a store file name,
  # e.g. "db/unicode_ranges_12.1.0.pstore" => [12, 1, 0]
  def version_key(path)
    File.basename(path, '.pstore').scan(/\d+/).map(&:to_i)
  end

  # Expands a stored entry into the characters it represents. Each element
  # is either a single Integer codepoint (a small hack to increase data
  # compression) or a collection of codepoints such as a Range.
  #
  # Example:
  #   ranges_to_unicode([65, 97..99]) #=> ["A", "a", "b", "c"]
  def ranges_to_unicode(ranges)
    ranges.flat_map do |range|
      codepoints = range.is_a?(Integer) ? [range] : range.to_a
      codepoints.map { |codepoint| [codepoint].pack('U') }
    end
  end
end
end