lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
module RegexpExamples
  # A collection of related helper methods, utilised by the `Parser` class.
  #
  # Every method here deals with the text that follows a backslash in the
  # regexp source. Nothing in this module defines `rest_of_string`,
  # `next_char`, `regexp_string`, `parse_single_char_group` or `negate_if`,
  # nor the `@current_position`, `@num_groups` and `@ignorecase` instance
  # variables; the including class is expected to supply them.
  module ParseAfterBackslashGroupHelper
    protected

    # Steps over the backslash and dispatches on whatever follows it.
    #
    # The branch order matters: numeric and named backreferences are tried
    # first, then the escapes that consume more than one character, and only
    # then the single-letter escapes. Anything unrecognised is treated as an
    # escaped literal character.
    #
    # Multi-character branches advance `@current_position` so that it rests on
    # the last character of the escape.
    # NOTE(review): presumably the caller then steps past that character -
    # confirm against `Parser`.
    #
    # @return [Object] the group object built by the selected branch
    # @raise [IllegalSyntaxError] for `\g`, `\b`, `\B`, and for `\A`, `\G`,
    #   `\z`, `\Z` when they are not at the relevant end of the pattern
    def parse_after_backslash_group
      @current_position += 1
      if rest_of_string =~ /\A(\d{1,3})/
        parse_regular_backreference_group(Regexp.last_match(1))
      elsif rest_of_string =~ /\Ak['<]([\w-]+)['>]/
        parse_named_backreference_group(Regexp.last_match(1))
      elsif CharSets::BackslashCharMap.key?(next_char)
        parse_backslash_special_char
      elsif rest_of_string =~ /\A(c|C-)(.)/
        parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
      elsif rest_of_string =~ /\Ax(\h{1,2})/
        parse_backslash_escape_sequence(Regexp.last_match(1))
      elsif rest_of_string =~ /\Au(\h{4}|\{\h{1,6}\})/
        # The braced form takes 1 to 6 hex digits, so code points above
        # U+FFFF (e.g. \u{1F600}) are recognised too.
        parse_backslash_unicode_sequence(Regexp.last_match(1))
      elsif rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
        parse_backslash_named_property(
          Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
        )
      elsif next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
        PlaceHolderGroup.new
      elsif next_char == 'R'
        parse_backslash_linebreak
      elsif next_char == 'g'
        parse_backslash_subexpresion_call
      elsif next_char =~ /[bB]/
        parse_backslash_anchor
      elsif next_char =~ /[AG]/
        parse_backslash_start_of_string
      elsif next_char =~ /[zZ]/
        parse_backslash_end_of_string
      else
        parse_single_char_group(next_char)
      end
    end

    # Handles a numeric backreference such as `\1` or `\12`.
    #
    # @param group_id [String] the matched digits
    # @return [BackReferenceGroup]
    def parse_regular_backreference_group(group_id)
      @current_position += (group_id.length - 1) # In case of 10+ backrefs!
      parse_backreference_group(group_id)
    end

    # Handles `\k<name>` / `\k'name'`, including relative numeric references.
    #
    # @param group_name [String] the text between the delimiters
    # @return [BackReferenceGroup] built with an Integer id for a relative
    #   reference, otherwise with `group_name` unchanged
    def parse_named_backreference_group(group_name)
      # Skip the name plus its two delimiters.
      @current_position += (group_name.length + 2)
      group_id = if group_name.to_i < 0
                   # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
                   @num_groups + group_name.to_i + 1
                 else
                   group_name
                 end
      parse_backreference_group(group_id)
    end

    # @param group_id [String, Integer]
    # @return [BackReferenceGroup]
    def parse_backreference_group(group_id)
      BackReferenceGroup.new(group_id)
    end

    # Builds a group from the `CharSets::BackslashCharMap` entry for the
    # current character. The entry is duplicated so the shared map value is
    # not handed out directly.
    #
    # @return [CharGroup]
    def parse_backslash_special_char
      CharGroup.new(
        CharSets::BackslashCharMap[next_char].dup,
        @ignorecase
      )
    end

    # Handles control escapes written as `\cX` or `\C-X`.
    #
    # @param control_syntax [String] the matched prefix, `"c"` or `"C-"`
    # @param control_code [String] the character following the prefix
    # @return [Object] whatever `parse_single_char_group` returns
    def parse_backslash_control_char(control_syntax, control_code)
      @current_position += control_syntax.length
      parse_single_char_group(parse_control_character(control_code))
    end

    # Handles `\xH` / `\xHH` hexadecimal escapes.
    #
    # @param escape_sequence [String] the one or two matched hex digits
    # @return [Object] whatever `parse_single_char_group` returns
    def parse_backslash_escape_sequence(escape_sequence)
      @current_position += escape_sequence.length
      parse_single_char_group(parse_unicode_sequence(escape_sequence))
    end

    # Maps a character to its control character by keeping only the low five
    # bits of its code point.
    #
    # @param char [String]
    # @return [String] a single character with code point 0..31
    def parse_control_character(char)
      (char.ord % 32).chr # Black magic!
      # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
    end

    # Converts a string of hex digits into the character with that code point.
    #
    # @param match [String] hex digits
    # @return [String] a one-character string
    def parse_unicode_sequence(match)
      [match.to_i(16)].pack('U')
    end

    # Handles `\uHHHH` and `\u{H...}` escapes.
    #
    # @param full_hex_sequence [String] everything matched after the `u`,
    #   including the braces when the braced form was used
    # @return [Object] whatever `parse_single_char_group` returns
    def parse_backslash_unicode_sequence(full_hex_sequence)
      @current_position += full_hex_sequence.length
      sequence = full_hex_sequence.delete('{}') # Strip off "{" and "}"
      parse_single_char_group(parse_unicode_sequence(sequence))
    end

    # Handles `\p{Name}`, `\P{Name}`, `\p{^Name}` and `\P{^Name}`.
    #
    # @param p_negation [String] the matched letter, `"p"` or `"P"`
    # @param caret_negation [String] `"^"` or the empty string
    # @param property_name [String] looked up in lower case in
    #   `CharSets::NamedPropertyCharMap`
    # @return [CharGroup]
    def parse_backslash_named_property(p_negation, caret_negation, property_name)
      @current_position += (caret_negation.length + # 0 or 1, if '^' is present
                            property_name.length +
                            2) # Length of opening and closing brackets (always 2)
      # Beware of double negatives! E.g. /\P{^Space}/
      is_negative = (p_negation == 'P') ^ (caret_negation == '^')
      CharGroup.new(
        negate_if(CharSets::NamedPropertyCharMap[property_name.downcase], is_negative),
        @ignorecase
      )
    end

    # Handles `\R`.
    #
    # @return [CharGroup] a group over the five line break strings below
    def parse_backslash_linebreak
      CharGroup.new(
        ["\r\n", "\n", "\v", "\f", "\r"],
        @ignorecase
      ) # Using "\r\n" as one character is little bit hacky...
    end

    # Handles `\g`, which is always rejected.
    #
    # @raise [IllegalSyntaxError] always
    def parse_backslash_subexpresion_call
      raise IllegalSyntaxError,
            'Subexpression calls (\\g) cannot be supported, as they are not regular'
    end

    # Handles `\b` and `\B`, which are always rejected.
    #
    # @raise [IllegalSyntaxError] always
    def parse_backslash_anchor
      raise_anchors_exception!
    end

    # Handles `\A` and `\G`. They are accepted only when the escape letter is
    # at index 1, i.e. the backslash is the first character of the pattern.
    #
    # @return [PlaceHolderGroup]
    # @raise [IllegalSyntaxError] when the anchor appears anywhere else
    def parse_backslash_start_of_string
      if @current_position == 1
        PlaceHolderGroup.new
      else
        raise_anchors_exception!
      end
    end

    # Handles `\z` and `\Z`. They are accepted only when the escape letter is
    # the last character of the pattern.
    #
    # @return [PlaceHolderGroup, QuestionMarkRepeater] a placeholder for `\z`;
    #   an optional "\n" for `\Z`
    # @raise [IllegalSyntaxError] when the anchor appears anywhere else
    def parse_backslash_end_of_string
      if @current_position == (regexp_string.length - 1)
        if next_char == 'z'
          PlaceHolderGroup.new
        else # next_char == 'Z'
          QuestionMarkRepeater.new(SingleCharGroup.new("\n", @ignorecase))
        end
      else
        raise_anchors_exception!
      end
    end

    # Raises the shared "anchors are unsupported" error, naming the current
    # character in the message.
    #
    # @raise [IllegalSyntaxError] always
    def raise_anchors_exception!
      raise IllegalSyntaxError,
            "Anchors ('#{next_char}') cannot be supported, as they are not regular"
    end
  end
end