# lib/pericope/parsing.rb
require "pericope/range"
require "pericope/verse"
class Pericope
module Parsing
# Parses the first pericope found in +text+.
#
# Unlike Pericope.new, this does not raise an exception when +text+
# contains no pericope; it returns nil instead.
def parse_one(text)
  # Returning from inside the block stops the scan at the first hit.
  parse(text) { |first_pericope| return first_pericope }
  nil
end
# Finds every pericope in +text+.
#
# With a block, yields each Pericope as it is found and returns +text+.
# Without a block, returns an Array of the Pericopes found (possibly empty).
def parse(text)
  unless block_given?
    found = []
    match_all(text) { |attributes| found << Pericope.new(attributes) }
    return found
  end

  match_all(text) { |attributes| yield Pericope.new(attributes) }
  text
end
# Breaks +text+ into an Array that alternates between plain-text Strings and
# Pericope objects, in the order they occur. Empty stretches of text are
# omitted.
#
# When a block is given, each segment is also yielded as it is produced.
# Returns the Array of segments.
def split(text)
  pieces = []
  cursor = 0

  # Records one segment and passes it to the caller's block, if there is one.
  emit = lambda do |piece|
    pieces << piece
    yield piece if block_given?
  end

  match_all(text) do |attributes, match|
    # text between the end of the previous match and the start of this one
    leading = text[cursor...match.begin(0)]
    emit.call(leading) unless leading.empty?
    emit.call(Pericope.new(attributes))
    cursor = match.end(0)
  end

  # whatever follows the last match
  trailing = text[cursor...text.length]
  emit.call(trailing) unless trailing.empty?
  pieces
end
# Returns the attributes Hash for the first pericope matched in +text+,
# or nil when nothing matches.
def match_one(text)
  # Returning from inside the block stops the scan at the first hit.
  match_all(text) { |first_attributes| return first_attributes }
  nil
end
# Scans +text+ with Pericope.regexp and yields, for every match whose
# reference parses to at least one range, an attributes Hash
# (:original_string, :book, :ranges) followed by the MatchData.
#
# Requires a block. Returns whatever String#scan returns when called with a block.
def match_all(text)
  text.scan(Pericope.regexp) do
    hit = Regexp.last_match
    # The position of the first capture that took part in the match selects the book id.
    book_id = BOOK_IDS[hit.captures.index { |capture| capture }]
    # NOTE(review): capture 67 is presumably the chapter/verse text that follows
    # the 66 book groups of BOOK_PATTERN — confirm against Pericope.regexp.
    verse_ranges = parse_reference(book_id, hit[67])
    unless verse_ranges.empty?
      yield({ :original_string => hit.to_s, :book => book_id, :ranges => verse_ranges }, hit)
    end
  end
end
# Normalizes +reference+, cuts it apart at commas and semicolons, and passes
# the pieces to parse_ranges for +book+. Returns parse_ranges' result.
def parse_reference(book, reference)
  pieces = normalize_reference(reference).split(/[,;]/)
  parse_ranges(book, pieces)
end
# Applies every (regex, replacement) pair from +normalizations+, in order, to
# the string form of +reference+ and returns the resulting String.
def normalize_reference(reference)
  normalized = reference.to_s
  normalizations.each do |regex, replacement|
    normalized = normalized.gsub(regex, replacement)
  end
  normalized
end
# Converts the separated pieces of a reference (for example ["12:1-8", "11"])
# into one Range per piece for +book+.
#
# book   - the book identifier passed on to the chapter/verse lookup helpers.
# ranges - an Array of Strings, each holding one fragment or two fragments
#          joined by "-".
#
# A fragment may leave out its chapter or verse; the chapter and verse that
# ended the previous range are remembered and used as defaults for the next.
# Out-of-bounds chapters and verses are pulled back into the valid span via
# to_valid_chapter / to_valid_verse.
#
# NOTE(review): Range here presumably resolves to Pericope::Range (see the
# require of "pericope/range"), not Ruby's core Range — confirm.
#
# Returns an Array of Range objects, in the same order as +ranges+.
def parse_ranges(book, ranges)
# a book without chapters is addressed as if everything were in chapter 1
default_chapter = nil
default_chapter = 1 unless book_has_chapters?(book)
default_verse = nil
ranges.map do |range|
range_begin_string, range_end_string = range.split("-")
# treat 12:4 as 12:4-12:4
range_end_string ||= range_begin_string
range_begin = parse_reference_fragment(range_begin_string, default_chapter: default_chapter, default_verse: default_verse)
# no verse specified; this is a range of chapters, start with verse 1
chapter_range = false
if range_begin.needs_verse?
range_begin.verse = 1
chapter_range = true
end
range_begin.chapter = to_valid_chapter(book, range_begin.chapter)
range_begin.verse = to_valid_verse(book, range_begin.chapter, range_begin.verse)
# a single verse (not a bare chapter): the range ends where it begins
if range_begin_string == range_end_string && !chapter_range
range_end = range_begin.dup
else
# in a chapter range the end fragment gets no default chapter, so a bare
# number there is read as a chapter; otherwise it is read as a verse of
# the chapter the range began in
range_end = parse_reference_fragment(range_end_string, default_chapter: (range_begin.chapter unless chapter_range))
range_end.chapter = to_valid_chapter(book, range_end.chapter)
# treat Mark 3-1 as Mark 3-3 and, eventually, Mark 3:1-35
range_end.chapter = range_begin.chapter if range_end.chapter < range_begin.chapter
# this is a range of chapters, end with the last verse
if range_end.needs_verse?
range_end.verse = get_max_verse(book, range_end.chapter)
else
range_end.verse = to_valid_verse(book, range_end.chapter, range_end.verse)
end
end
# e.g. parsing 11 in 12:1-8,11 => remember that 12 is the chapter
default_chapter = range_end.chapter
# e.g. parsing c in 9:12a, c => remember that 12 is the verse
default_verse = range_end.verse
range = Range.new(range_begin.to_verse(book: book), range_end.to_verse(book: book))
# an 'a' at the beginning of a range is redundant
# (only when the range spans more than one verse)
range.begin.letter = nil if range.begin.letter == "a" && range.end.to_i > range.begin.to_i
# the last possible letter (max_letter, e.g. 'c') at the end of a range is
# redundant, again only when the range spans more than one verse
range.end.letter = nil if range.end.letter == max_letter && range.end.to_i > range.begin.to_i
range
end
end
# Parses one side of a range (such as "12:4b", "4" or "c") into a
# ReferenceFragment.
#
# input           - the fragment String; it is matched against
#                   Pericope.fragment_regexp, whose three captures are read
#                   as chapter, verse and letter.
# default_chapter - chapter to use when the fragment names none.
# default_verse   - verse to use when the fragment names none.
#
# Returns a ReferenceFragment with an Integer chapter, an Integer or nil
# verse, and a letter that is kept only when there is a verse.
def parse_reference_fragment(input, default_chapter: nil, default_verse: nil)
  chapter, verse, letter = input.match(Pericope.fragment_regexp).captures
  chapter ||= default_chapter

  # With no chapter at all, the lone number is the chapter, not the verse.
  unless chapter
    chapter = verse
    verse = nil
  end

  verse ||= default_verse
  letter = nil unless verse
  ReferenceFragment.new(chapter.to_i, verse&.to_i, letter)
end
# Pulls +chapter+ back into the span from 1 to the last chapter of +book+.
def to_valid_chapter(book, chapter)
  last_chapter = get_max_chapter(book)
  coerce_to_range(chapter, 1..last_chapter)
end
# Pulls +verse+ back into the span from 1 to the last verse of +chapter+ in +book+.
def to_valid_verse(book, chapter, verse)
  last_verse = get_max_verse(book, chapter)
  coerce_to_range(verse, 1..last_verse)
end
# Returns +number+ limited to the endpoints of +range+: range.begin when
# +number+ is below it, range.end when +number+ is above it, otherwise
# +number+ itself.
def coerce_to_range(number, range)
  lower = range.begin
  upper = range.end
  if number < lower
    lower
  elsif number > upper
    upper
  else
    number
  end
end
# Mutable holder for one parsed piece of a reference: its chapter, verse
# and letter.
ReferenceFragment = Struct.new(:chapter, :verse, :letter) do
  # Builds a Verse for +book+ from this fragment's chapter, verse and letter.
  def to_verse(book:)
    Verse.new(book, *to_a)
  end

  # True while no verse has been set on the fragment.
  def needs_verse?
    verse.nil?
  end
end
end
# Matches the name of a book of the Bible, including many abbreviations,
# case-insensitively, starting at a word boundary. The pattern is written in
# extended (x) mode, so the line breaks and indentation inside it are not
# matched literally; whitespace that must be matched is spelled \s.
#
# Every book has its own capture group (66 in all). Numbered books come first
# ("3 ...", then "2 ...", then "1 ..."), followed by the unnumbered books.
# BOOK_IDS lists the book id for each capture group in this same order, so the
# two must be kept in step.
#
# NOTE(review): match_all indexes BOOK_IDS by capture position in
# Pericope.regexp, which presumably embeds this pattern — confirm before
# adding, removing or reordering groups.
BOOK_PATTERN = %r{\b(?:
(?:(?:3|iii|third|3rd)\s*(?:
(john|joh|jon|jhn|jh|jo|jn)
))|
(?:(?:2|ii|second|2nd)\s*(?:
(samuels|samuel|sam|sa|sm)|
(kings|king|kngs|kgs|kg|k)|
(chronicles|chronicle|chron|chrn|chr)|
(john|joh|jon|jhn|jh|jo|jn)|
(corinthians?|cor?|corint?h?|corth)|
(thessalonians?|thes{1,}|the?s?)|
(timothy|tim|tm|ti)|
(peter|pete|pet|ptr|pe|pt|pr)
))|
(?:(?:1|i|first|1st)\s*(?:
(samuels|samuel|sam|sa|sm)|
(kings|king|kngs|kgs|kg|k)|
(chronicles|chronicle|chron|chrn|chr)|
(john|joh|jon|jhn|jh|jo|jn)|
(corinthians?|cor?|corint?h?|corth)|
(thessalonians?|thes{1,}|the?s?)|
(timothy|tim|tm|ti)|
(peter|pete|pet|ptr|pe|pt|pr)
))|
(genesis|gen|gn|ge)|
(exodus|exod|exo|exd|ex)|
(leviticus|lev|levi|le|lv)|
(numbers|number|numb|num|nmb|nu|nm)|
(deuteronomy|deut|deu|dt)|
(joshua|josh|jsh|jos)|
(judges|jdgs|judg|jdg)|
(ruth|rut|rth|ru)|
(isaiah|isa|is|ia|isai|isah)|
(ezra|ezr)|
(nehemiah|neh|ne)|
(esther|esth|est|es)|
(job|jb)|
(psalms|psalm|pslms|pslm|psm|psa|ps)|
(proverbs|proverb|prov|prv|prvb|prvbs|pv)|
(ecclesiastes|eccles|eccl|ecc|ecl)|
((?:the\s?)?song\s?of\s?solomon|(?:the\s?)?song\s?of\s?songs|sn?gs?|songs?|so?s|sol?|son|s\s?of\s?\ss)|
(jeremiah?|jer?|jr|jere)|
(lamentations?|lam?|lm)|
(ezekiel|ezek|eze|ezk)|
(daniel|dan|dn|dl|da)|
(hosea|hos|ho|hs)|
(joel|jl)|
(amos|amo|ams|am)|
(obadiah|obadia|obad|oba|obd|ob)|
(jonah|jon)|
(micah|mica|mic|mi)|
(nahum|nah|nahu|na)|
(habakk?uk|habk?)|
(zephaniah?|ze?ph?)|
(haggai|ha?gg?)|
(zechariah?|ze?ch?)|
(malachi|mal)|
(matthew|matt|mat|ma|mt)|
(mark|mrk|mk)|
(luke|luk|lk|lu)|
(john|joh|jon|jhn|jh|jo|jn)|
(acts|act|ac)|
(romans|roman|roms|rom|rms|ro|rm)|
(galatians|galatian|galat|gala|gal|ga)|
(ephesians?|eph?|ephe?s?)|
(philippians?|phi?l|php|phi|philipp?)|
(colossi?ans?|col?)|
(titus|tit|ti)|
(philemon|phl?mn?|philem?)|
(hebrews|hebrew|heb)|
(james|jam|jas|jm|js|ja)|
(jude)|
(revelations|revelation|revel|rev|rv|re)
)}ix.freeze
# Book ids listed in the order in which BOOK_PATTERN's capture groups appear:
# the zero-based position of the capture group that matched is the index of
# the matched book's id in this list (see match_all).
BOOK_IDS = [ 64, 10, 12, 14, 63, 47, 53, 55, 61, 9, 11, 13, 62, 46, 52, 54, 60, 1, 2, 3, 4, 5, 6, 7, 8, 23, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 50, 51, 56, 57, 58, 59, 65, 66 ].freeze
# Book names positioned so that BOOK_NAMES[id] is the name of the book with
# that id (1 through 66, the values found in BOOK_IDS). Index 0 is nil because
# no book has id 0.
BOOK_NAMES = [nil, "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job", "Psalm", "Proverbs", "Ecclesiastes", "Song of Solomon", "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John", "2 John", "3 John", "Jude", "Revelation"].freeze
end