# lib/stupidedi/reader/token_reader.rb
# frozen_string_literal: true
module Stupidedi
using Refinements
module Reader
class TokenReader
# Pattern a candidate segment identifier must match in full: one uppercase
# letter followed by one or two uppercase letters or digits. Anchored with
# `\z` (not `\Z`) so a trailing newline is never accepted as part of the
# identifier.
#
# @private
SEGMENT_ID = /\A[A-Z][A-Z0-9]{1,2}\z/
include Inspect
# The unread input. Within this class it is only accessed through
# `defined_at?`, `at`, `drop`, `empty?` and `position`.
#
# @return [String, Input]
attr_reader :input
# The delimiter set; this class reads its `segment`, `element`, `component`
# and `repetition` values.
#
# @return [Separators]
attr_reader :separators
# Consulted by {#read_segment} (via `defined_at?` and `at`) to obtain the
# `element_uses` for a segment identifier. Unlike the other attributes it
# is writable.
#
# @return [SegmentDict]
attr_accessor :segment_dict
# Stores the three collaborators of the reader without copying them.
#
# @param input [String, Input] the unread input
# @param separators [Separators] the delimiters used to tokenize `input`
# @param segment_dict [SegmentDict] defaults to `SegmentDict.empty`
def initialize(input, separators, segment_dict = SegmentDict.empty)
  @input        = input
  @separators   = separators
  @segment_dict = segment_dict
end
# Builds a new reader, taking each of `:input`, `:separators` and
# `:segment_dict` from `changes` when the key is present and from this
# reader otherwise. A key that is present with a `nil` value is used as-is.
#
# @param changes [Hash] replacement values keyed by attribute name
# @return [TokenReader]
def copy(changes = {})
  new_input        = changes.fetch(:input, @input)
  new_separators   = changes.fetch(:separators, @separators)
  new_segment_dict = changes.fetch(:segment_dict, @segment_dict)

  TokenReader.new(new_input, new_separators, new_segment_dict)
end
# Always answers `false` for a {TokenReader}; use {#stream} to obtain a
# {StreamReader} over the same input.
#
# @return false
def stream?
false
end
# Wraps the current {#input} in a new {StreamReader}. The separators and
# segment dictionary of this reader are not passed along.
#
# @return [StreamReader]
def stream
StreamReader.new(@input)
end
# Delegates to `@input.empty?`.
#
# @return [Boolean] presumably true when no input remains -- the exact
#   result type depends on the input object
def empty?
@input.empty?
end
# Checks that the input starts with `s`, ignoring any control characters
# (see `is_control?`) found along the way. On a match, the input up to and
# including the last matched character is skipped and the rest is returned
# as a new `TokenReader` wrapped by `Either.success`. An empty `s` succeeds
# immediately with this reader.
#
# Fails as soon as `s.length` non-control characters were read and they
# differ from `s`, or when the input ends before that many were read.
#
# @param s [String] the expected prefix
# @return [Either<TokenReader>]
def consume_prefix(s)
  return success(self) if s.empty?

  offset = 0
  seen   = ""

  while @input.defined_at?(offset)
    char = @input.at(offset)
    offset += 1
    next if is_control?(char)

    seen += char
    # Keep reading until as many characters as `s` holds were collected
    next unless seen.length == s.length

    return success(advance(offset)) if seen == s
    return failure("found #{seen.inspect} instead of #{s.inspect}")
  end

  failure("reached end of input without finding #{s.inspect}")
end
# Skips the run of control characters (see `is_control?`) at the start of
# the input. Always succeeds: with this very reader when there is nothing
# to skip, otherwise with a new reader positioned after the run.
#
# @return [Either<TokenReader>]
def consume_control_chars
  skipped = 0
  skipped += 1 while @input.defined_at?(skipped) && is_control?(@input.at(skipped))

  success(skipped.zero? ? self : advance(skipped))
end
# If `s` occurs within {#input}, then the input up to and including `s`
# is skipped and the remaining input is returned as a new `TokenReader`
# wrapped by `Either.success`. Otherwise, {Either::Failure} is returned.
#
# Control characters (see `is_control?`) are skipped over and never take
# part in the comparison. An empty `s` succeeds immediately with this
# reader.
#
# The sliding window starts out empty and only ever holds characters that
# were actually read, so padding can never be mistaken for data (for
# example `" A"` is not found in `"AB"`).
#
# @param s [String] the text to search for
# @return [Either<TokenReader>]
def consume(s)
  return success(self) if s.empty?

  width    = s.length
  position = 0
  window   = ""

  while @input.defined_at?(position)
    character = @input.at(position)
    position += 1
    next if is_control?(character)

    # Slide the "window" forward one character, keeping at most `width`
    window += character
    window = window[-width, width] if window.length > width

    return success(advance(position)) if window == s
  end

  failure("reached end of input without finding #{s.inspect}")
end
# Reads the first character that is not a control character (see
# `is_control?`). On success the {Result} has that character as its `value`
# and, as its `remainder`, a new {TokenReader} over the remaining input.
# If the input holds no such character, an {Either::Failure} is returned.
#
# @return [Either<Result<String>>]
def read_character
  offset = 0

  while @input.defined_at?(offset)
    char = @input.at(offset)
    offset += 1

    return result(char, advance(offset)) unless is_control?(char)
  end

  failure("less than one character available")
end
# Reads one segment: leading control characters are skipped, then the
# segment identifier and the delimiter following it are read. After an
# element separator the elements are read with {#read_elements}; after a
# segment terminator the segment has no elements. The element definitions
# come from {#segment_dict} when it knows the identifier, otherwise an
# empty list is used.
#
# When an `:IEA` segment ends directly at its terminator, the remainder is
# `stream` of the following reader rather than the reader itself.
#
# @return [Either<Result<SegmentTok, TokenReader>>]
def read_segment
  # The input may begin with "\nNM1...", where "\n" is on line 5 but "NM1"
  # is on line 6. Skipping control characters first (consume_control_chars)
  # makes the segment position refer to line 6.
  consume_control_chars.flatmap do |start|
    start.read_segment_id.flatmap do |segment_id, after_id|
      element_uses =
        @segment_dict.defined_at?(segment_id) ? @segment_dict.at(segment_id).element_uses : []

      after_id.read_delimiter.flatmap do |delim, after_delim|
        case delim
        when @separators.element
          after_delim.read_elements(segment_id, element_uses).map do |elements, tail|
            segment(segment_id, start.input, tail.input, elements)
          end
        when @separators.segment
          remainder = segment_id == :IEA ? after_delim.stream : after_delim

          # Consume the segment terminator
          result(segment(segment_id, start.input, after_delim.input), remainder)
        end
      end
    end
  end
end
# Reads the remaining elements of a segment, up to and including the
# segment terminator. The first entry of `element_uses` decides how the
# next element is read (composite or simple, repeatable or not); when the
# list is empty the element is read as a non-repeatable simple element.
# The method then recurses with the tail of `element_uses` for as long as
# an element separator follows.
#
# @param segment_id [Symbol] for `:IEA` the final remainder is `stream`
#   of the reader following the terminator
# @param element_uses [Array] definitions answering `composite?` and
#   `repeatable?`
# @return [Either<Result<Array<SimpleElementTok, CompositeElementTok>, TokenReader>>]
def read_elements(segment_id, element_uses)
  first_element =
    if element_uses.empty?
      read_simple_element
    else
      use        = element_uses.head
      repeatable = use.repeatable?
      use.composite? ? read_composite_element(repeatable) : read_simple_element(repeatable)
    end

  first_element.flatmap do |element, after_element|
    after_element.read_delimiter.flatmap do |delim, after_delim|
      case delim
      when @separators.segment
        remainder = segment_id == :IEA ? after_delim.stream : after_delim

        # Last element before the segment terminator: wrap it in a
        # singleton list and _do_ consume the delimiter
        result(element.cons, remainder)
      when @separators.element
        # Another element follows the delimiter
        following = after_delim.read_elements(segment_id, element_uses.tail)
        following.map { |elements, _| element.cons(elements) }
      end
    end
  end
end
# Reads the component elements of one composite element. Reading stops at
# the first segment terminator, element separator or repetition separator,
# which is left unconsumed; a component separator means another component
# follows and is read recursively.
#
# @param repeatable [Boolean] forwarded to {#read_component_element}
# @return [Either<Result<Array<ComponentElementTok>, TokenReader>>]
def read_component_elements(repeatable = false)
  read_component_element(repeatable).flatmap do |component, at_delimiter|
    at_delimiter.read_delimiter.flatmap do |delim, past_delimiter|
      case delim
      when @separators.segment, @separators.element, @separators.repetition
        # Last component of the composite: wrap it in a singleton list and
        # hand back the reader that still points at the delimiter
        result(component.cons, at_delimiter)
      when @separators.component
        following = past_delimiter.read_component_elements(repeatable)
        following.map { |components, _| component.cons(components) }
      end
    end
  end
end
# Reads a segment identifier: up to three non-control characters, ended by
# a delimiter or by a fourth character. The collected text must match
# `SEGMENT_ID` and must be followed by a segment terminator or an element
# separator, which is left unconsumed.
#
# Running out of input is reported through `eof`; every other problem
# through `failure`.
#
# @return [Either<Result<Symbol, TokenReader>>]
def read_segment_id
  offset = 0
  id     = ""

  while true
    unless @input.defined_at?(offset)
      return eof("reached end of input without finding a segment identifier")
    end

    char = @input.at(offset)
    offset += 1

    break if is_delimiter?(char)
    next if is_control?(char)
    break if id.length == 3

    id += char
  end

  # At this point `char` is either a delimiter, or the character that
  # follows three identifier characters already collected in `id`
  unless id =~ SEGMENT_ID
    return failure("found #{(id + char).inspect} instead of segment identifier")
  end

  remainder = advance(offset - 1)

  case char
  when @separators.segment, @separators.element
    # Don't consume the delimiter
    result(id.upcase.to_sym, remainder)
  else
    failure("found #{char.inspect} following segment identifier")
  end
end
# Reads the next non-control character and requires it to be one of the
# delimiters (see `is_delimiter?`). On success the delimiter is consumed.
#
# @return [Either<Result<Character, TokenReader>>]
def read_delimiter
  offset = 0

  while @input.defined_at?(offset)
    char = @input.at(offset)
    offset += 1
    next if is_control?(char)

    return result(char, advance(offset)) if is_delimiter?(char)
    return failure("found #{char.inspect} instead of a delimiter")
  end

  failure("reached end of input without finding a delimiter")
end
# Reads one simple element, skipping control characters. The element ends
# at a segment terminator or element separator, which is left unconsumed
# so the next reader can tell which token to expect.
#
# When `repeatable` is true, a repetition separator ends the current
# occurrence and the following occurrence is read recursively. Otherwise
# the repetition separator is read as data, as is a component separator.
#
# @param repeatable [Boolean] whether repetition separators split the element
# @return [Either<Result<SimpleElementTok, TokenReader>>]
def read_simple_element(repeatable = false)
  offset = 0
  text   = ""

  while @input.defined_at?(offset)
    char = @input.at(offset)
    offset += 1
    next if is_control?(char)

    if char == @separators.segment || char == @separators.element
      token = simple(text, @input, @input.drop(offset))
      token = token.repeated if repeatable
      return result(token, advance(offset - 1))
    elsif repeatable && char == @separators.repetition
      token     = simple(text, @input, @input.drop(offset))
      following = advance(offset).read_simple_element(repeatable)
      return following.map { |element, _| element.repeated(token) }
    end

    # @todo: a repetition separator (when not repeatable) or a component
    # separator ends up here and is read as data, without any warning
    text += char
  end

  failure("reached end of input without finding a simple data element")
end
# Reads one component element, skipping control characters. The component
# ends at an element separator, segment terminator or component separator,
# and -- only when `repeatable` is true -- at a repetition separator. The
# ending delimiter is never consumed. A repetition separator met while
# `repeatable` is false is read as data.
#
# @param repeatable [Boolean] whether a repetition separator ends the component
# @return [Either<Result<ComponentElementTok, TokenReader>>]
def read_component_element(repeatable = false)
  offset = 0
  text   = ""

  while @input.defined_at?(offset)
    char = @input.at(offset)
    offset += 1
    next if is_control?(char)

    ends_component =
      char == @separators.element ||
      char == @separators.segment ||
      char == @separators.component ||
      (repeatable && char == @separators.repetition)

    if ends_component
      # Don't consume the separator/terminator
      token = component(text, @input, @input.drop(offset))
      return result(token, advance(offset - 1))
    end

    # @todo: a repetition separator in a non-repeatable component is read
    # as data here, without any warning
    text += char
  end

  failure("reached end of input without finding a component data element")
end
# Reads one composite element from its component elements. When a segment
# terminator or element separator follows, the composite is complete and
# that delimiter is left unconsumed. When a repetition separator follows,
# the next occurrence is read recursively and combined with this one
# through `repeated`.
#
# @param repeatable [Boolean] when true, the token is passed through
#   `repeated` before being returned
# @return [Either<Result<CompositeElementTok, TokenReader>>]
def read_composite_element(repeatable = false)
  read_component_elements(repeatable).flatmap do |components, at_delimiter|
    token = composite(components, @input, at_delimiter.input)

    at_delimiter.read_delimiter.flatmap do |delim, past_delimiter|
      case delim
      when @separators.segment, @separators.element
        # Hand back the reader that still points at the delimiter
        result(repeatable ? token.repeated : token, at_delimiter)
      when @separators.repetition
        past_delimiter.read_composite_element(repeatable).map do |following, _|
          following.repeated(token)
        end
      end
    end
  end
end
# Pretty-printer hook: renders the reader as `TokenReader(input, separators)`
# using the printer object `q`. The segment dictionary is not printed.
#
# @param q [Object] printer answering `text`, `group`, `breakable` and `pp`
# @return [void]
def pretty_print(q)
q.text("TokenReader")
# Everything between the parentheses is indented by 2 when wrapped
q.group(2, "(", ")") do
q.breakable ""
q.pp @input
q.text ","
q.breakable
q.pp @separators
end
end
private
# Builds a reader over the input with its first `n` characters dropped,
# keeping the same separators and segment dictionary.
#
# NOTE(review): callers pass `position - 1`, which can be 0; the guard then
# asks `defined_at?(-1)`, whose answer depends on the input type -- confirm.
#
# @param n [Integer] number of characters to skip
# @raise [IndexError] when `@input.defined_at?(n - 1)` is false
# @return [TokenReader]
def advance(n)
  raise IndexError, "less than #{n} characters available" unless @input.defined_at?(n - 1)

  TokenReader.new(@input.drop(n), @separators, @segment_dict)
end
# Tells whether `character` equals one of the four separators (segment,
# element, component, repetition) of this reader.
#
# @param character [String]
# @return [Boolean]
def is_delimiter?(character)
  delimiters = [
    @separators.segment,
    @separators.element,
    @separators.component,
    @separators.repetition
  ]

  delimiters.any? { |delimiter| character == delimiter }
end
# Tells whether `character` should be skipped while tokenizing: it must be
# a control character according to `Reader.is_control_character?` and must
# not be one of this reader's delimiters.
#
# @param character [String]
# @return [Boolean]
def is_control?(character)
  control = Reader.is_control_character?(character)
  control && !is_delimiter?(character)
end
# Builds a failed result via `Result.failure`, passing `true` as the third
# argument (presumably marking the failure as fatal -- confirm against
# `Result.failure`). Compare {#eof}, which passes `false`.
#
# @param message [String] description of the problem
# @param remainder [Object] defaults to the current input
def failure(message, remainder = @input)
Result.failure(message, remainder, true)
end
# Builds a failed result via `Result.failure`, passing `false` as the third
# argument (presumably marking the failure as non-fatal -- confirm against
# `Result.failure`). Used by {#read_segment_id} when the input runs out.
#
# @param message [String] description of the problem
# @param remainder [Object] defaults to the current input
def eof(message, remainder = @input)
Result.failure(message, remainder, false)
end
# Wraps `value` with `Either.success`; used by the `consume*` methods,
# whose successful value is a reader rather than a token.
#
# @param value [Object]
def success(value)
Either.success(value)
end
# Wraps `value` together with the reader for the remaining input using
# `Result.success`; used by the `read_*` methods.
#
# @param value [Object] what was read
# @param remainder [Object] reader positioned after `value`
def result(value, remainder)
Result.success(value, remainder)
end
# Builds a `SegmentTok` from the identifier, its elements, and the
# positions reported by `input` and `remainder`.
#
# @param segment_id [Symbol]
# @param input [#position] input where the segment begins
# @param remainder [#position] input left after the segment was read
# @param elements [Array] element tokens, empty by default
def segment(segment_id, input, remainder, elements = [])
  first_position = input.position
  last_position  = remainder.position

  SegmentTok.build(segment_id, elements, first_position, last_position)
end
# Builds a `SimpleElementTok` from the element text and the positions
# reported by `input` and `remainder`.
#
# @param value [String] the element text
# @param input [#position] input where the element begins
# @param remainder [#position] input left after the element was read
def simple(value, input, remainder)
  first_position = input.position
  last_position  = remainder.position

  SimpleElementTok.build(value, first_position, last_position)
end
# Builds a `ComponentElementTok` from the component text and the positions
# reported by `input` and `remainder`.
#
# @param value [String] the component text
# @param input [#position] input where the component begins
# @param remainder [#position] input left after the component was read
def component(value, input, remainder)
  first_position = input.position
  last_position  = remainder.position

  ComponentElementTok.build(value, first_position, last_position)
end
# Builds a `CompositeElementTok` from its component tokens and the
# positions reported by `input` and `remainder`.
#
# @param value [Array] the component element tokens
# @param input [#position] input where the composite begins
# @param remainder [#position] input left after the composite was read
def composite(value, input, remainder)
  first_position = input.position
  last_position  = remainder.position

  CompositeElementTok.build(value, first_position, last_position)
end
end
end
end