GlobalNamesArchitecture/tag_along

View on GitHub
lib/tag_along/tagged_text.rb

Summary

Maintainability
A
45 mins
Test Coverage
class TagAlong

  # Splits a string containing `<...>` tags into alternating tag and text
  # segments, builds the tag-free plain text, and translates offsets that
  # refer to the plain text back into offsets of the original tagged string.
  #
  # Every entry of #offsets is a Hash:
  #   tag segment:  { type: :tag,  start:, end: }
  #   text segment: { type: :text, start:, end:, text_start:, text_end: }
  # `start`/`end` are inclusive code point indexes in the tagged string,
  # `text_start`/`text_end` are inclusive indexes in the plain text.
  class TaggedText
    # Code points of the characters that open and close a tag.
    CHR = { '<' => 60, '>' => 62 }.freeze

    # Code points of whitespace characters (tab, line feed, space, no-break
    # space, the U+2000..U+200A spaces, ideographic space, ...). Not
    # referenced by the code below; presumably reserved for the space
    # normalization that is still marked TODO.
    SPACES = { 9 => true, 10 => true, 11 => true, 12 => true,
               13 => true, 32 => true, 133 => true, 160 => true,
               5760 => true, 6158 => true, 8192 => true,
               8193 => true, 8194 => true, 8195 => true,
               8196 => true, 8197 => true, 8198 => true,
               8199 => true, 8200 => true, 8201 => true,
               8202 => true, 8232 => true, 8233 => true,
               8239 => true, 8287 => true, 12288 => true }.freeze

    attr_reader :tagged_text, :offsets

    # @param tagged_text [String] text with `<...>` tags; it is decoded
    #   with `unpack('U*')`
    # @param opts [Hash] options
    # @option opts [Boolean] :normalize_spaces routes text through the space
    #   normalization handlers. NOTE: those handlers are unimplemented, so
    #   with this option on text characters are currently discarded.
    def initialize(tagged_text, opts = {})
      @normalize_spaces = opts[:normalize_spaces] ? true : false
      @tagged_text = tagged_text
      @inside_tag = false
      @inside_space = false
      @offsets = []
      @text = []
      @text_offset = 0
      @current_offset = { type: :text, start: 0, end: nil,
                          text_start: 0, text_end: nil }
      process_tagged_text
    end

    # @return [String] the text with all tags removed
    def plain_text
      @text.pack('U*')
    end

    # Converts offsets expressed against #plain_text into offsets expressed
    # against the original tagged text.
    #
    # The offsets collection is consumed (items are shifted out of it) and
    # its items are mutated in place: their `offset_start`/`offset_end` are
    # overwritten with the adjusted values.
    #
    # @param plain_text_offsets [Offsets, Object] an Offsets instance, or
    #   anything accepted by `Offsets.new`. Items must respond to
    #   `offset_start`, `offset_end`, `adj_start` and `delete_field`
    #   (presumably OpenStruct-like; Offsets is defined elsewhere).
    # @return [TagAlong::Offsets] the adjusted offsets, in processing order
    def adjust_offsets(plain_text_offsets)
      unless plain_text_offsets.is_a?(Offsets)
        plain_text_offsets = Offsets.new(plain_text_offsets)
      end
      adjusted_offsets = TagAlong::Offsets.new([])
      @offsets.each do |offset|
        next if offset[:type] == :tag
        process_offset(plain_text_offsets, offset, adjusted_offsets)
        break if plain_text_offsets.empty?
      end
      adjusted_offsets
    end


    private

    # Adjusts every pending plain-text offset that touches the given text
    # segment. Offsets are taken from the front of the queue, so they are
    # expected to be ordered by position.
    def process_offset(plain_text_offsets, offset, adjusted_offsets)
      while (o = plain_text_offsets[0])
        # The next offset begins after this segment: try the next segment.
        break if o.offset_start > offset[:text_end]

        # Remember the translated start the first time the offset is seen;
        # an offset spanning several segments keeps its original start.
        unless o.adj_start
          o.adj_start = offset[:start] +
                        (o.offset_start - offset[:text_start])
        end

        # The offset runs past this segment: finish it in a later one.
        break if o.offset_end > offset[:text_end]

        delta = o.offset_end - offset[:text_end]
        plain_text_offsets.shift
        o.offset_start = o.delete_field(:adj_start)
        o.offset_end = offset[:end] + delta
        adjusted_offsets << o
      end
    end

    # Walks the tagged text one code point at a time, filling @offsets and
    # @text, then records the text that follows the last tag (if any).
    def process_tagged_text
      code_points = @tagged_text.unpack('U*')
      code_points.each_with_index do |chr, index|
        if @inside_tag
          process_inside_tag(chr, index)
        else
          process_outside_tag(chr, index)
        end
      end
      finish_trailing_text(code_points.size)
    end

    # Records the final text segment. Nothing is recorded when the input
    # ends inside an unterminated tag or when no character follows the
    # last tag (this also covers empty input).
    def finish_trailing_text(length)
      return if @inside_tag
      return if @current_offset[:start] >= length
      close_text_segment(length - 1)
    end

    # Handles a code point found outside of a tag: '<' closes the current
    # text segment and opens a tag, anything else is text.
    def process_outside_tag(chr, index)
      return process_text(chr) unless chr == CHR['<']

      @inside_tag = true
      # A tag at the very beginning has no text in front of it. Adjacent
      # tags elsewhere still yield an empty text segment (end < start).
      close_text_segment(index - 1) if index > 0
      @current_offset = { type: :tag, start: index, end: nil }
    end

    # Completes the current text segment and stores it in @offsets.
    def close_text_segment(last_index)
      @current_offset[:end] = last_index
      @current_offset[:text_end] = @text_offset - 1
      @offsets << @current_offset
    end

    # Handles a code point found inside of a tag: '>' stores the tag and
    # opens a new text segment, everything else is ignored.
    def process_inside_tag(chr, index)
      return unless chr == CHR['>']

      @inside_tag = false
      @current_offset[:end] = index
      @offsets << @current_offset
      @current_offset = { type: :text, start: index + 1, end: nil,
                          text_start: @text_offset, text_end: nil }
    end

    # Sends a text code point to the plain or the normalizing handler.
    def process_text(chr)
      if @normalize_spaces
        process_normalized_spaces_text(chr)
      else
        add_to_text(chr)
      end
    end

    # Appends a code point to the plain text.
    def add_to_text(chr)
      @text_offset += 1
      @text << chr
    end

    def process_normalized_spaces_text(chr)
      @inside_space ? process_inside_space(chr) : process_outside_space(chr)
    end

    def process_inside_space(_chr)
      # TODO: not implemented; the code point is currently dropped
    end

    def process_outside_space(_chr)
      # TODO: not implemented; the code point is currently dropped
    end
  end
end