hck/open_nlp

View on GitHub
lib/open_nlp/parser/parse.rb

Summary

Maintainability
A
0 mins
Test Coverage
module OpenNlp
  # Ruby wrapper around a Java::opennlp.tools.parser.Parse node.
  #
  # The wrapped Java object is exposed through #j_instance; the methods below
  # walk it recursively to produce Ruby-friendly views of the parse tree.
  class Parser::Parse
    # NOTE(review): JavaClass is defined elsewhere in the project; it presumably
    # supplies the class-level `java_class` accessor used below - confirm.
    include JavaClass

    attr_reader :j_instance

    self.java_class = Java::opennlp.tools.parser.Parse

    # Wraps an existing Java parse node.
    #
    # @param [Java::opennlp.tools.parser.Parse] java_instance node to wrap
    # @raise [ArgumentError] when java_instance is not an instance of the
    #   class configured via `java_class`
    def initialize(java_instance)
      unless java_instance.is_a?(self.class.java_class)
        raise ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}"
      end

      @j_instance = java_instance
    end

    # Builds the tree bank string for this node: a nested, bracketed rendering
    # of sentence parts, parts-of-speech and words, for example:
    #   '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
    #
    # Nodes whose type equals TOK_NODE are emitted without the surrounding
    # "(TYPE ...)" wrapper. Any text of this node that is not covered by a
    # child span is copied through unchanged.
    #
    # @return [String]
    def tree_bank_string
      node_span = j_instance.getSpan
      source = j_instance.getText
      label = j_instance.getType
      bracketed = label != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE

      out = ''
      out << "(#{label} " if bracketed

      # cursor tracks how far into the text we have already emitted.
      cursor = node_span.getStart
      j_instance.getChildren.each do |child|
        child_span = child.span
        child_start = child_span.getStart

        # Emit the gap between the previous child (or node start) and this child.
        out << source[cursor...child_start] if cursor < child_start
        out << self.class.new(child).tree_bank_string
        cursor = child_span.getEnd
      end

      # Emit whatever remains after the last child up to the end of this node.
      node_end = node_span.getEnd
      out << source[cursor...node_end] if cursor < node_end
      out << ')' if bracketed

      out
    end

    # Builds an array representation of the subtree below this node. Each
    # direct child becomes a hash with the following fields:
    #
    #  :type => <[String] node type of the child>,
    #  :parent_type => <[String] type of this (parent) node>,
    #  :token => <[String] the child's Java toString value>,
    #  :children => <Array[Hash] child node hashes; key is omitted when the child has none>
    #
    # @return [Array<Hash>]
    def code_tree
      parent_type = j_instance.getType

      j_instance.getChildren.map do |child|
        node = { type: child.getType, parent_type: parent_type, token: child.toString }
        descendants = self.class.new(child).code_tree
        descendants.empty? ? node : node.merge(children: descendants)
      end
    end
  end
end