apraditya/indonesian_stemmer

View on GitHub
lib/indonesian_stemmer.rb

Summary

Maintainability
A
2 hrs
Test Coverage
require "indonesian_stemmer/version"
require "indonesian_stemmer/morphological_utility"

# Entry point for stemming Indonesian words.
#
# All behaviour lives on the module's singleton (`IndonesianStemmer.stem`).
# The affix-removal helpers (`remove_particle`, `remove_suffix`, ...) and
# `total_syllables` are not defined here; they are expected to come from
# MorphologicalUtility.
#
# Working state (@flags, @number_of_syllables) is stored in instance variables
# of the module itself, so it is shared by every call to `stem`.
module IndonesianStemmer

  class << self
    include MorphologicalUtility

    # Syllable count of the word currently being stemmed. It is set at the
    # start of every single-word `stem` call and gates each removal step.
    # NOTE(review): presumably the MorphologicalUtility helpers lower it as
    # they strip affixes -- confirm against that module.
    attr_accessor :number_of_syllables

    # Stems a word, or each word of a whitespace-separated phrase.
    #
    # @param word [String] the text to stem. For a single word the helpers are
    #   applied to this very object and the same object is returned.
    # @param derivational_stemming [Boolean] when false, the derivational
    #   prefix/suffix pass is skipped and only particle and possessive-pronoun
    #   removal are attempted.
    # @return [String, Array<String>] the given word for single-word input; an
    #   Array with one stemmed String per word when the input contains
    #   whitespace.
    def stem(word, derivational_stemming = true)
      # Reset per-call flag state; it is not read anywhere in this file.
      # NOTE(review): presumably consumed by MorphologicalUtility -- confirm.
      @flags = 0

      if word =~ /\s/
        # Forward the caller's choice so that every word of a phrase is
        # stemmed with the same setting as a single word would be.
        word.split(' ').map { |w| stem(w, derivational_stemming) }
      else
        @number_of_syllables = total_syllables word

        remove_particle(word) if still_has_many_syllables?
        remove_possessive_pronoun(word) if still_has_many_syllables?

        stem_derivational(word) if derivational_stemming

        word
      end
    end


    private
      # Runs the derivational pass on +word+.
      #
      # A change in `word.size` is used to detect whether a removal step
      # actually stripped something:
      # * first-order prefix removed  -> try the suffix, and only if that also
      #   shortened the word, try a second-order prefix;
      # * first-order prefix not removed -> try a second-order prefix, then
      #   the suffix.
      def stem_derivational(word)
        previous_size = word.size
        remove_first_order_prefix(word) if still_has_many_syllables?
        if previous_size != word.size
          previous_size = word.size
          remove_suffix(word) if still_has_many_syllables?

          if previous_size != word.size
            remove_second_order_prefix(word) if still_has_many_syllables?
          end
        else
          remove_second_order_prefix(word) if still_has_many_syllables?
          remove_suffix(word) if still_has_many_syllables?
        end
      end

      # True while the tracked syllable count is above two; every removal
      # step in this file is skipped once it is two or fewer.
      def still_has_many_syllables?
        @number_of_syllables > 2
      end
  end
end

# Core extension: convenience wrappers around IndonesianStemmer.stem.
class String
  # Stems a copy of the receiver, leaving the receiver itself untouched.
  #
  # @return [String, Array<String>] whatever IndonesianStemmer.stem returns
  #   for the copy.
  def stem
    IndonesianStemmer.stem(dup)
  end

  # Passes the receiver itself to IndonesianStemmer.stem, so any in-place
  # changes made by the stemmer are applied to this string.
  #
  # @return [String, Array<String>] whatever IndonesianStemmer.stem returns
  #   for the receiver.
  def stem!
    IndonesianStemmer.stem(self)
  end
end