malavbhavsar/sentimentalizer

View on GitHub
lib/engine/corpus.rb

Summary

Maintainability
A
0 mins
Test Coverage
require "#{File.dirname(__FILE__)}/document"

# Keeps a running tally of how many times each token has been seen across
# all documents added to the corpus.
class Corpus

  # Starts with an empty token => occurrence-count table.
  def initialize
    @tokens = {}
  end

  # Total number of token occurrences recorded so far (the sum of every
  # per-token count), or 0 when nothing has been added.
  def entry_count
    @tokens.each_value.reduce(0) { |total, occurrences| total + occurrences }
  end

  # Bumps the count of every token yielded by +document+.
  #
  # +document+ only needs to respond to +each_token+ and yield one token
  # per call to the block.
  def add(document)
    document.each_token { |token| @tokens[token] = 1 + token_count(token) }
  end

  # Reads every "*.txt" file directly inside +directory+ as UTF-8 and adds
  # each line to the corpus as its own Document.
  #
  # NOTE(review): Document comes from the sibling "document" file required
  # at the top of this file; how it tokenizes a line is not visible here.
  def load_from_directory(directory)
    pattern = "#{directory}/*.txt"
    Dir.glob(pattern) do |path|
      IO.foreach(path, encoding: Encoding::UTF_8) { |line| add(Document.new(line)) }
    end
  end

  # Number of times +token+ has been recorded; 0 for a token never seen.
  def token_count(token)
    # Stored counts are always integers, so a plain default lookup suffices.
    @tokens.fetch(token, 0)
  end
end