louismullie/treat

View on GitHub
lib/treat/config/data/languages/english.rb

Summary

Maintainability
A
0 mins
Test Coverage
# English-language configuration data, expressed as one hash literal with
# three top-level keys: :dependencies, :workers and :stop_words.
{
  # Library names associated with English support.
  # NOTE(review): presumably gem names consumed by the config loader -- confirm.
  dependencies: %w[
    rbtagger ruby-stemmer punkt-segmenter tactful_tokenizer
    nickel rwordnet uea-stemmer engtagger
    activesupport srx-english scalpel
  ],

  # Worker identifiers, grouped by category and then by task.
  # NOTE(review): the order inside each list may be significant to the
  # consumer (e.g. first entry used as a default) -- it is kept as-is.
  workers: {
    extractors: {
      time:     [:chronic, :kronic, :ruby, :nickel],
      topics:   [:reuters],
      name_tag: [:stanford]
    },
    inflectors: {
      conjugators:   [:linguistics],
      declensors:    [:english, :linguistics],
      stemmers:      [:porter, :porter_c, :uea],
      ordinalizers:  [:linguistics],
      cardinalizers: [:linguistics]
    },
    lexicalizers: {
      taggers:      [:lingua, :brill, :stanford],
      sensers:      [:wordnet],
      categorizers: [:from_tag]
    },
    processors: {
      parsers:    [:stanford],
      segmenters: [:scalpel, :srx, :tactful, :punkt, :stanford],
      tokenizers: [:ptb, :stanford, :punkt, :open_nlp]
    }
  },

  # Lower-case English stop words, in alphabetical order. Entries such as
  # "isn", "wasn" and "wouldn" look like the leading halves of contractions.
  # NOTE(review): presumably what a tokenizer leaves after splitting on the
  # apostrophe -- confirm against the tokenizers in use.
  stop_words: %w[
    about also are away
    because been beside besides between but
    cannot could
    did
    etc even ever every
    for
    had have how
    into isn
    maybe
    non nor now
    should such
    than that then these this those though too
    was wasn were what when where which while who whom whose will with
    would wouldn
    yes
  ]
}