# lib/treat/config/data/languages/english.rb
# English-language configuration data, written as one hash literal with
# three top-level keys: :dependencies, :workers and :stop_words.
{
  # Names listed as dependencies for this language.
  # NOTE(review): these look like gem names -- confirm how the consumer
  # of this file resolves them.
  dependencies: %w[
    rbtagger
    ruby-stemmer
    punkt-segmenter
    tactful_tokenizer
    nickel
    rwordnet
    uea-stemmer
    engtagger
    activesupport
    srx-english
    scalpel
  ],

  # Worker identifiers, grouped by category and then by task.
  # NOTE(review): the order inside each list is preserved exactly; it may
  # be significant to the consumer (e.g. first entry as default) -- confirm.
  workers: {
    extractors: {
      time: %i[chronic kronic ruby nickel],
      topics: %i[reuters],
      name_tag: %i[stanford]
    },
    inflectors: {
      conjugators: %i[linguistics],
      declensors: %i[english linguistics],
      stemmers: %i[porter porter_c uea],
      ordinalizers: %i[linguistics],
      cardinalizers: %i[linguistics]
    },
    lexicalizers: {
      taggers: %i[lingua brill stanford],
      sensers: %i[wordnet],
      categorizers: %i[from_tag]
    },
    processors: {
      parsers: %i[stanford],
      segmenters: %i[scalpel srx tactful punkt stanford],
      tokenizers: %i[ptb stanford punkt open_nlp]
    }
  },

  # Stop words, kept in alphabetical order. Entries such as "isn", "wasn"
  # and "wouldn" are presumably the leading halves of contractions
  # ("isn't", ...) -- TODO confirm against the tokenizer in use.
  stop_words: %w[
    about also are away because been beside besides between but
    cannot could did etc even ever every for had have
    how into isn maybe non nor now should such than
    that then these this those though too was wasn were
    what when where which while who whom whose will with
    would wouldn yes
  ]
}