# configs/default/components/models/language/sentence_embeddings.yml
# This file defines the default values for the Sentence Embeddings component.
####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################
# Folder where task will store data (LOADED)
data_folder: '~/data/'
# Source files that will be used to create the vocabulary (LOADED)
source_vocabulary_files: ''
# Additional tokens that will be added to vocabulary (LOADED)
# This list can be extended, but <PAD> and <EOS> are special tokens.
# <PAD> is ALWAYS used for padding shorter sequences.
additional_tokens: '<PAD>'
# Enable <EOS> (end of sequence) token.
eos_token: False
# Flag informing whether the index of the <PAD> token will be exported
# to globals (see the `pad_index` keymapping in the globals section).
export_pad_index_to_globals: False
# File containing word mappings (LOADED)
word_mappings_file: 'word_mappings.csv'
# If set, component will always (re)generate the vocabulary (LOADED)
regenerate: False
# Flag informing whether word mappings will be imported from globals (LOADED)
import_word_mappings_from_globals: False
# Flag informing whether word mappings will be exported to globals (LOADED)
export_word_mappings_to_globals: False
# Fixed padding length
# -1  -> For each batch, automatically pad to the length of the longest
#        sequence in the batch (variable from batch to batch)
# > 0 -> Pad each sequence to the chosen length (fixed for all batches)
fixed_padding: -1
# File containing pretrained embeddings (LOADED)
# Empty means that no embeddings will be loaded.
# Options:
# '' | glove.6B.50d.txt | glove.6B.100d.txt | glove.6B.200d.txt | glove.6B.300d.txt |
# glove.42B.300d.txt | glove.840B.300d.txt | glove.twitter.27B.txt | mimic.fastText.no_clean.300d.pickled
pretrained_embeddings_file: ''
streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################
  # Each entry maps a stream key used by the component to the name of
  # the stream. The entries must be nested under `streams`; left at
  # column 0 they become top-level keys and `streams` itself is null.

  # Stream containing batch of inputs (INPUT)
  inputs: inputs

  # Stream containing predictions (OUTPUT)
  predictions: predictions
globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  ####################################################################
  # 4. Keymappings associated with GLOBAL variables that will be SET.
  ####################################################################
  # The entries below must be nested under `globals`; left at column 0
  # they become top-level keys and `globals` itself is null.

  # The loaded/exported word mappings (RETRIEVED/SET)
  # This depends on the import/export configuration flags above.
  word_mappings: word_mappings

  # Size of the vocabulary (RETRIEVED/SET)
  # This depends on the import/export configuration flags above.
  vocabulary_size: vocabulary_size

  # Size of the embeddings (SET)
  # It is exported to globals, so other components can use it during
  # their initialization.
  embeddings_size: embeddings_size

  # Index of the <PAD> token (SET)
  # Will be set only if `export_pad_index_to_globals == True`
  pad_index: pad_index
####################################################################
# 5. Keymappings associated with statistics that will be ADDED.
####################################################################