IBM/pytorchpipe

View on GitHub
configs/default/components/language/sentence_indexer.yml

Summary

Maintainability
Test Coverage
# This file defines the default values for Sentence Indexer.

####################################################################
# 1. CONFIGURATION PARAMETERS that will be LOADED by the component.
####################################################################

# Folder where task will store data (LOADED)
data_folder: '~/data/'

# Source files that will be used to create the vocabulary (LOADED)
# Empty by default.
source_vocabulary_files: ''

# Additional tokens that will be added to vocabulary (LOADED)
# Written as one comma-separated string.
# This list can be extended, but <PAD> and <EOS> are special tokens.
# <PAD> is ALWAYS used for padding shorter sequences.
additional_tokens: '<PAD>,<EOS>'

# Enable <EOS> (end of sequence) token (LOADED)
eos_token: False

# HACK: This key is not useful for this component, but it is needed by the
# parent class. Should be removed/fixed in the future.
export_pad_index_to_globals: False

# File containing word mappings (LOADED)
word_mappings_file: 'word_mappings.csv'

# If set, component will always (re)generate the vocabulary (LOADED)
regenerate: False

# Flag informing whether word mappings will be imported from globals (LOADED)
# See the `word_mappings` / `vocabulary_size` keys in the `globals` section.
import_word_mappings_from_globals: False

# Flag informing whether word mappings will be exported to globals (LOADED)
# See the `word_mappings` / `vocabulary_size` keys in the `globals` section.
export_word_mappings_to_globals: False

# Fixed padding length (LOADED)
# -1  -> For each batch, automatically pad to the length of the longest sequence of the batch
#        (variable from batch to batch)
# > 0 -> Pad each sequence to the chosen length (fixed for all batches)
fixed_padding: -1

# Operation mode. If 'reverse' is True, then it will change indices into words (LOADED)
reverse: False

# Flag indicating whether inputs are represented as distributions or indices (LOADED)
# Options: True (expects distribution for each input item in sequence)
#          False (expects indices (max args))
use_input_distributions: False

streams:
  ####################################################################
  # 2. Keymappings associated with INPUT and OUTPUT streams.
  ####################################################################
  # By default each name below is mapped to a stream key of the same name.

  # Stream containing input tensor (INPUT)
  inputs: inputs

  # Stream containing output tensor (OUTPUT)
  outputs: outputs

globals:
  ####################################################################
  # 3. Keymappings of variables that will be RETRIEVED from GLOBALS.
  ####################################################################

  ####################################################################
  # 4. Keymappings associated with GLOBAL variables that will be SET.
  ####################################################################

  # NOTE: the keys below are listed once for both sections 3 and 4; whether
  # a key is RETRIEVED or SET is described in its own comment.

  # The loaded/exported word mappings (RETRIEVED/SET)
  # This depends on the import/export configuration flags above.
  word_mappings: word_mappings

  # Size of the vocabulary (RETRIEVED/SET)
  # This depends on the import/export configuration flags above.
  vocabulary_size: vocabulary_size

  # Index of the <PAD> token (SET)
  # Will be set only if the pad-export flag is True.
  # NOTE(review): this comment used to name `export_pad_mapping_to_globals`,
  # but the key defined in this file is `export_pad_index_to_globals`.
  # Confirm which key the component actually reads.
  pad_index: pad_index

  ####################################################################
  # 5. Keymappings associated with statistics that will be ADDED.
  ####################################################################