IBM/pytorchpipe

configs/vqa_med_2019/c2_classification/c2_word_answer_onehot_bow.yml

# Load config defining tasks for training, validation and testing.
default_configs: vqa_med_2019/c2_classification/default_c2_classification.yml

# Training parameters:
training:
  task:
    batch_size: 128
  terminal_conditions:
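    # Stop training after a fixed number of episodes.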
    episode_limit: 1000

# Validation parameters:
validation:
  task:
    batch_size: 128

pipeline:
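  # Components below are executed in order of increasing priority.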

  # Answer encoding.
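  # Tokenize the ground-truth answers into lowercased words with punctuation removed.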
  answer_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    preprocessing: lowercase,remove_punctuation
    remove_characters: [“,”,’]
    streams: 
      inputs: answers
      outputs: tokenized_answer_words

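  # Encode the tokenized answer words using the word mappings loaded from file;
  # export the vocabulary size and word mappings to globals for the components below.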
  answer_onehot_encoder:
    type: SentenceOneHotEncoder
    priority: 1.2
    data_folder: ~/data/vqa-med
    word_mappings_file: answer_words.c2.preprocessed.word.mappings.csv
    export_word_mappings_to_globals: True
    streams:
      inputs: tokenized_answer_words
      outputs: encoded_answer_words
    globals:
      vocabulary_size: answer_words_vocabulary_size
      word_mappings: answer_words_word_mappings

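  # Aggregate the encoded answer words into a single bag-of-words vector
  # of size equal to the answer-word vocabulary.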
  answer_bow_encoder:
    type: BOWEncoder
    priority: 1.3
    streams:
      inputs: encoded_answer_words
      outputs: bow_answer_words
    globals:
      bow_size: answer_words_vocabulary_size

  # Model.
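  # Feed-forward classifier with a single hidden layer and dropout,
  # mapping the BOW representation to the C2 answer classes.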
  classifier:
    type: FeedForwardNetwork 
    hidden_sizes: [50]
    dropout_rate: 0.5
    priority: 3
    streams:
      inputs: bow_answer_words
    globals:
      input_size: answer_words_vocabulary_size
      prediction_size: vocabulary_size_c2

  # Viewers.
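  # Log the contents of the raw, tokenized and predicted answer streams.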
  viewer:
    type: StreamViewer
    priority: 100.4
    input_streams: answers, tokenized_answer_words, predicted_answers
 
#: pipeline