# IBM/pytorchpipe
#
# View on GitHub
# configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml
#
# Summary
#
# Maintainability
# Test Coverage
# Load base config defining tasks for training, validation and testing.
# NOTE(review): the training/validation/pipeline sections below appear to
# override or extend the defaults loaded here — confirm against the loader.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml

# Training parameters:
training:
  task:
    categories: C4
    batch_size: 512
    # In here we won't use images at all.
    stream_images: False
  dataloader:
    # Single-process data loading (no worker subprocesses).
    num_workers: 0

# Validation parameters (mirror the training section above):
validation:
  task:
    categories: C4
    batch_size: 512
    # In here we won't use images at all.
    stream_images: False
  dataloader:
    # Single-process data loading (no worker subprocesses).
    num_workers: 0


pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Add input_size to globals.
    keys: [answer_word_embeddings_size]
    values: [100]

  # Answer encoding.
  answer_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    preprocessing: lowercase,remove_punctuation
    # Each smart-quote glyph is quoted explicitly so it is parsed as a
    # one-character string instead of relying on plain-scalar parsing.
    remove_characters: ['“', '”', '’']
    streams:
      inputs: answers
      outputs: tokenized_answer_words

  # Model 1: Embeddings
  answer_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv
    export_word_mappings_to_globals: True
    streams:
      inputs: tokenized_answer_words
      outputs: encoded_answer_words
    globals:
      vocabulary_size: answer_words_vocabulary_size
      word_mappings: answer_words_word_mappings

  # Sum-reduce the 3-D encoded answers over dim 1 (presumably the word
  # axis — TODO confirm against ReduceTensor), dropping that dimension.
  answer_reduction:
    type: ReduceTensor
    priority: 1.3
    num_inputs_dims: 3
    reduction_dim: 1
    reduction_type: sum
    keepdim: False
    streams:
      inputs: encoded_answer_words
      outputs: reduced_answers
    globals:
      input_size: answer_word_embeddings_size

  # Model.
  classifier:
    type: FeedForwardNetwork
    hidden_sizes: [500, 500]
    dropout_rate: 0.5
    priority: 3
    streams:
      inputs: reduced_answers
    globals:
      input_size: answer_word_embeddings_size
      prediction_size: vocabulary_size_c4

  # Viewers.
  viewer:
    type: StreamViewer
    priority: 100.4
    input_streams: answers, tokenized_answer_words, predicted_answers

#: pipeline