# configs/vqa_med_2019/c4_classification/c4_word_answer_glove_sum.yml
# Base configuration defining the tasks for training, validation and testing.
# NOTE(review): the sections below presumably override values from this
# file - confirm against the config loader.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml
# Training parameters: task settings and data loading for the training split.
training:
  task:
    # Restrict the task to category C4.
    categories: C4
    batch_size: 512
    # Images are not used by this configuration, so they are not streamed.
    stream_images: false
  dataloader:
    # NOTE(review): 0 presumably means batches are loaded in the main
    # process - confirm against the data loader in use.
    num_workers: 0
# Validation parameters: same task settings as training, applied to the
# validation split.
validation:
  task:
    # Restrict the task to category C4.
    categories: C4
    batch_size: 512
    # Images are not used by this configuration, so they are not streamed.
    stream_images: false
  dataloader:
    # NOTE(review): 0 presumably means batches are loaded in the main
    # process - confirm against the data loader in use.
    num_workers: 0
# Processing pipeline: tokenizes the answers, embeds the words, sums the word
# embeddings into one vector per answer and feeds that vector to a
# feed-forward classifier. Each component declares its own `priority`.
# NOTE(review): components presumably execute in ascending priority order -
# confirm against the pipeline implementation.
pipeline:

  # Publishes answer_word_embeddings_size = 100 as a global variable.
  # Keep the value in sync with `embeddings_size` in answer_embeddings.
  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    keys: [answer_word_embeddings_size]
    values: [100]

  # Answer encoding: stream `answers` -> `tokenized_answer_words`.
  answer_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    preprocessing: lowercase,remove_punctuation
    # Typographic quote characters, quoted explicitly so each list item is
    # unambiguous next to the separating commas.
    remove_characters: ['“', '”', '’']
    streams:
      inputs: answers
      outputs: tokenized_answer_words

  # Model 1: word embeddings initialized from a pretrained GloVe file.
  # NOTE(review): `embeddings_size` presumably has to match the dimension of
  # the pretrained file (glove.6B.100d) - confirm.
  answer_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    data_folder: '~/data/vqa-med'
    word_mappings_file: answer_words.c4.preprocessed.word.mappings.csv
    export_word_mappings_to_globals: true
    streams:
      inputs: tokenized_answer_words
      outputs: encoded_answer_words
    globals:
      vocabulary_size: answer_words_vocabulary_size
      word_mappings: answer_words_word_mappings

  # Sums a 3-dimensional input over dimension 1 without keeping that
  # dimension: `encoded_answer_words` -> `reduced_answers`.
  # NOTE(review): dimension 1 is presumably the word axis - confirm.
  answer_reduction:
    type: ReduceTensor
    priority: 1.3
    num_inputs_dims: 3
    reduction_dim: 1
    reduction_type: sum
    keepdim: false
    streams:
      inputs: encoded_answer_words
      outputs: reduced_answers
    globals:
      input_size: answer_word_embeddings_size

  # Model: feed-forward classifier over the reduced answer vectors.
  classifier:
    type: FeedForwardNetwork
    hidden_sizes: [500, 500]
    dropout_rate: 0.5
    priority: 3
    streams:
      inputs: reduced_answers
    globals:
      input_size: answer_word_embeddings_size
      prediction_size: vocabulary_size_c4

  # Viewers.
  # NOTE(review): no component in this section writes a `predicted_answers`
  # stream; it is presumably produced by a component from the default
  # config - confirm.
  viewer:
    type: StreamViewer
    priority: 100.4
    input_streams: answers, tokenized_answer_words, predicted_answers

# End of pipeline section.