# File: configs/vqa_med_2019/c4_classification/c4_classification_all_rnn_vgg16_ewm_size.yml
# Load config defining tasks for training, validation and testing.
default_configs: vqa_med_2019/c4_classification/default_c4_classification.yml

# Processing pipeline: a question branch (tokenizer -> embeddings -> LSTM) and
# an image branch are fused, passed through a feed-forward network,
# concatenated with an encoding of the image size and finally classified.
# Every component is a mapping nested under `pipeline`; keeping this nesting is
# essential, because all components reuse the same key names (`priority`,
# `type`, `streams`, `globals`) and would otherwise collide as duplicate keys.
# NOTE(review): components are presumably executed in ascending `priority`
# order - confirm against the framework.
pipeline:

  global_publisher:
    priority: 0
    type: GlobalVariablePublisher
    # Sizes shared between components through globals.
    # `keys` and `values` are parallel lists and must keep the same length and
    # order (presumably keys[i] is published with values[i]).
    keys:
      - question_encoder_output_size
      - image_encoder_output_size
      - element_wise_activation_size
      - image_size_encoder_input_size
      - image_size_encoder_output_size
    values: [100, 100, 100, 2, 10]

  ################# PIPE 0: question #################
  # Questions encoding.
  question_tokenizer:
    priority: 1.1
    type: SentenceTokenizer
    streams:
      inputs: questions
      outputs: tokenized_questions

  # Model 1: Embeddings
  question_embeddings:
    priority: 1.2
    type: SentenceEmbeddings
    # Matches the 100-dimensional pretrained GloVe file below.
    embeddings_size: 100
    pretrained_embeddings_file: glove.6B.100d.txt
    # Quoted so the leading `~` can never be read as a YAML null indicator.
    data_folder: '~/data/vqa-med'
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN
  question_lstm:
    priority: 1.3
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    use_logsoftmax: false
    initial_state: Zero
    hidden_size: 50
    # dropout_rate: 0.5
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      # NOTE(review): `embeddings_size` is not published by global_publisher;
      # presumably it is exported by question_embeddings - confirm.
      input_size: embeddings_size
      prediction_size: question_encoder_output_size

  ################# PIPE 2: image #################
  # Image encoder.
  image_encoder:
    priority: 3.1
    type: GenericImageEncoder
    streams:
      inputs: images
      outputs: image_activations
    globals:
      output_size: image_encoder_output_size

  ################# PIPE 3: image-question fusion #################
  # Element wise multiplication + FF.
  question_image_fusion:
    priority: 4.1
    type: LowRankBilinearPooling
    dropout_rate: 0.5
    streams:
      image_encodings: image_activations
      question_encodings: question_activations
      outputs: element_wise_activations
    globals:
      image_encoding_size: image_encoder_output_size
      question_encoding_size: question_encoder_output_size
      output_size: element_wise_activation_size

  question_image_ffn:
    priority: 4.2
    type: FeedForwardNetwork
    hidden_sizes: [100]
    dropout_rate: 0.5
    streams:
      inputs: element_wise_activations
      predictions: question_image_activations
    globals:
      # Input and output widths are deliberately the same global.
      input_size: element_wise_activation_size
      prediction_size: element_wise_activation_size

  ################# PIPE 4: image-question-image size fusion + classification #################
  # Image size sub-pipeline.
  # Model - image size encoder.
  image_size_encoder:
    priority: 5.1
    type: FeedForwardNetwork
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  # Concatenation + FF sub-pipeline.
  concat:
    priority: 5.2
    type: ConcatenateTensor
    input_streams: [question_image_activations, image_size_activations]
    # ConcatenateTensor
    dim: 1  # default
    # Hard-coded widths: 100 and 10 mirror the values published above for
    # element_wise_activation_size and image_size_encoder_output_size, and
    # 110 is their sum. Update all three together with `values`.
    input_dims: [[-1, 100], [-1, 10]]
    output_dims: [-1, 110]
    streams:
      outputs: concatenated_activations
    globals:
      # NOTE(review): "concatentated" is misspelled, but the classifier below
      # refers to the same spelling; rename both occurrences together or not
      # at all.
      output_size: concatentated_activations_size

  classifier:
    priority: 5.3
    type: FeedForwardNetwork
    hidden_sizes: [500]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
    globals:
      input_size: concatentated_activations_size
      # NOTE(review): `vocabulary_size_c4` is not defined in this file;
      # presumably it comes from the default config - confirm.
      prediction_size: vocabulary_size_c4

#: pipeline