# configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml
# Load the default config defining tasks for training, validation and testing.
default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml

# Pipeline of components; each component carries a numeric `priority`
# (0, 1.x, 2.x, 3.x, 4.x) that groups it into the subpipelines below.
pipeline:

  # Publishes the sizes shared by the components below as global variables.
  # `keys` and `values` are parallel lists: keys[i] is paired with values[i].
  global_publisher:
    type: GlobalVariablePublisher
    priority: 0
    keys:
      - question_embeddings_output_size
      - image_size_encoder_input_size
      - image_size_encoder_output_size
      - image_encoder_output_size
    values: [100, 2, 10, 100]

  # ---------------------------------------------------------------------
  # 1st subpipeline: question.
  # ---------------------------------------------------------------------

  # Question encoding, step 1: tokenize the `questions` stream.
  question_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    streams:
      inputs: questions
      outputs: tokenized_questions

  # Model 1: embeddings of the tokenized questions.
  question_embeddings:
    type: SentenceEmbeddings
    priority: 1.2
    embeddings_size: 50
    # File name indicates 50-dimensional vectors, matching embeddings_size.
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: RNN (LSTM) over the embedded questions.
  question_lstm:
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    priority: 1.3
    use_logsoftmax: false
    initial_state: Zero
    # num_layers: 5
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      # NOTE(review): `embeddings_size` is not published by global_publisher
      # above; presumably it is exported by question_embeddings - confirm.
      input_size: embeddings_size
      prediction_size: question_embeddings_output_size

  # ---------------------------------------------------------------------
  # 2nd subpipeline: image size.
  # ---------------------------------------------------------------------

  # Model - image size classifier; sizes come from the published globals
  # (input 2, output 10).
  image_size_encoder:
    type: FeedForwardNetwork
    priority: 2.1
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  # ---------------------------------------------------------------------
  # 3rd subpipeline: image.
  # ---------------------------------------------------------------------

  # Image encoder.
  # NOTE(review): the file name mentions vgg16, but no model is selected
  # here - confirm that the component default is the intended one.
  image_encoder:
    type: GenericImageEncoder
    priority: 3.1
    streams:
      inputs: images
      outputs: image_activations
    globals:
      output_size: image_encoder_output_size

  # ---------------------------------------------------------------------
  # 4th subpipeline: concatenation + FF.
  # ---------------------------------------------------------------------

  # Concatenates the three activation streams along dim 1.
  concat:
    type: ConcatenateTensor
    priority: 4.1
    input_streams:
      - question_activations
      - image_size_activations
      - image_activations
    # ConcatenateTensor
    dim: 1  # default
    # Second dimensions mirror the globals published above (100, 10, 100);
    # output width is their sum: 100 + 10 + 100 = 210.
    input_dims: [[-1, 100], [-1, 10], [-1, 100]]
    output_dims: [-1, 210]
    streams:
      outputs: concatenated_activations
    globals:
      output_size: output_size

  # Final classifier over the concatenated activations.
  classifier:
    type: FeedForwardNetwork
    hidden_sizes: [100]
    priority: 4.2
    streams:
      inputs: concatenated_activations
    globals:
      # Reads the `output_size` global mapped by `concat` above.
      input_size: output_size
      # NOTE(review): `vocabulary_size_c3` is not defined in this file;
      # presumably it comes from the default config - confirm.
      prediction_size: vocabulary_size_c3
#: pipeline