# configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml
# Load the base config that defines the tasks for training, validation and
# testing; the sections below adjust it for this experiment.
default_configs: vqa_med_2019/default_vqa_med_2019.yml
# Training parameters.
# NOTE(review): nesting reconstructed from a flattened source - `sampler` is
# assumed to be a sibling of `task`, with `export_sample_weights` under `task`;
# confirm against the default config.
training:
  task:
    # Question categories used by the task.
    categories: C1,C2,C3
    export_sample_weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
  sampler:
    # Same file as `export_sample_weights` above.
    weights: ~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv
# Validation parameters: same category subset as the training task.
validation:
  task:
    categories: C1,C2,C3
# Pipeline definition: the named components that follow are wired together
# through their `streams` and `globals` entries.
# NOTE(review): source indentation was lost; the component entries below are
# presumably children of this key - confirm.
pipeline:
################# PIPE 0: SHARED #################
# Add global variables: fixed layer sizes plus one single-entry lookup table
# per question category (referenced by the StringToMask components).
global_publisher:
  type: GlobalVariablePublisher
  priority: 0
  # `keys` and `values` are parallel lists of equal length; entries are
  # presumably paired by position (see the trailing comments on `values`).
  keys:
    - question_lstm_output_size
    - image_size_encoder_input_size
    - image_size_encoder_output_size
    - image_encoder_output_size
    - category_c1_word_to_ix
    - category_c2_word_to_ix
    - category_c3_word_to_ix
    - category_binary_word_to_ix
  values:
    - 100  # question_lstm_output_size
    - 2  # image_size_encoder_input_size
    - 10  # image_size_encoder_output_size
    - 100  # image_encoder_output_size
    - {"C1": 0}  # category_c1_word_to_ix
    - {"C2": 0}  # category_c2_word_to_ix
    - {"C3": 0}  # category_c3_word_to_ix
    - {"BINARY": 0}  # category_binary_word_to_ix
# Statistics component of type BatchSizeStatistics.
batch_size:
  type: BatchSizeStatistics
  priority: 0.1
# Questions encoding: turns the `questions` stream into `tokenized_questions`,
# which both question-embedding components (pipe0 and pipe1) take as input.
pipe1_question_tokenizer:
  type: SentenceTokenizer
  priority: 0.2
  streams:
    inputs: questions
    outputs: tokenized_questions
################# PIPE 0: CATEGORY #################
# Model 1 (category pipe): question embeddings, loaded from a
# question-categorization checkpoint and frozen.
# NOTE(review): nesting reconstructed from a flattened source - `freeze` is
# assumed to be a component-level key (sibling of `load`); confirm.
pipe0_question_embeddings:
  type: SentenceEmbeddings
  priority: 0.3
  # LOAD AND FREEZE #
  load:
    file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
    model: question_embeddings
  freeze: true
  ###################
  embeddings_size: 50
  pretrained_embeddings_file: glove.6B.50d.txt
  data_folder: ~/data/vqa-med
  word_mappings_file: questions.all.word.mappings.csv
  streams:
    inputs: tokenized_questions
    outputs: pipe0_embedded_questions
# Model 2 (category pipe): question LSTM, loaded from a
# question-categorization checkpoint and frozen.
# NOTE(review): nesting reconstructed from a flattened source - `freeze` is
# assumed to be a component-level key (sibling of `load`); confirm.
pipe0_lstm:
  type: RecurrentNeuralNetwork
  priority: 0.4
  cell_type: LSTM
  # LOAD AND FREEZE #
  load:
    file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
    model: lstm
  freeze: true
  ###################
  prediction_mode: Last
  initial_state: Trainable
  use_logsoftmax: false
  dropout_rate: 0.5
  streams:
    inputs: pipe0_embedded_questions
    predictions: pipe0_questions_activations
  globals:
    input_size: embeddings_size
    prediction_size: question_lstm_output_size
# Model 3 (category pipe): FFN question-category classifier, loaded from a
# question-categorization checkpoint and frozen.
# NOTE(review): nesting reconstructed from a flattened source - `freeze` is
# assumed to be a component-level key (sibling of `load`); confirm.
pipe0_classifier:
  type: FeedForwardNetwork
  priority: 0.5
  # LOAD AND FREEZE #
  load:
    file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
    model: classifier
  freeze: true
  ###################
  hidden: [50]
  dropout_rate: 0.5
  streams:
    inputs: pipe0_questions_activations
    predictions: pipe0_predicted_question_categories_preds
  globals:
    input_size: question_lstm_output_size  # Set by global publisher
    prediction_size: num_categories  # C1,C2,C3,C4, BINARY, UNK
# Decodes the category predictions into category names; the resulting
# `pipe0_predicted_question_categories_names` stream is the `strings` input of
# every per-category StringToMask component.
pipe0_category_decoder:
  type: WordDecoder
  priority: 0.6
  # Use the same word mappings as label indexer.
  import_word_mappings_from_globals: true
  streams:
    inputs: pipe0_predicted_question_categories_preds
    outputs: pipe0_predicted_question_categories_names
  globals:
    vocabulary_size: num_categories
    word_mappings: category_word_mappings
# Accuracy of the category predictions against `category_ids`, reported under
# the statistic name `categorization_accuracy`.
pipe0_category_accuracy:
  type: AccuracyStatistics
  priority: 0.7
  streams:
    targets: category_ids
    predictions: pipe0_predicted_question_categories_preds
  statistics:
    accuracy: categorization_accuracy
################# PIPE 1: SHARED QUESTION ENCODER #################
# Model 1 (shared question encoder): question embeddings. Unlike the pipe0
# counterpart, this component has no `load`/`freeze` entries.
pipe1_question_embeddings:
  type: SentenceEmbeddings
  priority: 1.1
  embeddings_size: 50
  pretrained_embeddings_file: glove.6B.50d.txt
  data_folder: ~/data/vqa-med
  word_mappings_file: questions.all.word.mappings.csv
  streams:
    inputs: tokenized_questions
    outputs: embedded_questions
# Model 2 (shared question encoder): question LSTM producing
# `questions_activations`, one of the three inputs of `concat`.
pipe1_lstm:
  type: RecurrentNeuralNetwork
  priority: 1.2
  cell_type: LSTM
  prediction_mode: Last
  initial_state: Trainable
  use_logsoftmax: false
  dropout_rate: 0.5
  streams:
    inputs: embedded_questions
    predictions: questions_activations
  globals:
    input_size: embeddings_size
    prediction_size: question_lstm_output_size
# Answer encoding over the joint C1/C2/C3/binary vocabulary; the resulting
# `all_answers_ids` are the targets of the merged statistics in pipe 9.
pipe1_all_answer_indexer:
  type: LabelIndexer
  priority: 1.3
  data_folder: ~/data/vqa-med
  word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
  # Export mappings and size to globals.
  export_word_mappings_to_globals: true
  streams:
    inputs: answers
    outputs: all_answers_ids
  globals:
    # vocabulary_size: vocabulary_size_all_c1_c2_c3_binary
    word_mappings: word_mappings_all_c1_c2_c3_binary
################# PIPE 2: SHARED IMAGE ENCODER #################
# Image encoder: `images` -> `image_activations`, with the output size taken
# from the `image_encoder_output_size` global.
image_encoder:
  type: GenericImageEncoder
  priority: 2.1
  streams:
    inputs: images
    outputs: image_activations
  globals:
    output_size: image_encoder_output_size
################# PIPE 3: SHARED IMAGE SIZE ENCODER #################
# Model - image size encoder: FFN mapping `image_sizes` to
# `image_size_activations`; both sizes come from published globals.
image_size_encoder:
  type: FeedForwardNetwork
  priority: 3.1
  streams:
    inputs: image_sizes
    predictions: image_size_activations
  globals:
    input_size: image_size_encoder_input_size
    prediction_size: image_size_encoder_output_size
################# PIPE 4: SHARED CONCAT #################
# Concatenates question, image and image-size activations along dim 1;
# the widths add up as 100 + 100 + 10 = 210.
concat:
  type: ConcatenateTensor
  priority: 4.1
  input_streams:
    - questions_activations
    - image_activations
    - image_size_activations
  # ConcatenateTensor
  dim: 1  # default
  input_dims: [[-1, 100], [-1, 100], [-1, 10]]
  output_dims: [-1, 210]
  streams:
    outputs: concatenated_activations
  globals:
    output_size: concatenated_activations_size
################# PIPE 5: C1 question #################
# Answer encoding for PIPE 5 (C1 answers, "without yn" vocabulary).
pipe5_c1_answer_indexer:
  type: LabelIndexer
  priority: 5.1
  data_folder: ~/data/vqa-med
  word_mappings_file: answers.c1_without_yn.word.mappings.csv
  # Export mappings and size to globals.
  export_word_mappings_to_globals: true
  streams:
    inputs: answers
    outputs: pipe5_c1_answers_without_yn_ids
  globals:
    vocabulary_size: vocabulary_size_c1_without_yn
    word_mappings: word_mappings_c1_without_yn
# Sample masking based on categories: builds `pipe5_c1_masks` from the
# predicted category names, using the single-entry C1 lookup table.
pipe5_c1_string_to_mask:
  type: StringToMask
  priority: 5.2
  globals:
    word_mappings: category_c1_word_to_ix
  streams:
    strings: pipe0_predicted_question_categories_names
    string_indices: predicted_c1_question_categories_indices  # NOT USED
    masks: pipe5_c1_masks
# Model 4: FFN C1 answering, fed by the shared concatenated activations.
pipe5_c1_ffn:
  type: FeedForwardNetwork
  priority: 5.3
  hidden: [50]
  dropout_rate: 0.5
  streams:
    inputs: concatenated_activations
    predictions: pipe5_c1_predictions
  globals:
    input_size: concatenated_activations_size
    prediction_size: vocabulary_size_c1_without_yn
# Masked NLL loss for the C1 head, written to the `pipe5_c1_loss` stream.
pipe5_c1_nllloss:
  type: NLLLoss
  priority: 5.4
  targets_dim: 1
  use_masking: true
  streams:
    predictions: pipe5_c1_predictions
    masks: pipe5_c1_masks
    targets: pipe5_c1_answers_without_yn_ids
    loss: pipe5_c1_loss
# Masked precision/recall/F1 statistics for the C1 head.
pipe5_c1_precision_recall:
  type: PrecisionRecallStatistics
  priority: 5.5
  use_word_mappings: true
  use_masking: true
  # show_class_scores: True
  # show_confusion_matrix: True
  streams:
    masks: pipe5_c1_masks
    predictions: pipe5_c1_predictions
    targets: pipe5_c1_answers_without_yn_ids
  globals:
    word_mappings: word_mappings_c1_without_yn
    # num_classes: vocabulary_size_c1_without_yn
  statistics:
    precision: pipe5_c1_precision
    recall: pipe5_c1_recall
    f1score: pipe5_c1_f1score
################# PIPE 6: C2 question #################
# Answer encoding for PIPE 6 (C2 answers).
pipe6_c2_answer_indexer:
  type: LabelIndexer
  priority: 6.1
  data_folder: ~/data/vqa-med
  word_mappings_file: answers.c2.word.mappings.csv
  # Export mappings and size to globals.
  export_word_mappings_to_globals: true
  streams:
    inputs: answers
    outputs: pipe6_c2_answers_ids
  globals:
    vocabulary_size: vocabulary_size_c2
    word_mappings: word_mappings_c2
# Sample masking based on categories: builds `pipe6_c2_masks` from the
# predicted category names, using the single-entry C2 lookup table.
pipe6_c2_string_to_mask:
  type: StringToMask
  priority: 6.2
  globals:
    word_mappings: category_c2_word_to_ix
  streams:
    strings: pipe0_predicted_question_categories_names
    string_indices: predicted_c2_question_categories_indices  # NOT USED
    masks: pipe6_c2_masks
# FFN C2 answering, fed by the shared concatenated activations.
pipe6_c2_ffn:
  type: FeedForwardNetwork
  priority: 6.3
  hidden: [50]
  dropout_rate: 0.5
  streams:
    inputs: concatenated_activations
    predictions: pipe6_c2_predictions
  globals:
    input_size: concatenated_activations_size
    prediction_size: vocabulary_size_c2
# Masked NLL loss for the C2 head, written to the `pipe6_c2_loss` stream.
pipe6_c2_nllloss:
  type: NLLLoss
  priority: 6.4
  targets_dim: 1
  use_masking: true
  streams:
    predictions: pipe6_c2_predictions
    masks: pipe6_c2_masks
    targets: pipe6_c2_answers_ids
    loss: pipe6_c2_loss
# Masked precision/recall/F1 statistics for the C2 head.
pipe6_c2_precision_recall:
  type: PrecisionRecallStatistics
  priority: 6.5
  use_word_mappings: true
  use_masking: true
  # show_class_scores: True
  # show_confusion_matrix: True
  streams:
    masks: pipe6_c2_masks
    predictions: pipe6_c2_predictions
    targets: pipe6_c2_answers_ids
  globals:
    word_mappings: word_mappings_c2
  statistics:
    precision: pipe6_c2_precision
    recall: pipe6_c2_recall
    f1score: pipe6_c2_f1score
################# PIPE 7: C3 question #################
# Answer encoding for PIPE 7 (C3 answers).
pipe7_c3_answer_indexer:
  type: LabelIndexer
  priority: 7.1
  data_folder: ~/data/vqa-med
  word_mappings_file: answers.c3.word.mappings.csv
  # Export mappings and size to globals.
  export_word_mappings_to_globals: true
  streams:
    inputs: answers
    outputs: pipe7_c3_answers_ids
  globals:
    vocabulary_size: vocabulary_size_c3
    word_mappings: word_mappings_c3
# Sample masking based on categories: builds `pipe7_c3_masks` from the
# predicted category names, using the single-entry C3 lookup table.
pipe7_c3_string_to_mask:
  type: StringToMask
  priority: 7.2
  globals:
    word_mappings: category_c3_word_to_ix
  streams:
    strings: pipe0_predicted_question_categories_names
    string_indices: predicted_c3_question_categories_indices  # NOT USED
    masks: pipe7_c3_masks
# FFN C3 answering, fed by the shared concatenated activations.
pipe7_c3_ffn:
  type: FeedForwardNetwork
  priority: 7.3
  hidden: [50]
  dropout_rate: 0.5
  streams:
    inputs: concatenated_activations
    predictions: pipe7_c3_predictions
  globals:
    input_size: concatenated_activations_size
    prediction_size: vocabulary_size_c3
# Masked NLL loss for the C3 head, written to the `pipe7_c3_loss` stream.
pipe7_c3_nllloss:
  type: NLLLoss
  priority: 7.4
  targets_dim: 1
  use_masking: true
  streams:
    predictions: pipe7_c3_predictions
    masks: pipe7_c3_masks
    targets: pipe7_c3_answers_ids
    loss: pipe7_c3_loss
# Masked precision/recall/F1 statistics for the C3 head.
pipe7_c3_precision_recall:
  type: PrecisionRecallStatistics
  priority: 7.5
  use_word_mappings: true
  use_masking: true
  # show_class_scores: True
  # show_confusion_matrix: True
  streams:
    masks: pipe7_c3_masks
    predictions: pipe7_c3_predictions
    targets: pipe7_c3_answers_ids
  globals:
    word_mappings: word_mappings_c3
    # num_classes: vocabulary_size_c3
  statistics:
    precision: pipe7_c3_precision
    recall: pipe7_c3_recall
    f1score: pipe7_c3_f1score
################# PIPE 8: BINARY question #################
# Answer encoding for PIPE 8 (binary yes/no answers).
pipe8_binary_answer_indexer:
  type: LabelIndexer
  priority: 8.1
  data_folder: ~/data/vqa-med
  word_mappings_file: answers.binary_yn.word.mappings.csv
  # Export mappings and size to globals.
  export_word_mappings_to_globals: true
  streams:
    inputs: answers
    outputs: pipe8_binary_answers_ids
  globals:
    vocabulary_size: vocabulary_size_binary_yn
    word_mappings: word_mappings_binary_yn
# Sample masking based on categories: builds `pipe8_binary_masks` from the
# predicted category names, using the single-entry BINARY lookup table.
pipe8_binary_string_to_mask:
  type: StringToMask
  priority: 8.2
  globals:
    word_mappings: category_binary_word_to_ix
  streams:
    strings: pipe0_predicted_question_categories_names
    string_indices: predicted_binary_question_categories_indices  # NOT USED
    masks: pipe8_binary_masks
# FFN BINARY answering, fed by the shared concatenated activations.
pipe8_binary_ffn:
  type: FeedForwardNetwork
  priority: 8.3
  hidden: [50]
  dropout_rate: 0.5
  streams:
    inputs: concatenated_activations
    predictions: pipe8_binary_predictions
  globals:
    input_size: concatenated_activations_size
    prediction_size: vocabulary_size_binary_yn
# Masked NLL loss for the binary head, written to `pipe8_binary_loss`.
pipe8_binary_nllloss:
  type: NLLLoss
  priority: 8.4
  targets_dim: 1
  use_masking: true
  streams:
    predictions: pipe8_binary_predictions
    masks: pipe8_binary_masks
    targets: pipe8_binary_answers_ids
    loss: pipe8_binary_loss
# Masked precision/recall/F1 statistics for the binary head.
pipe8_binary_precision_recall:
  type: PrecisionRecallStatistics
  priority: 8.5
  use_word_mappings: true
  use_masking: true
  # show_class_scores: True
  # show_confusion_matrix: True
  streams:
    masks: pipe8_binary_masks
    predictions: pipe8_binary_predictions
    targets: pipe8_binary_answers_ids
  globals:
    word_mappings: word_mappings_binary_yn
    # num_classes: vocabulary_size_binary_yn
  statistics:
    precision: pipe8_binary_precision
    recall: pipe8_binary_recall
    f1score: pipe8_binary_f1score
################# PIPE 9: MERGE ANSWERS #################
# Merge predictions of the four heads (C1, C2, C3, binary) into one answer
# stream expressed in the joint vocabulary. The three `input_*` lists are
# parallel: same head order in each.
pipe9_merged_predictions:
  type: JoinMaskedPredictions
  priority: 9.1
  # Names of used input streams.
  input_prediction_streams:
    - pipe5_c1_predictions
    - pipe6_c2_predictions
    - pipe7_c3_predictions
    - pipe8_binary_predictions
  input_mask_streams:
    - pipe5_c1_masks
    - pipe6_c2_masks
    - pipe7_c3_masks
    - pipe8_binary_masks
  input_word_mappings:
    - word_mappings_c1_without_yn
    - word_mappings_c2
    - word_mappings_c3
    - word_mappings_binary_yn
  globals:
    output_word_mappings: word_mappings_all_c1_c2_c3_binary
  streams:
    output_strings: pipe9_merged_predictions
    output_indices: pipe9_merged_pred_indices
# Statistics on the merged predictions, compared against the joint-vocabulary
# targets (`all_answers_ids`).
pipe9_merged_precision_recall:
  type: PrecisionRecallStatistics
  priority: 9.2
  # Use prediction indices instead of distributions.
  use_prediction_distributions: false
  use_word_mappings: true
  show_class_scores: true
  show_confusion_matrix: true
  globals:
    word_mappings: word_mappings_all_c1_c2_c3_binary
  streams:
    targets: all_answers_ids
    predictions: pipe9_merged_pred_indices
  statistics:
    precision: pipe9_merged_precision
    recall: pipe9_merged_recall
    f1score: pipe9_merged_f1score
# Viewers: StreamViewer over the streams listed in `input_streams`.
#
# `input_streams` is kept as ONE comma-separated string. It is written as a
# folded block scalar (`>-`): each line break folds into a single space, so
# the value has the same "a,b, c,d" shape as the former one-line form.
#
# The decoded category names are referenced by the name the decoder actually
# emits in this file (`pipe0_predicted_question_categories_names`); the
# unprefixed `predicted_question_categories_names` is not produced by any
# component defined here.
viewer:
  type: StreamViewer
  priority: 9.3
  input_streams: >-
    questions,answers,
    category_names,pipe0_predicted_question_categories_names,
    pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions,
    pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions,
    pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions,
    pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions,
    pipe9_merged_predictions
#: pipeline