# configs/vqa_med_2019/vf/c1_binary_vf_cat_rnn_shared_question_rnn_two_ffns_losses.yml
# Load the default config defining the tasks for training, validation and testing.
# NOTE(review): the sections below presumably override/extend those defaults;
# confirm the merge semantics against the config loader.
default_configs: vqa_med_2019/default_vqa_med_2019.yml
# Training parameters:
# Only the keys listed here are set in this file; the rest of the training
# section is expected to come from `default_configs`.
training:
  task:
    # Restrict the task to the C1 question category.
    categories: C1
    # File to which the task exports per-sample weights ...
    export_sample_weights: ~/data/vqa-med/answers.c1.weights.csv
  sampler:
    # ... and the very same file is consumed by the sampler, so both paths
    # must stay in sync.
    weights: ~/data/vqa-med/answers.c1.weights.csv
# Validation parameters:
# Mirrors the training task restriction so that validation uses the same
# question category (C1).
validation:
  task:
    categories: C1
# Pipeline definition.
# Every direct child of `pipeline` is one component; `priority` values are
# ascending along the pipes (0.x shared, 1.x category, 2.x C1, 3.x BINARY,
# 4.x merge). `streams` entries name the data streams a component reads or
# writes, `globals` entries name the global variables it reads or writes.
pipeline:

  ################# PIPE 0: SHARED #################

  # Add global variables.
  global_publisher:
    type: GlobalVariablePublisher
    priority: 0
    # `keys` and `values` are parallel lists: the i-th key is paired with the
    # i-th value, so both lists must keep the same length and order.
    keys: [question_lstm_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size, category_c1_word_to_ix, category_binary_word_to_ix]
    values: [100, 2, 10, 100, {"C1": 0}, {"BINARY": 0}]

  # Statistics.
  batch_size:
    type: BatchSizeStatistics
    priority: 0.1

  ################# PIPE 0: SHARED QUESTION #################

  # Questions encoding.
  pipe0_question_tokenizer:
    priority: 0.2
    type: SentenceTokenizer
    streams:
      inputs: questions
      outputs: tokenized_questions

  # Model 1: question embeddings
  # Shared encoder: the checkpoint loading/freezing below is intentionally
  # commented out, unlike the PIPE 1 copy of this component.
  pipe0_question_embeddings:
    type: SentenceEmbeddings
    priority: 0.3
    # LOAD AND FREEZE #
    #load:
    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
    #  model: question_embeddings
    #freeze: True
    ###################
    embeddings_size: 50
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: question RNN
  # Its output stream (lstm_activations_questions) feeds both answer
  # classifiers in PIPE 2 and PIPE 3.
  pipe0_lstm:
    priority: 0.4
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    # LOAD AND FREEZE #
    #load:
    #  file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
    #  model: lstm
    #freeze: True
    ###################
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: False
    dropout_rate: 0.5
    streams:
      inputs: embedded_questions
      predictions: lstm_activations_questions
    globals:
      # NOTE(review): `embeddings_size` is not published by global_publisher
      # above; presumably exported by the embeddings component - confirm.
      input_size: embeddings_size
      prediction_size: question_lstm_output_size  # Set by global publisher

  # Answer encoding
  # Indexes answers over the full C1 vocabulary; the resulting ids are the
  # targets of the merged statistics in PIPE 4.
  pipe0_all_answer_indexer:
    type: LabelIndexer
    priority: 0.6
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.c1.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: True
    streams:
      inputs: answers
      outputs: all_answers_ids
    globals:
      vocabulary_size: vocabulary_size_all_c1_binary
      word_mappings: word_mappings_all_c1_binary

  ################# PIPE 1: CATEGORY #################

  # Model 1: question embeddings
  # Loaded from the question-categorization checkpoint and frozen.
  pipe1_question_embeddings:
    type: SentenceEmbeddings
    priority: 1.1
    # LOAD AND FREEZE #
    load:
      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
      model: question_embeddings
    freeze: True
    ###################
    embeddings_size: 50
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: pipe1_embedded_questions

  # Model 2: question RNN
  # Loaded from the question-categorization checkpoint and frozen.
  pipe1_lstm:
    priority: 1.2
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    # LOAD AND FREEZE #
    load:
      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
      model: lstm
    freeze: True
    ###################
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: False
    dropout_rate: 0.5
    streams:
      inputs: pipe1_embedded_questions
      predictions: pipe1_lstm_activations_questions
    globals:
      input_size: embeddings_size
      prediction_size: question_lstm_output_size  # Set by global publisher

  # Model 3: FFN question category
  # Loaded from the question-categorization checkpoint and frozen.
  pipe1_classifier:
    priority: 1.3
    type: FeedForwardNetwork
    # LOAD AND FREEZE #
    load:
      file: ~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt
      model: classifier
    freeze: True
    ###################
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: pipe1_lstm_activations_questions
      predictions: pipe1_predicted_question_categories_preds
    globals:
      input_size: question_lstm_output_size  # Set by global publisher
      prediction_size: num_categories  # C1,C2,C3,C4, BINARY, UNK

  # Turns category predictions into category names; those names drive the
  # StringToMask components of PIPE 2 and PIPE 3.
  pipe1_category_decoder:
    priority: 1.4
    type: WordDecoder
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: True
    streams:
      inputs: pipe1_predicted_question_categories_preds
      outputs: pipe1_predicted_question_categories_names
    globals:
      # NOTE(review): num_categories / category_word_mappings are not defined
      # in this file; presumably provided via default_configs - confirm.
      vocabulary_size: num_categories
      word_mappings: category_word_mappings

  pipe1_category_accuracy:
    type: AccuracyStatistics
    priority: 1.5
    streams:
      targets: category_ids
      predictions: pipe1_predicted_question_categories_preds
    statistics:
      accuracy: categorization_accuracy

  ################# PIPE 2: C1 question #################

  # Answer encoding for PIPE 2.
  pipe2_c1_answer_indexer:
    type: LabelIndexer
    priority: 2.0
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.c1_without_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: True
    streams:
      inputs: answers
      outputs: pipe2_c1_answers_without_yn_ids
    globals:
      vocabulary_size: vocabulary_size_c1_without_yn
      word_mappings: word_mappings_c1_without_yn

  # Sample masking based on categories.
  # Uses the {"C1": 0} mapping published by global_publisher together with the
  # category names predicted in PIPE 1.
  pipe2_c1_string_to_mask:
    priority: 2.1
    type: StringToMask
    globals:
      word_mappings: category_c1_word_to_ix
    streams:
      strings: pipe1_predicted_question_categories_names
      string_indices: predicted_c1_question_categories_indices  # NOT USED
      masks: pipe2_c1_masks

  # Model 4: FFN C1 answering
  # Despite the `_lstm` suffix this component is a FeedForwardNetwork; it
  # consumes the activations of the shared question LSTM (pipe0_lstm).
  pipe2_c1_lstm:
    priority: 2.2
    type: FeedForwardNetwork
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: lstm_activations_questions
      predictions: pipe2_c1_predictions
    globals:
      input_size: question_lstm_output_size  # Set by global publisher
      prediction_size: vocabulary_size_c1_without_yn

  # Loss for the C1 branch, restricted by the C1 masks.
  pipe2_c1_nllloss:
    type: NLLLoss
    priority: 2.3
    targets_dim: 1
    use_masking: True
    streams:
      predictions: pipe2_c1_predictions
      masks: pipe2_c1_masks
      targets: pipe2_c1_answers_without_yn_ids
      loss: pipe2_c1_loss

  pipe2_c1_precision_recall:
    type: PrecisionRecallStatistics
    priority: 2.4
    use_word_mappings: True
    use_masking: True
    #show_class_scores: True
    #show_confusion_matrix: True
    streams:
      masks: pipe2_c1_masks
      predictions: pipe2_c1_predictions
      targets: pipe2_c1_answers_without_yn_ids
    globals:
      word_mappings: word_mappings_c1_without_yn
      num_classes: vocabulary_size_c1_without_yn
    statistics:
      precision: pipe2_c1_precision
      recall: pipe2_c1_recall
      f1score: pipe2_c1_f1score

  ################# PIPE 3: BINARY question #################

  # Answer encoding for PIPE 3.
  pipe3_binary_answer_indexer:
    type: LabelIndexer
    priority: 3.0
    data_folder: ~/data/vqa-med
    word_mappings_file: answers.binary_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: True
    streams:
      inputs: answers
      outputs: pipe3_binary_answers_ids
    globals:
      vocabulary_size: vocabulary_size_binary_yn
      word_mappings: word_mappings_binary_yn

  # Sample masking based on categories.
  # Uses the {"BINARY": 0} mapping published by global_publisher together with
  # the category names predicted in PIPE 1.
  pipe3_binary_string_to_mask:
    priority: 3.1
    type: StringToMask
    globals:
      word_mappings: category_binary_word_to_ix
    streams:
      strings: pipe1_predicted_question_categories_names
      string_indices: predicted_binary_question_categories_indices  # NOT USED
      masks: pipe3_binary_masks

  # Model 5: FFN BINARY answering
  # Despite the `_lstm` suffix this component is a FeedForwardNetwork; it
  # consumes the activations of the shared question LSTM (pipe0_lstm).
  pipe3_binary_lstm:
    priority: 3.2
    type: FeedForwardNetwork
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: lstm_activations_questions
      predictions: pipe3_binary_predictions
    globals:
      input_size: question_lstm_output_size  # Set by global publisher
      prediction_size: vocabulary_size_binary_yn

  # Loss for the BINARY branch, restricted by the BINARY masks.
  pipe3_binary_nllloss:
    type: NLLLoss
    priority: 3.3
    targets_dim: 1
    use_masking: True
    streams:
      predictions: pipe3_binary_predictions
      masks: pipe3_binary_masks
      targets: pipe3_binary_answers_ids
      loss: pipe3_binary_loss

  pipe3_binary_precision_recall:
    type: PrecisionRecallStatistics
    priority: 3.4
    use_word_mappings: True
    use_masking: True
    #show_class_scores: True
    #show_confusion_matrix: True
    streams:
      masks: pipe3_binary_masks
      predictions: pipe3_binary_predictions
      targets: pipe3_binary_answers_ids
    globals:
      word_mappings: word_mappings_binary_yn
      num_classes: vocabulary_size_binary_yn
    statistics:
      precision: pipe3_binary_precision
      recall: pipe3_binary_recall
      f1score: pipe3_binary_f1score

  ################# PIPE 4: MERGE ANSWERS #################

  # Merge predictions
  # The three input lists are parallel: entry i of each list belongs to the
  # same branch (C1 first, BINARY second).
  merged_predictions:
    type: JoinMaskedPredictions
    priority: 4.1
    # Names of used input streams.
    input_prediction_streams: [pipe2_c1_predictions, pipe3_binary_predictions]
    input_mask_streams: [pipe2_c1_masks, pipe3_binary_masks]
    input_word_mappings: [word_mappings_c1_without_yn, word_mappings_binary_yn]
    globals:
      # Mappings exported by pipe0_all_answer_indexer.
      output_word_mappings: word_mappings_all_c1_binary
    streams:
      output_strings: merged_predictions
      output_indices: merged_pred_indices

  # Statistics.
  merged_precision_recall:
    type: PrecisionRecallStatistics
    priority: 4.2
    # Use prediction indices instead of distributions.
    use_prediction_distributions: False
    use_word_mappings: True
    show_class_scores: True
    show_confusion_matrix: True
    globals:
      word_mappings: word_mappings_all_c1_binary
    streams:
      targets: all_answers_ids
      predictions: merged_pred_indices
    statistics:
      precision: merged_precision
      recall: merged_recall
      f1score: merged_f1score

  # Viewers.
  # Comma-separated list of streams to display. The predicted category names
  # are produced by pipe1_category_decoder, hence the `pipe1_` prefix.
  # NOTE(review): `category_names` is not produced in this file; presumably
  # emitted by the task - confirm.
  viewer:
    type: StreamViewer
    priority: 4.3
    input_streams: questions,answers, category_names,pipe1_predicted_question_categories_names, pipe2_c1_masks,pipe2_c1_answers_without_yn_ids,pipe2_c1_predictions, pipe3_binary_masks,pipe3_binary_answers_ids,pipe3_binary_predictions, merged_predictions
#: pipeline