# Source: configs/vqa_med_2019/vf/c1_c2_c3_binary_vf_cat_rnn_shared_all_encoders_four_ffns_losses.yml (IBM/pytorchpipe)

# Inherit the task definitions for training, validation and testing.
default_configs: 'vqa_med_2019/default_vqa_med_2019.yml'

# Training split: categories C1, C2 and C3. The task's `export_sample_weights`
# and the sampler's `weights` point at the same CSV file.
training:
  task:
    categories: 'C1,C2,C3'
    export_sample_weights: '~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv'
  sampler:
    weights: '~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv'

# Validation split: same category subset as training.
validation:
  task:
    categories: 'C1,C2,C3'


pipeline:
  
  ################# PIPE 0: SHARED #################

  # Publish shared constants as global variables.
  # `keys` and `values` are parallel lists of equal length (presumably paired
  # by position): four layer sizes followed by four single-entry category maps.
  global_publisher:
    type: GlobalVariablePublisher
    priority: 0
    keys:
      - question_lstm_output_size
      - image_size_encoder_input_size
      - image_size_encoder_output_size
      - image_encoder_output_size
      - category_c1_word_to_ix
      - category_c2_word_to_ix
      - category_c3_word_to_ix
      - category_binary_word_to_ix
    values:
      - 100
      - 2
      - 10
      - 100
      - {"C1": 0}
      - {"C2": 0}
      - {"C3": 0}
      - {"BINARY": 0}

  # Batch-size statistics.
  batch_size:
    type: BatchSizeStatistics
    priority: 0.1

  # Question tokenization; `tokenized_questions` is consumed by both
  # question-embedding components (pipe0 and pipe1).
  pipe1_question_tokenizer:
    type: SentenceTokenizer
    priority: 0.2
    streams:
      inputs: questions
      outputs: tokenized_questions

  ################# PIPE 0: CATEGORY #################
  # Question categorizer: embeddings -> LSTM -> FFN classifier. All three
  # models are loaded from the same checkpoint file and frozen.

  # Model 1: question embeddings (loaded and frozen).
  pipe0_question_embeddings:
    type: SentenceEmbeddings
    priority: 0.3
    embeddings_size: 50
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: '~/data/vqa-med'
    word_mappings_file: questions.all.word.mappings.csv
    # Load the `question_embeddings` model from the checkpoint and freeze it.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: question_embeddings
    freeze: true
    streams:
      inputs: tokenized_questions
      outputs: pipe0_embedded_questions

  # Model 2: question RNN (loaded and frozen).
  pipe0_lstm:
    type: RecurrentNeuralNetwork
    priority: 0.4
    cell_type: LSTM
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: false
    dropout_rate: 0.5
    # Load the `lstm` model from the checkpoint and freeze it.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: lstm
    freeze: true
    streams:
      inputs: pipe0_embedded_questions
      predictions: pipe0_questions_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_lstm_output_size

  # Model 3: FFN question-category classifier (loaded and frozen).
  pipe0_classifier:
    type: FeedForwardNetwork
    priority: 0.5
    hidden: [50]
    dropout_rate: 0.5
    # Load the `classifier` model from the checkpoint and freeze it.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: classifier
    freeze: true
    streams:
      inputs: pipe0_questions_activations
      predictions: pipe0_predicted_question_categories_preds
    globals:
      input_size: question_lstm_output_size  # Set by global publisher
      prediction_size: num_categories  # C1,C2,C3,C4, BINARY, UNK

  # Decode category predictions into category names.
  pipe0_category_decoder:
    type: WordDecoder
    priority: 0.6
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: true
    streams:
      inputs: pipe0_predicted_question_categories_preds
      outputs: pipe0_predicted_question_categories_names
    globals:
      vocabulary_size: num_categories
      word_mappings: category_word_mappings

  # Categorization accuracy: predictions versus the `category_ids` targets.
  pipe0_category_accuracy:
    type: AccuracyStatistics
    priority: 0.7
    streams:
      predictions: pipe0_predicted_question_categories_preds
      targets: category_ids
    statistics:
      accuracy: categorization_accuracy
  


  ################# PIPE 1: SHARED QUESTION ENCODER #################
  # Second embeddings + LSTM stack, configured without `load`/`freeze`.
  # Its output (`questions_activations`) is one of the inputs of `concat`.

  # Model 1: question embeddings.
  pipe1_question_embeddings:
    type: SentenceEmbeddings
    priority: 1.1
    embeddings_size: 50
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: '~/data/vqa-med'
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions

  # Model 2: question RNN.
  pipe1_lstm:
    type: RecurrentNeuralNetwork
    priority: 1.2
    cell_type: LSTM
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: false
    dropout_rate: 0.5
    streams:
      inputs: embedded_questions
      predictions: questions_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_lstm_output_size

  # Answer encoding over the joint C1/C2/C3/binary answer vocabulary.
  pipe1_all_answer_indexer:
    type: LabelIndexer
    priority: 1.3
    data_folder: '~/data/vqa-med'
    word_mappings_file: answers.c1_c2_c3_binary_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: all_answers_ids
    globals:
      # vocabulary_size: vocabulary_size_all_c1_c2_c3_binary
      word_mappings: word_mappings_all_c1_c2_c3_binary

  ################# PIPE 2: SHARED IMAGE ENCODER #################

  # Image encoder: `images` -> `image_activations`; the output size is taken
  # from the `image_encoder_output_size` global.
  image_encoder:
    type: GenericImageEncoder
    priority: 2.1
    globals:
      output_size: image_encoder_output_size
    streams:
      inputs: images
      outputs: image_activations

  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################

  # Feed-forward encoder for the `image_sizes` stream; input and output sizes
  # come from the globals set by the global publisher.
  image_size_encoder:
    type: FeedForwardNetwork
    priority: 3.1
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size
    streams:
      inputs: image_sizes
      predictions: image_size_activations

  ################# PIPE 4: SHARED CONCAT #################

  # Concatenate question, image and image-size activations along dim 1.
  # The widths 100 + 100 + 10 = 210 mirror the sizes published by
  # `global_publisher`.
  concat:
    type: ConcatenateTensor
    priority: 4.1
    input_streams:
      - questions_activations
      - image_activations
      - image_size_activations
    dim: 1  # default
    input_dims:
      - [-1, 100]
      - [-1, 100]
      - [-1, 10]
    output_dims: [-1, 210]
    streams:
      outputs: concatenated_activations
    globals:
      output_size: concatenated_activations_size


  ################# PIPE 5: C1 question #################

  # Answer encoding for PIPE 5 (C1 answers without yes/no).
  pipe5_c1_answer_indexer:
    type: LabelIndexer
    priority: 5.1
    data_folder: '~/data/vqa-med'
    word_mappings_file: answers.c1_without_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: pipe5_c1_answers_without_yn_ids
    globals:
      vocabulary_size: vocabulary_size_c1_without_yn
      word_mappings: word_mappings_c1_without_yn

  # Sample masking based on the predicted question categories.
  pipe5_c1_string_to_mask:
    type: StringToMask
    priority: 5.2
    streams:
      strings: pipe0_predicted_question_categories_names
      string_indices: predicted_c1_question_categories_indices  # NOT USED
      masks: pipe5_c1_masks
    globals:
      word_mappings: category_c1_word_to_ix

  # Model 4: FFN C1 answering.
  pipe5_c1_ffn:
    type: FeedForwardNetwork
    priority: 5.3
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe5_c1_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c1_without_yn

  # Masked NLL loss for the C1 branch.
  pipe5_c1_nllloss:
    type: NLLLoss
    priority: 5.4
    targets_dim: 1
    use_masking: true
    streams:
      predictions: pipe5_c1_predictions
      targets: pipe5_c1_answers_without_yn_ids
      masks: pipe5_c1_masks
      loss: pipe5_c1_loss

  # Masked precision/recall/F1 for the C1 branch.
  pipe5_c1_precision_recall:
    type: PrecisionRecallStatistics
    priority: 5.5
    use_word_mappings: true
    use_masking: true
    # show_class_scores: True
    # show_confusion_matrix: True
    streams:
      predictions: pipe5_c1_predictions
      targets: pipe5_c1_answers_without_yn_ids
      masks: pipe5_c1_masks
    globals:
      word_mappings: word_mappings_c1_without_yn
      # num_classes: vocabulary_size_c1_without_yn
    statistics:
      precision: pipe5_c1_precision
      recall: pipe5_c1_recall
      f1score: pipe5_c1_f1score

  ################# PIPE 6: C2 question #################

  # Answer encoding for PIPE 6.
  pipe6_c2_answer_indexer:
    type: LabelIndexer
    priority: 6.1
    data_folder: '~/data/vqa-med'
    word_mappings_file: answers.c2.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: pipe6_c2_answers_ids
    globals:
      vocabulary_size: vocabulary_size_c2
      word_mappings: word_mappings_c2

  # Sample masking based on the predicted question categories.
  pipe6_c2_string_to_mask:
    type: StringToMask
    priority: 6.2
    streams:
      strings: pipe0_predicted_question_categories_names
      string_indices: predicted_c2_question_categories_indices  # NOT USED
      masks: pipe6_c2_masks
    globals:
      word_mappings: category_c2_word_to_ix

  # FFN C2 answering.
  pipe6_c2_ffn:
    type: FeedForwardNetwork
    priority: 6.3
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe6_c2_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c2

  # Masked NLL loss for the C2 branch.
  pipe6_c2_nllloss:
    type: NLLLoss
    priority: 6.4
    targets_dim: 1
    use_masking: true
    streams:
      predictions: pipe6_c2_predictions
      targets: pipe6_c2_answers_ids
      masks: pipe6_c2_masks
      loss: pipe6_c2_loss

  # Masked precision/recall/F1 for the C2 branch.
  pipe6_c2_precision_recall:
    type: PrecisionRecallStatistics
    priority: 6.5
    use_word_mappings: true
    use_masking: true
    # show_class_scores: True
    # show_confusion_matrix: True
    streams:
      predictions: pipe6_c2_predictions
      targets: pipe6_c2_answers_ids
      masks: pipe6_c2_masks
    globals:
      word_mappings: word_mappings_c2
    statistics:
      precision: pipe6_c2_precision
      recall: pipe6_c2_recall
      f1score: pipe6_c2_f1score

  ################# PIPE 7: C3 question #################

  # Answer encoding for PIPE 7.
  pipe7_c3_answer_indexer:
    type: LabelIndexer
    priority: 7.1
    data_folder: '~/data/vqa-med'
    word_mappings_file: answers.c3.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: pipe7_c3_answers_ids
    globals:
      vocabulary_size: vocabulary_size_c3
      word_mappings: word_mappings_c3

  # Sample masking based on the predicted question categories.
  pipe7_c3_string_to_mask:
    type: StringToMask
    priority: 7.2
    streams:
      strings: pipe0_predicted_question_categories_names
      string_indices: predicted_c3_question_categories_indices  # NOT USED
      masks: pipe7_c3_masks
    globals:
      word_mappings: category_c3_word_to_ix

  # FFN C3 answering.
  pipe7_c3_ffn:
    type: FeedForwardNetwork
    priority: 7.3
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe7_c3_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c3

  # Masked NLL loss for the C3 branch.
  pipe7_c3_nllloss:
    type: NLLLoss
    priority: 7.4
    targets_dim: 1
    use_masking: true
    streams:
      predictions: pipe7_c3_predictions
      targets: pipe7_c3_answers_ids
      masks: pipe7_c3_masks
      loss: pipe7_c3_loss

  # Masked precision/recall/F1 for the C3 branch.
  pipe7_c3_precision_recall:
    type: PrecisionRecallStatistics
    priority: 7.5
    use_word_mappings: true
    use_masking: true
    # show_class_scores: True
    # show_confusion_matrix: True
    streams:
      predictions: pipe7_c3_predictions
      targets: pipe7_c3_answers_ids
      masks: pipe7_c3_masks
    globals:
      word_mappings: word_mappings_c3
      # num_classes: vocabulary_size_c3
    statistics:
      precision: pipe7_c3_precision
      recall: pipe7_c3_recall
      f1score: pipe7_c3_f1score

  ################# PIPE 8: BINARY question #################

  # Answer encoding for PIPE 8.
  pipe8_binary_answer_indexer:
    type: LabelIndexer
    priority: 8.1
    data_folder: '~/data/vqa-med'
    word_mappings_file: answers.binary_yn.word.mappings.csv
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    streams:
      inputs: answers
      outputs: pipe8_binary_answers_ids
    globals:
      vocabulary_size: vocabulary_size_binary_yn
      word_mappings: word_mappings_binary_yn

  # Sample masking based on the predicted question categories.
  pipe8_binary_string_to_mask:
    type: StringToMask
    priority: 8.2
    streams:
      strings: pipe0_predicted_question_categories_names
      string_indices: predicted_binary_question_categories_indices  # NOT USED
      masks: pipe8_binary_masks
    globals:
      word_mappings: category_binary_word_to_ix

  # FFN binary answering.
  pipe8_binary_ffn:
    type: FeedForwardNetwork
    priority: 8.3
    hidden: [50]
    dropout_rate: 0.5
    streams:
      inputs: concatenated_activations
      predictions: pipe8_binary_predictions
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_binary_yn

  # Masked NLL loss for the binary branch.
  pipe8_binary_nllloss:
    type: NLLLoss
    priority: 8.4
    targets_dim: 1
    use_masking: true
    streams:
      predictions: pipe8_binary_predictions
      targets: pipe8_binary_answers_ids
      masks: pipe8_binary_masks
      loss: pipe8_binary_loss

  # Masked precision/recall/F1 for the binary branch.
  pipe8_binary_precision_recall:
    type: PrecisionRecallStatistics
    priority: 8.5
    use_word_mappings: true
    use_masking: true
    # show_class_scores: True
    # show_confusion_matrix: True
    streams:
      predictions: pipe8_binary_predictions
      targets: pipe8_binary_answers_ids
      masks: pipe8_binary_masks
    globals:
      word_mappings: word_mappings_binary_yn
      # num_classes: vocabulary_size_binary_yn
    statistics:
      precision: pipe8_binary_precision
      recall: pipe8_binary_recall
      f1score: pipe8_binary_f1score

  ################# PIPE 9: MERGE ANSWERS #################

  # Merge the four per-branch predictions using their masks. The three input
  # lists are parallel: entry i of each list belongs to the same branch
  # (C1, C2, C3, binary).
  pipe9_merged_predictions:
    type: JoinMaskedPredictions
    priority: 9.1
    # Names of used input streams.
    input_prediction_streams:
      - pipe5_c1_predictions
      - pipe6_c2_predictions
      - pipe7_c3_predictions
      - pipe8_binary_predictions
    input_mask_streams:
      - pipe5_c1_masks
      - pipe6_c2_masks
      - pipe7_c3_masks
      - pipe8_binary_masks
    input_word_mappings:
      - word_mappings_c1_without_yn
      - word_mappings_c2
      - word_mappings_c3
      - word_mappings_binary_yn
    streams:
      output_strings: pipe9_merged_predictions
      output_indices: pipe9_merged_pred_indices
    globals:
      output_word_mappings: word_mappings_all_c1_c2_c3_binary

  # Statistics on the merged predictions against the joint answer ids.
  pipe9_merged_precision_recall:
    type: PrecisionRecallStatistics
    priority: 9.2
    # Use prediction indices instead of distributions.
    use_prediction_distributions: false
    use_word_mappings: true
    show_class_scores: true
    show_confusion_matrix: true
    streams:
      predictions: pipe9_merged_pred_indices
      targets: all_answers_ids
    globals:
      word_mappings: word_mappings_all_c1_c2_c3_binary
    statistics:
      precision: pipe9_merged_precision
      recall: pipe9_merged_recall
      f1score: pipe9_merged_f1score

  # Viewers.
  # Lists the streams to display. The predicted-category entry uses the stream
  # name emitted by pipe0_category_decoder
  # (pipe0_predicted_question_categories_names); the previous un-prefixed name
  # is not produced by any component in this file.
  # NOTE(review): `category_names` is not produced in this file either -
  # presumably it is provided by the task; confirm.
  viewer:
    type: StreamViewer
    priority: 9.3
    input_streams: questions,answers, category_names,pipe0_predicted_question_categories_names, pipe5_c1_masks,pipe5_c1_answers_without_yn_ids,pipe5_c1_predictions, pipe6_c2_masks,pipe6_c2_answers_ids,pipe6_c2_predictions, pipe7_c3_masks,pipe7_c3_answers_ids,pipe7_c3_predictions, pipe8_binary_masks,pipe8_binary_answers_ids,pipe8_binary_predictions, pipe9_merged_predictions


#: pipeline