configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml from IBM/pytorchpipe

configs/vqa_med_2019/vf/lstm_resnet152_is_cat_ffn_c123_no_binary_loss.yml
Summary

Maintainability

Test Coverage

Issues
# Base configuration loaded for this experiment; it defines the tasks for
# training, validation and testing. The sections below adjust those tasks.
# NOTE(review): the path looks relative to the configs root - confirm against the loader.
default_configs: vqa_med_2019/default_vqa_med_2019.yml

# Training section: task settings plus the weighted sampler.
training:
  task:
    # Question categories selected for this run.
    categories: 'C1,C2,C3'
    # Apply all preprocessing/data augmentations.
    question_preprocessing: 'lowercase,remove_punctuation,tokenize'
    streams:
      questions: tokenized_questions
    # Same file that the sampler below reads its weights from.
    export_sample_weights: '~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv'
  sampler:
    weights: '~/data/vqa-med/answers.c1_c2_c3_binary_yn.weights.csv'

# Validation section: task settings only (no sampler is configured here).
validation:
  task:
    # Question categories selected for this run.
    categories: 'C1,C2,C3'
    # Apply all preprocessing/data augmentations.
    question_preprocessing: 'lowercase,remove_punctuation,tokenize'
    streams:
      questions: tokenized_questions


pipeline:

  ################# PIPE 0: SHARED #################

  # Publishes constants as global variables so that other components can
  # refer to them by name in their `globals` sections.
  global_publisher:
    type: GlobalVariablePublisher
    priority: 0
    # The two lists have the same length; entries are presumably paired by
    # position (first key with first value, and so on) - confirm against the
    # GlobalVariablePublisher implementation.
    keys:
      - question_lstm_output_size
      - image_size_encoder_input_size
      - image_size_encoder_output_size
      - image_encoder_output_size
      - category_c123_without_yn_word_to_ix
    values:
      - 100
      - 2
      - 10
      - 100
      - {C1: 0, C2: 1, C3: 2}

  # Statistics: BatchSizeStatistics component, scheduled at priority 0.1
  # (right after global_publisher at priority 0).
  batch_size:
    type: BatchSizeStatistics
    priority: 0.1

  ################# PIPE 0: CATEGORY #################

  # PIPE 0, model 1: sentence embeddings for the tokenized questions.
  pipe0_question_embeddings:
    type: SentenceEmbeddings
    priority: 0.3
    # Embedding setup: 50-dimensional vectors, GloVe file and word mappings.
    embeddings_size: 50
    pretrained_embeddings_file: 'glove.6B.50d.txt'
    data_folder: '~/data/vqa-med'
    word_mappings_file: 'questions.all.word.mappings.csv'
    # LOAD AND FREEZE: take the `question_embeddings` model from the
    # question-categorization checkpoint and keep it frozen.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: question_embeddings
    freeze: true
    streams:
      inputs: tokenized_questions
      outputs: pipe0_embedded_questions
  
  # PIPE 0, model 2: LSTM over the embedded questions.
  pipe0_lstm:
    type: RecurrentNeuralNetwork
    priority: 0.4
    cell_type: LSTM
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: false
    # LOAD AND FREEZE: take the `lstm` model from the question-categorization
    # checkpoint and keep it frozen.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: lstm
    freeze: true
    streams:
      inputs: pipe0_embedded_questions
      predictions: pipe0_questions_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_lstm_output_size

  # PIPE 0, model 3: feed-forward network predicting the question category.
  pipe0_classifier:
    type: FeedForwardNetwork
    priority: 0.5
    hidden: [50]
    dropout_rate: 0.5
    # LOAD AND FREEZE: take the `classifier` model from the
    # question-categorization checkpoint and keep it frozen.
    load:
      file: '~/image-clef-2019/experiments/q_categorization/20190416_120801/checkpoints/vqa_med_question_categorization_rnn_ffn_best.pt'
      model: classifier
    freeze: true
    streams:
      inputs: pipe0_questions_activations
      predictions: pipe0_predicted_question_categories_preds
    globals:
      input_size: question_lstm_output_size  # Set by global publisher
      prediction_size: num_categories  # C1,C2,C3,C4, BINARY, UNK

  # PIPE 0: word decoder fed with the classifier predictions; its output
  # stream carries the predicted category names.
  pipe0_category_decoder:
    type: WordDecoder
    priority: 0.6
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: true
    globals:
      vocabulary_size: num_categories
      word_mappings: category_word_mappings
    streams:
      inputs: pipe0_predicted_question_categories_preds
      outputs: pipe0_predicted_question_categories_names

  # PIPE 0: accuracy of the category predictions against `category_ids`,
  # reported under the name `categorization_accuracy`.
  pipe0_category_accuracy:
    type: AccuracyStatistics
    priority: 0.7
    streams:
      predictions: pipe0_predicted_question_categories_preds
      targets: category_ids
    statistics:
      accuracy: categorization_accuracy
  
  ################# PIPE 1: SHARED QUESTION ENCODER #################

  # PIPE 1, model 1: sentence embeddings for the tokenized questions.
  # Unlike the PIPE 0 embeddings, no checkpoint is loaded and nothing is frozen.
  pipe1_question_embeddings:
    type: SentenceEmbeddings
    priority: 1.1
    # Embedding setup: 50-dimensional vectors, GloVe file and word mappings.
    embeddings_size: 50
    pretrained_embeddings_file: 'glove.6B.50d.txt'
    data_folder: '~/data/vqa-med'
    word_mappings_file: 'questions.all.word.mappings.csv'
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions
  
  # PIPE 1, model 2: LSTM over the embedded questions; its predictions feed
  # the concat component as `questions_activations`.
  pipe1_lstm:
    type: RecurrentNeuralNetwork
    priority: 1.2
    cell_type: LSTM
    prediction_mode: Last
    initial_state: Trainable
    use_logsoftmax: false
    streams:
      inputs: embedded_questions
      predictions: questions_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_lstm_output_size

  # PIPE 1: answer encoding - label indexer from `answers` to `all_answers_ids`.
  pipe1_all_answer_indexer:
    type: LabelIndexer
    priority: 1.3
    data_folder: '~/data/vqa-med'
    word_mappings_file: 'answers.c1_c2_c3_without_yn.word.mappings.csv'
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    globals:
      vocabulary_size: vocabulary_size_c123_without_yn
      word_mappings: word_mappings_c123_without_yn
    streams:
      inputs: answers
      outputs: all_answers_ids

  ################# PIPE 2: SHARED IMAGE ENCODER #################

  # PIPE 2: image encoder (resnet152) from `images` to `image_activations`.
  image_encoder:
    type: GenericImageEncoder
    priority: 2.1
    model: resnet152
    globals:
      # image_encoder_output_size is published by global_publisher (100).
      output_size: image_encoder_output_size
    streams:
      inputs: images
      outputs: image_activations

  ################# PIPE 3: SHARED IMAGE SIZE ENCODER #################

  # PIPE 3: image size encoder - a feed-forward network mapping the
  # `image_sizes` stream to `image_size_activations`, which the concat
  # component consumes.
  image_size_encoder:
    type: FeedForwardNetwork
    priority: 3.1
    # Disable log-softmax on the output, as the LSTM encoders in this file do.
    # NOTE(review): a misspelled option key is presumably ignored and the
    # component default used instead - confirm against FeedForwardNetwork.
    use_logsoftmax: false
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      # Both sizes are published by global_publisher (2 and 10).
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  ################# PIPE 4: SHARED CONCAT #################

  # PIPE 4: joins question, image and image-size activations along dim 1.
  concat:
    type: ConcatenateTensor
    priority: 4.1
    input_streams:
      - questions_activations
      - image_activations
      - image_size_activations
    dim: 1  # default
    # Second-dimension sizes 100 + 100 + 10 add up to the output size 210.
    input_dims: [[-1, 100], [-1, 100], [-1, 10]]
    output_dims: [-1, 210]
    streams:
      outputs: concatenated_activations
    globals:
      output_size: concatenated_activations_size


  ################# PIPE 5: C1 + C2 + C3 questions #################

  # PIPE 5: answer encoding - label indexer from `answers` to the target ids
  # used by the PIPE 5 loss and precision/recall components.
  pipe5_c123_without_yn_answer_indexer:
    type: LabelIndexer
    priority: 5.1
    data_folder: '~/data/vqa-med'
    word_mappings_file: 'answers.c1_c2_c3_without_yn.word.mappings.csv'
    # Export mappings and size to globals.
    export_word_mappings_to_globals: true
    globals:
      vocabulary_size: vocabulary_size_c123_without_yn
      word_mappings: word_mappings_c123_without_yn
    streams:
      inputs: answers
      outputs: pipe5_c123_without_yn_answers_ids

  # PIPE 5: sample masking driven by the category names predicted in PIPE 0.
  pipe5_c123_without_yn_string_to_mask:
    type: StringToMask
    priority: 5.2
    streams:
      strings: pipe0_predicted_question_categories_names
      masks: pipe5_c123_without_yn_masks
      string_indices: predicted_c123_by_question_categories_indices  # NOT USED
    globals:
      # Mapping published by global_publisher: C1 -> 0, C2 -> 1, C3 -> 2.
      word_mappings: category_c123_without_yn_word_to_ix

  # Model 4: FFN answering the C1 + C2 + C3 questions from the concatenated
  # activations.
  pipe5_c123_without_yn_ffn:
    type: FeedForwardNetwork
    priority: 5.3
    hidden: [100]
    dropout_rate: 0.5
    globals:
      input_size: concatenated_activations_size
      prediction_size: vocabulary_size_c123_without_yn
    streams:
      inputs: concatenated_activations
      predictions: pipe5_c123_without_yn_predictions

  # PIPE 5: NLL loss over the FFN predictions, with masking enabled and the
  # masks taken from the string-to-mask component's output stream.
  pipe5_c123_without_yn_nllloss:
    type: NLLLoss
    priority: 5.4
    use_masking: true
    targets_dim: 1
    streams:
      predictions: pipe5_c123_without_yn_predictions
      targets: pipe5_c123_without_yn_answers_ids
      masks: pipe5_c123_without_yn_masks
      loss: pipe5_c123_without_yn_loss

  # PIPE 5: precision / recall / F1 statistics for the FFN predictions,
  # using the same masks and targets as the loss component.
  pipe5_c123_without_yn_precision_recall:
    type: PrecisionRecallStatistics
    priority: 5.5
    use_masking: true
    use_word_mappings: true
    show_class_scores: true
    # show_confusion_matrix: true
    globals:
      word_mappings: word_mappings_c123_without_yn
    streams:
      predictions: pipe5_c123_without_yn_predictions
      targets: pipe5_c123_without_yn_answers_ids
      masks: pipe5_c123_without_yn_masks
    statistics:
      precision: pipe5_c123_without_yn_precision
      recall: pipe5_c123_without_yn_recall
      f1score: pipe5_c123_without_yn_f1score

  # PIPE 5: word decoder fed with the C1/C2/C3 predictions; its output stream
  # is `predicted_answers`.
  pipe5_prediction_decoder:
    type: WordDecoder
    priority: 5.6
    # Use the same word mappings as label indexer.
    import_word_mappings_from_globals: true
    globals:
      word_mappings: word_mappings_c123_without_yn
    streams:
      inputs: pipe5_c123_without_yn_predictions
      outputs: predicted_answers

  ################# PIPE 9: MERGE ANSWERS #################


  # Viewer: stream viewer for questions, categories, masks and answers.
  viewer:
    type: StreamViewer
    priority: 9.3
    # One comma-separated string; the folded scalar joins the lines below
    # with single spaces and drops the trailing newline.
    input_streams: >-
      tokenized_questions, category_names,
      pipe0_predicted_question_categories_names,
      pipe5_c123_without_yn_masks,
      answers, predicted_answers


#: pipeline