IBM/pytorchpipe
configs/vqa_med_2019/c3_classification/c3_classification_all_rnn_vgg16_concat.yml

# Load config defining tasks for training, validation and testing.
default_configs: vqa_med_2019/c3_classification/default_c3_classification.yml

pipeline:

  global_publisher:
    type: GlobalVariablePublisher
    priority: 0
    # Publish the size variables listed below to globals.
    keys: [question_embeddings_output_size, image_size_encoder_input_size, image_size_encoder_output_size, image_encoder_output_size]
    values: [100, 2, 10, 100]
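    # Values correspond positionally to the keys above: 100-d question encoding,
    # 2 inputs to the image size encoder (presumably image width and height),
    # 10-d image size encoding, 100-d image encoding.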

  # 1st subpipeline: question.
  # Question encoding: tokenizer -> embeddings -> LSTM.
  question_tokenizer:
    type: SentenceTokenizer
    priority: 1.1
    streams: 
      inputs: questions
      outputs: tokenized_questions

  # Model 1: Embeddings
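  # Maps each question token to a 50-d vector, initialized from the pretrained GloVe 6B 50-d embeddings.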
  question_embeddings:
    type: SentenceEmbeddings
    priority: 1.2
    embeddings_size: 50
    pretrained_embeddings_file: glove.6B.50d.txt
    data_folder: ~/data/vqa-med
    word_mappings_file: questions.all.word.mappings.csv
    streams:
      inputs: tokenized_questions
      outputs: embedded_questions      
  
  # Model 2: RNN
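  # LSTM over the embedded question; only the activation of the last time step
  # is kept, and log-softmax is disabled because this is an intermediate
  # encoding rather than the final prediction.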
  question_lstm:
    type: RecurrentNeuralNetwork
    cell_type: LSTM
    prediction_mode: Last
    priority: 1.3
    use_logsoftmax: False
    initial_state: Zero
    #num_layers: 5
    hidden_size: 50
    streams:
      inputs: embedded_questions
      predictions: question_activations
    globals:
      input_size: embeddings_size
      prediction_size: question_embeddings_output_size

  # 2nd subpipeline: image size.
  # Model - image size encoder.
  image_size_encoder:
    type: FeedForwardNetwork 
    priority: 2.1
    streams:
      inputs: image_sizes
      predictions: image_size_activations
    globals:
      input_size: image_size_encoder_input_size
      prediction_size: image_size_encoder_output_size

  # 3rd subpipeline: image.
  # Image encoder.
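  # Generic image encoder; per the file name this configuration is meant to use
  # a VGG-16 backbone (assumed to come from the component/base-config defaults,
  # since it is not set explicitly here).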
  image_encoder:
    type: GenericImageEncoder
    priority: 3.1
    streams:
      inputs: images
      outputs: image_activations
    globals:
      output_size: image_encoder_output_size
  
  # 4th subpipeline: concatenation + FF.
  concat:
    type: ConcatenateTensor
    priority: 4.1
    input_streams: [question_activations, image_size_activations, image_activations]
    # Concatenate along dimension 1 (the feature dimension).
    dim: 1 # default
    input_dims: [[-1, 100], [-1, 10], [-1, 100]]
    output_dims: [-1, 210] # 100 + 10 + 100
    streams:
      outputs: concatenated_activations
    globals:
      output_size: output_size


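  # Final classifier: feed-forward network with one hidden layer of size 100,
  # predicting over the C3 answer vocabulary.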
  classifier:
    type: FeedForwardNetwork 
    hidden_sizes: [100]
    priority: 4.2
    streams:
      inputs: concatenated_activations
    globals:
      input_size: output_size
      prediction_size: vocabulary_size_c3


  #: pipeline