IBM/pytorchpipe

View on GitHub
configs/vqa_med_2019/extend_answers_c4.yml

Summary

Maintainability
Test Coverage
# This config is not a standalone config!
# It adds new sections (sets) that use no samplers, plus pipeline components that save the answers,
# so that they can later be used for producing the final answers.

# Section reading the training split (category C4) in its original sample order.
training_answers:
  dataloader:
    # A single worker and no shuffling (no sampler is used), so batches
    # follow the order of the samples.
    num_workers: 1
    shuffle: false
  task:
    # The anchors defined here (&p_type, &data_folder, &resize_image) are
    # referenced through aliases by the validation section of this file.
    type: &p_type VQAMED2019
    split: training
    categories: C4
    data_folder: &data_folder '~/data/vqa-med'
    batch_size: 64
    resize_image: &resize_image [224, 224]
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    # Stream name mapping: questions -> tokenized_questions.
    streams:
      questions: tokenized_questions

# Section reading the validation split (category C4) in its original sample order.
validation_answers:
  dataloader:
    # A single worker and no shuffling (no sampler is used), so batches
    # follow the order of the samples.
    num_workers: 1
    shuffle: false
  task:
    # type, data_folder and resize_image are aliases of anchors defined
    # earlier in this file.
    type: *p_type
    split: validation
    categories: C4
    data_folder: *data_folder
    batch_size: 64
    resize_image: *resize_image
    # Apply all preprocessing/data augmentations.
    question_preprocessing: lowercase,remove_punctuation,tokenize
    # Stream name mapping: questions -> tokenized_questions.
    streams:
      questions: tokenized_questions


# Add components for viewing the answers and exporting them to files.
pipeline:
  # Comma-separated names of components to disable.
  # NOTE(review): presumably these are defined in the config(s) that this file
  # extends - they are not declared here; confirm.
  disable: viewer,question_tokenizer

  # Viewers.
  # Shows the listed streams for sample number 0.
  viewer_extended:
    priority: 100.4
    type: StreamViewer
    sample_number: 0
    # Folded scalar (>-): the lines are joined with single spaces into one
    # comma-separated string, without a trailing newline.
    input_streams: >-
      indices,image_ids,tokenized_questions,
      category_names,predicted_categories,
      answers,tokenized_answers,predicted_answers

  # Exports the listed streams to 'answers.csv', using '|' as the separator.
  answer_exporter:
    priority: 100.5
    type: StreamFileExporter
    separator: '|'
    filename: 'answers.csv'
    # Canonical lowercase boolean.
    export_separator_line_to_csv: true
    # Folded scalar (>-): joined into one comma-separated string.
    input_streams: >-
      indices,image_ids,tokenized_questions,
      category_names,predicted_categories,
      answers,tokenized_answers,predicted_answers

  # Exports only image ids and predicted answers to 'submission.txt',
  # using '|' as the separator.
  submission_exporter:
    priority: 100.6
    type: StreamFileExporter
    separator: '|'
    filename: 'submission.txt'
    # Folded scalar (>-): joined into one comma-separated string.
    input_streams: >-
      image_ids,
      predicted_answers

#: pipeline