cidc_schemas/pipeline_configs/rna_level1_analysis_config.yaml.j2
{#
participant_id_from_cimac: Callable[[str], str]
returns participant id from cimac id
data_bucket: str = GOOGLE_ACL_DATA_BUCKET
the name of the data bucket where the pipeline runner expects the input files to be available
samples: List[dict]
RNAseq assay records to be entered for this config
see schemas/assays/rna_assay-v0.json#properties/records/items
google_bucket_path: str
the path to a google bucket folder to store the results in
instance_name: str
the unique part of the instance name to use
NOTE: "rima-auto" will automatically be prepended to this string
#}
###############################################################################
# Automator configuration file
###############################################################################

# Give the new instance a unique name, e.g. the rima run name.
# NOTE: "rima-auto" will automatically be prepended to this string.
# Quoted so the rendered value always parses as a string, even when it is
# empty or looks like a number/boolean.
instance_name: "{{ instance_name }}"

# Define the number of cores for the rima instance.
# Options: 32 (default), 64, 96
cores: 64

# Define the disk size to use in GB, default 250.
# The name of the persistent disk will be: "rima_auto_{instance_name}_disk"
disk_size: 250

# Define the path to the google bucket path for the run.
# Quoted so characters such as ": " or " #" in the path cannot change how
# the rendered line is parsed.
google_bucket_path: "{{ google_bucket_path }}"

# Define the specific rima commit string to use
rima_commit: "f5533ec"

# Define the GCP service account to use
serviceAcct: "biofxvm@cidc-biofx.iam.gserviceaccount.com"

# Define the specific rima GCP image to use.
# NOTE: If a specific GCP image is not set via config['image'], then
# the default behavior is to get the latest rima image.
image: 'rima-ver3-1'

# Define the rima reference snapshot to use
rima_ref_snapshot: 'rima-v27-reference-v1-0'
# Define the samples: each sample should have a name, e.g. SAMPLE1,
# and a Google bucket path to the input file,
# e.g. gs://mybucket/data/sample1.fastq.gz
# Valid inputs: fastq, fastq.gz, bam
# NOTE: for PAIRED-END fastq/fastq.gz, give both pairs to the sample:
#   SAMPLE_1_PE:
#     - gs://mybucket/data/sample1_pair1.fastq
#     - gs://mybucket/data/sample1_pair2.fastq
#
# One entry is rendered per record in `samples`, keyed by its cimac_id.
# Every templated scalar is quoted so that the rendered text is always parsed
# as a string, whatever characters the object URL contains.
samples:
  {% for sample in samples %}
  "{{ sample.cimac_id }}":
    # either bam or both r1/r2
    {% if sample.files.r1 %}
    # r1 (all r1 files are listed first, then all r2 files)
    {% for fastq_gz_artifact in sample.files.r1 %}
    - "gs://{{ data_bucket }}/{{ fastq_gz_artifact.object_url }}"
    {% endfor %}
    # r2
    {% for fastq_gz_artifact in sample.files.r2 %}
    - "gs://{{ data_bucket }}/{{ fastq_gz_artifact.object_url }}"
    {% endfor %}
    {% else %}
    # bam (used only when the record has no r1 files)
    {% for bam_artifact in sample.files.bam %}
    - "gs://{{ data_bucket }}/{{ bam_artifact.object_url }}"
    {% endfor %}
    {% endif %}
  {% endfor %}
# Metasheet: one entry per record in `samples`, keyed by its cimac_id.
# SampleName repeats the cimac_id; PatName is whatever
# participant_id_from_cimac returns for that cimac_id.
# Templated scalars are quoted so they are always parsed as strings.
metasheet:
  {% for sample in samples %}
  "{{ sample.cimac_id }}":
    SampleName: "{{ sample.cimac_id }}"
    PatName: "{{ participant_id_from_cimac(sample.cimac_id) }}"
  {% endfor %}