# test_ngi_config.yaml
# An edited copy of this file should be placed under $HOME/.ngiconfig/ngi_config.yaml
# or its path exported as the environment variable NGI_CONFIG
# Analysis setup: which engine handles each workflow / best-practice analysis,
# and the root directories the pipeline works from.
analysis:
  # Each entry names its engine as a dotted ngi_pipeline.engines.* path.
  workflows:
    NGI:
      analysis_engine: ngi_pipeline.engines.piper_ngi
    mate_pair:
      analysis_engine: ngi_pipeline.engines.de_novo_pipeline
    RNA:
      analysis_engine: ngi_pipeline.engines.bcbio_ngi
  best_practice_analysis:
    whole_genome_reseq:
      analysis_engine: ngi_pipeline.engines.piper_ngi
    IGN:
      analysis_engine: ngi_pipeline.engines.piper_ngi
    qc:
      analysis_engine: ngi_pipeline.engines.qc_ngi
    wgs_germline:
      analysis_engine: ngi_pipeline.engines.sarek
    exome_germline:
      analysis_engine: ngi_pipeline.engines.sarek
  # top_dir is the directory the pipeline uses as the starting point when
  # building up the analysis run; see ngi_pipeline/utils/filesystem.py for
  # details.
  # On nestor the default is /proj/a2014205/nobackup/NGI/analysis_ready
  top_dir: /lupus/ngi/staging/wildwest/ngi2016001/nobackup/NGI
  sthlm_root: ngi2016003
  upps_root: ngi2016001
  # On nestor this is simply /proj
  base_root: /lupus/ngi/staging/wildwest
# Local record-tracking database settings.
database:
  # SQLite file recording what/where/how things are happening (a state machine
  # that backs up Charon in case of network failure).
  # - On nestor it is at
  #   /proj/a2014205/ngi_resources/record_tracking_database_nestor.sql
  # - If you try to work without one, an empty database with the following
  #   schema is created:
  #     sqlite> .fullschema
  #     CREATE TABLE sampleanalysis (
  #         project_id VARCHAR(50) NOT NULL,
  #         project_name VARCHAR(50),
  #         project_base_path VARCHAR(100),
  #         sample_id VARCHAR(50) NOT NULL,
  #         workflow VARCHAR(50) NOT NULL,
  #         engine VARCHAR(50),
  #         analysis_dir VARCHAR(100),
  #         process_id INTEGER,
  #         slurm_job_id INTEGER,
  #         PRIMARY KEY (project_id, sample_id, workflow)
  #     );
  # This entry is compulsory. Check the path before use so that you do not
  # overwrite the production database.
  # NOTE(review): the earlier wording said the entry is left commented out to
  # force an edit, but it is set here -- confirm this is intended.
  record_tracking_db_path: /lupus/ngi/staging/wildwest/ngi2016001/private/db/record_tracking_database.sql
# Runtime environment: project, script location, conda env and flowcell inboxes.
environment:
  project_id: ngi2016001
  # Directory containing scripts like ngi_pipeline_start.py,
  # print_running_jobs.py etc.
  # On nestor the production code is at
  # /proj/a2014205/software/ngi_pipeline/scripts
  ngi_scripts_dir: /lupus/ngi/staging/latest/sw/ngi_pipeline/scripts
  conda_env: ngi_pipeline
  # Flowcell directories; each path string must contain either /a2014205/ or
  # /a2015179/; see
  # ngi_pipeline/ngi_pipeline/conductor/flowcell.py:setup_analysis_directory_structure()
  # for details.
  # On nestor the defaults should be /proj/a2014205/archive and
  # /proj/a2015179/archive.
  # NOTE(review): the paths below contain ngi2016001/ngi2016003 instead --
  # confirm the requirement above still applies.
  flowcell_inbox:
    - /lupus/ngi/staging/wildwest/ngi2016001/incoming
    - /lupus/ngi/staging/wildwest/ngi2016003/incoming
# Log file location for the pipeline.
logging:
  # log_file is compulsory: if it is not defined you will get an exception.
  # Check the path before use so that you do not overwrite the production log.
  # The default location is /proj/a2014205/ngi_resources/ngi_pipeline.log
  # NOTE(review): the earlier wording said this entry is left commented out,
  # but it is set here -- confirm this is intended.
  log_file: /lupus/ngi/staging/wildwest/ngi2016001/private/log/ngi_pipeline.test.log
# Optional hard-coded paths; every concrete entry is currently commented out,
# so `binaries` and `references` load as null.
paths:  # Hard code paths here if you are that kind of a person
  binaries:
    #bowtie2:
    #fastqc:
    #fastq_screen:
  references:
  # NOTE(review): nesting level of the two entries below is unclear -- confirm
  # before enabling them.
  #log: /base/to/proj/a2010002/data/log
  #store_dir: /base/to/proj/a2010002/archive
# Settings for the piper engine.
piper:
  # The engine we are usually running; further engines should be configured
  # in the same way.
  # path_to_piper_qscripts can also be set through the environment variable
  # $PIPER_QSCRIPTS_DIR.
  # The nestor default location is /proj/a2014205/software/piper/qscripts
  path_to_piper_qscripts: /base/to/proj/piper/qscripts
  load_modules:
    - java/sun_jdk1.7.0_25
    - R/2.15.0
  threads: 16
  job_walltime:
    # Quoted so the value stays a string rather than being type-inferred.
    merge_process_variantcall: "10-00:00:00"
  #sample:
  #  required_autosomal_coverage: 28.4
  shell_jobrunner: Shell
  #shell_jobrunner: ParallelShell --super_charge --ways_to_split 4
  #jobNative:
  #  - arg1
  #  - arg2
  #  - arg3
# Settings for the sarek engine.
sarek:
  # Quoted: an unquoted 2.6 is loaded as a float, which would mangle version
  # strings such as 2.10 (-> 2.1).
  tag: "2.6"
  tools:
    - haplotypecaller
    - snpeff
  # Base directory per genome build.
  genomes_base_paths:
    GRCh37: /sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37/
    GRCh38: /sw/data/uppnex/ToolBox/hg38bundle/
# These qc modules are related to pre-analysis QC runs.
qc:
  load_modules:
    - bioinfo-tools
  fastqc:
    load_modules:
      - FastQC
    threads: 16
  fastq_screen:
    config_path: "/proj/a2014205/ngi_resources/fastq_screen.nestor.conf"
    load_modules:
      - bowtie2
      - fastq_screen
    subsample_reads: 200000
    threads: 1
# SLURM job submission settings.
slurm:
  # Flag/value pairs; the key is quoted because it starts with "-".
  extra_params:
    "--qos": "seqver"
  cores: 16
# Genome name -> reference FASTA path.
# NOTE(review): a plain `None` is not YAML null; it loads as the string
# "None" (null is spelled `null` or `~`). Values are left unchanged here --
# confirm what the consumer expects before switching them to null.
supported_genomes:
  #"GRCh37": "/apus/data/uppnex/reference/Homo_sapiens/GRCh37/concat/Homo_sapiens.GRCh37.57.dna.concat.fa"
  "GRCh37": /proj/a2014205/piper_references/gatk_bundle/2.8/b37/human_g1k_v37.fasta
  "GRCm38": "/apus/data/uppnex/reference/Mus_musculus/GRCm38/concat/Mus_musculus.GRCm38.69.dna.concat.fa"
  "rn4": None
  "saccer2": None
  "dm3": None
  "tair9": None
  "xentro2": None
  "ws210": None
  "canfam3": None
#project:
# INBOX: /proj/a2010002/archive
# Test fixtures, grouped per workflow.
# NOTE(review): nesting reconstructed from key names -- confirm against the
# code that reads test_data.
test_data:
  workflows:
    whole_genome_reseq:
      test_project:
        project_id: P0000
        project_name: "Y.Mom_15_01"
        bpa: "whole_genome_reseq"
      # Presumably overrides applied on top of the main config for this test;
      # verify against the test harness.
      customize_config:
        analysis:
          top_dir: /proj/a2014205/nobackup/NGI/test_data/
        charon:
          charon_base_url: http://charon-dev.scilifelab.se
      local_files:
        flowcell: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/150424_ST-E00214_0031_BH2WY7CCXX
        #project: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/DATA/P0000
        vcf: /proj/a2014205/nobackup/NGI/test_data/whole_genome_reseq/vcf/
# Mail notification settings.
mail:
  # For testing you can change the default recipient here.
  # By default (when this value is not set) mails are sent to
  # "ngi_pipeline_operators@scilifelab.se"
  # (see ngi_pipeline/utils/communication.py for details).
  # If you do not want mails at all, use the "quiet" option, e.g.
  #   scripts/print_running_jobs.py -q
  recipient: user@scilifelab.se