analyzer/Snakefile
configfile: "config.yaml"
sample = config['sample']
host_transcripts = config['reference']
prot_db = config['prot_db']
host_index = f"{host_transcripts}.kidx"
num_threads = config['threads']
rule all:
input:
"data/diamond_out_1.outfmt102"
rule index:
input: host_transcripts
output: host_index
conda:
"envs/kallisto.yaml"
shell:
"kallisto index -i {output} {input}"
rule kallisto_quant:
input:
host_index
f"data/{sample}_1.fastq",
f"data/{sample}_2.fastq",
output:
directory("data/kallisto_out")
conda:
"envs/kallisto.yaml"
threads: num_threads
shell:
"kallisto quant "
"-i {input[0]} "
"-b 0 "
"-o {output[0]} "
"--pseudobam "
"{input[1]} {input[2]}"
rule filter_not_aligned:
input:
"data/kallisto_out"
output:
"data/unmapped_1.fq",
"data/unmapped_2.fq"
conda:
"envs/kallisto.yaml"
shell:
"samtools view -u -f 12 -F 256 {input}/pseudoalignments.bam "
"| bamToFastq -i /dev/stdin -fq {output[0]} -fq2 {output[1]}"
rule diamond_make:
input:
prot_db
output:
"reference/nr.dmnd"
conda:
"envs/diamond.yaml"
threads: num_threads
shell:
"diamond makedb --threads {threads} --in {input} -d {output} --taxonmap "
"reference/prot.accession2taxid.gz --taxonnodes reference/nodes.dmp"
rule run_diamond:
input:
"data/unmapped_1.fq",
"data/unmapped_2.fq",
"reference/nr.dmnd"
output:
"data/diamond_out_1.outfmt102",
"data/diamond_out_2.outfmt102"
conda:
"envs/diamond.yaml"
threads: num_threads
shell:
"""
diamond blastx --threads {threads} -d {input[2]} -q {input[0]} --outfmt 102 -o {output[0]}
diamond blastx --threads {threads} -d {input[2]} -q {input[1]} --outfmt 102 -o {output[1]}
"""