lib/miga/project/base.rb
# frozen_string_literal: true
require 'miga/common/with_option'
class MiGA::Project < MiGA::MiGA
include MiGA::Common::WithOption
class << self
##
# Does the project at +path+ exist?
def exist?(path)
Dir.exist?(path) and File.exist?("#{path}/miga.project.json")
end
##
# Load the project at +path+. Returns MiGA::Project if project exists, nil
# otherwise.
def load(path)
return nil unless exist? path
new path
end
def INCLADE_TASKS
@@INCLADE_TASKS
end
def DISTANCE_TASKS
@@DISTANCE_TASKS
end
def KNOWN_TYPES
@@KNOWN_TYPES
end
def RESULT_DIRS
@@RESULT_DIRS
end
def OPTIONS
@@OPTIONS
end
end
end
module MiGA::Project::Base
##
# Top-level folders inside a project
@@FOLDERS = %w[data metadata daemon]
##
# Folders for results
@@DATA_FOLDERS = %w[
01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
05.assembly 06.cds
07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
07.annotation/01.function/01.essential
07.annotation/01.function/02.ssu
07.annotation/02.taxonomy/01.mytaxa
07.annotation/03.qa 07.annotation/03.qa/02.mytaxa_scan
08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
09.distances 09.distances/01.haai 09.distances/02.aai
09.distances/03.ani 09.distances/04.ssu 09.distances/05.taxonomy
10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
90.stats
]
##
# Directories containing the results from project-wide tasks
@@RESULT_DIRS = {
project_stats: '90.stats',
# Distances
haai_distances: '09.distances/01.haai',
aai_distances: '09.distances/02.aai',
ani_distances: '09.distances/03.ani',
# ssu_distances: '09.distances/04.ssu',
# Clade identification
clade_finding: '10.clades/01.find',
# Clade analysis
subclades: '10.clades/02.ani',
ogs: '10.clades/03.ogs'
# ess_phylogeny: '10.clades/04.phylogeny/01.essential',
# core_phylogeny: '10.clades/04.phylogeny/02.core',
# clade_metadata: '10.clades/05.metadata'
}
##
# Supported types of projects
@@KNOWN_TYPES = {
mixed: {
description: 'Mixed collection of genomes, metagenomes, and viromes',
single: true, multi: true, markers: true
},
genomes: {
description: 'Collection of genomes',
single: true, multi: false, markers: true
},
clade: {
description: 'Collection of closely-related genomes (ANI >= 90%)',
single: true, multi: false, markers: true
},
metagenomes: {
description: 'Collection of metagenomes and/or viromes',
single: false, multi: true, markers: true
},
plasmids: {
description: 'Collection of plasmids',
single: true, multi: false, markers: false
}
}
##
# Project-wide distance estimations
@@DISTANCE_TASKS = %i[
project_stats haai_distances aai_distances ani_distances
clade_finding
]
##
# Project-wide tasks for :clade projects
@@INCLADE_TASKS = %i[subclades ogs]
##
# Options supported by projects
@@OPTIONS = {
ref_project: {
desc: 'Project with reference taxonomy', type: String
},
db_proj_dir: {
desc: 'Directory containing database projects', type: String
},
tax_pvalue: {
desc: 'Maximum p-value to transfer taxonomy', default: 0.1, type: Float,
in: 0.0..1.0
},
haai_p: {
desc: 'Value of aai.rb -p on hAAI', type: String,
default: proc { |project|
project.clade? || !project.markers? ? 'no' : 'fastaai'
},
in: %w[blast+ blast blat diamond fastaai no]
},
aai_p: {
desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
in: %w[blast+ blast blat diamond]
},
ani_p: {
desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
in: %w[blast+ blast blat fastani]
},
max_try: {
desc: 'Maximum number of task attempts', default: 10, type: Integer,
in: (0..1000)
},
aai_save_rbm: {
desc: 'Should RBMs be saved for OGS analysis?',
default: proc { |project| project.clade? },
in: [true, false]
},
ogs_identity: {
desc: 'Min RBM identity for OGS', default: 80.0, type: Float,
in: (0.0..100.0)
},
clean_ogs: {
desc: 'If false, keeps ABC files (clades only)', default: true,
in: [true, false]
},
run_clades: {
desc: 'Should clades be estimated from distances?', default: true,
in: [true, false]
},
gsp_ani: {
desc: 'ANI limit to propose gsp clades', default: 95.0, type: Float,
in: (0.0..100.0)
},
gsp_aai: {
desc: 'AAI limit to propose gsp clades', default: 90.0, type: Float,
in: (0.0..100.0)
},
gsp_metric: {
desc: 'Metric to propose clades', default: 'ani', type: String,
in: %w[ani aai]
},
ess_coll: {
desc: 'Collection of essential genes to use', default: 'dupont_2012',
type: String, in: %w[dupont_2012 lee_2019]
},
min_qual: {
desc: 'Minimum genome quality', default: 25.0, type: Float,
in: -Float::INFINITY..100.0, tokens: %w[no]
},
distances_checkpoint: {
desc: 'Number of comparisons before storing data', default: 10,
type: Integer, in: 1...Float::INFINITY
}
}
end