app/views/shared/results/_distances.html.erb
<%
  # Setup for the distances partial. Reads the values `project`, `job`,
  # `dataset` and `res`, none of which are assigned here. Produces:
  #   ref_db - project passed to link_to_reference_dataset (may be nil)
  #   db     - hash of open SQLite handles under :ani / :aai (nil when unusable)
  #   val    - up to two [seq2, identity] rows, highest identity first
  #   val_m  - 'ANI' or 'AAI', naming the table `val` was read from (or nil)

  ref_db = project
  if job == :taxonomy && dataset.ref?
    # For taxonomy of a reference dataset the links point at the project named
    # by the :ref_project metadata entry; ref_db stays nil if it cannot be
    # resolved.
    ref_db = nil
    pm = project.miga
    ref_path = pm.metadata[:ref_project] if pm
    ref_db = Project.find_by(path: File.basename(ref_path)) if ref_path
  end

  db = {}
  val = []
  val_m = nil

  # ANI is tried first.
  ani_path = res.file_path(:ani_db)
  unless ani_path.nil?
    begin
      db[:ani] = SQLite3::Database.new(ani_path)
      val = db[:ani].execute(
        'SELECT seq2, ani FROM ani WHERE seq2 != ? ' \
        'GROUP BY seq2 ORDER BY ani DESC LIMIT 2', [dataset.name])
      val_m = 'ANI'
    rescue StandardError
      # Best effort: an unreadable database is treated as absent.
      db[:ani] = nil
    end
  end

  # AAI is always opened when present (the tables below list it), but it only
  # supplies the closest relatives when ANI produced none. The path is checked
  # up front, like ANI, instead of relying on the rescue to absorb a nil path.
  aai_path = res.file_path(:aai_db)
  unless aai_path.nil?
    begin
      db[:aai] = SQLite3::Database.new(aai_path)
      if val.empty?
        val = db[:aai].execute(
          'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
          'GROUP BY seq2 ORDER BY aai DESC LIMIT 2', [dataset.name])
        val_m = 'AAI'
      end
    rescue StandardError
      # Best effort: an unreadable database is treated as absent.
      db[:aai] = nil
    end
  end

  # For non-reference datasets, show a notice when the project's clade result
  # was created after this distances result.
  unless dataset.ref?
    miga_p = dataset.is_a?(MiGA::Dataset) ?
      dataset.project : dataset.project.miga
    clades_job = miga_p.is_clade? ? :subclades : :clade_finding
    clades = miga_p.result(clades_job)
    unless clades.nil?
      clades_t = DateTime.parse(clades.data[:created])
      if DateTime.parse(res.data[:created]) < clades_t
%>
<div class="alert alert-info">
  The database was updated <%= time_ago_in_words(clades_t) %> ago,
  <%= link_to 'recalculate distances',
        query_dataset_run_distances_url(dataset) %>.
</div>
<%
      end
    end
  end
%>
<p>
  <%
    # Summary sentence for the one or two best hits in `val`. Also sets
    # max_val (identity of the best hit) and max_val_ds (dataset object of the
    # best hit), which are read after this paragraph.
    max_val = 0.0
    max_val_ds = nil

    # Each row is [dataset name, identity]; rows arrive sorted best-first.
    closest = val.first
    runner_up = val.second

    if closest
      max_val = closest.second
      # ref_db can be nil (no reference project resolved) and so can its miga
      # object; in that case max_val_ds stays nil instead of raising
      # NoMethodError.
      ref_miga = ref_db && ref_db.miga
      max_val_ds = ref_miga.dataset(closest.first) if ref_miga
    end
    # NOTE(review): link_to_reference_dataset is still called with a possibly
    # nil ref_db, as before - confirm the helper tolerates that.
  %>
  <% if closest && runner_up %>
    The closest relatives found by MiGA in the database were
    <%= link_to_reference_dataset(ref_db, closest.first) %>
    (<%= closest.second.round 2 %>% <%= val_m %>) and
    <%= link_to_reference_dataset(ref_db, runner_up.first) %>
    (<%= runner_up.second.round 2 %>% <%= val_m %>).
  <% elsif closest %>
    The closest relative found by MiGA in the database was
    <%= link_to_reference_dataset(ref_db, closest.first) %>
    (<%= closest.second.round 2 %>% <%= val_m %>).
  <% else %>
    MiGA couldn't find any close relatives registered in the database.
  <% end %>
</p>
<%
  # The classification and novelty sections are rendered only when all hold:
  #   - this is not the :distances job of a reference dataset,
  #   - a dataset object was found for the closest relative (max_val_ds),
  #   - project.miga.is_clade? is false.
  # The checks short-circuit in that order.
  ref_distances = dataset.ref? && job == :distances
  show_taxonomy = !ref_distances && !max_val_ds.nil? &&
                  !project.miga.is_clade?
%>
<% if show_taxonomy %>
<h4>Taxonomic classification
<%= info_msg("Taxonomic classification of query datasets") do %>
<p>
The taxonomic classification of your dataset is inferred by the maximum
Average Amino Acid Identity (AAI) found against all the genomes in the
database. The p-value is estimated from the empirical distribution
observed in all the reference genomes of NCBI's RefSeq at each taxonomic
level, and indicates the probability of a different classification with
the observed AAI.
</p><p>
More formally, the p-value indicates the probability of
any two genomes not being classified in the same taxon at that taxonomic
rank, given that the AAI between them is greater than or equal to the one
observed. For example, a p-value of 0.01 for a genus classification
indicates that only 1% of all the pairs of genomes in RefSeq with AAI
above the observed value are from different genera; therefore the query
dataset most likely belongs to the same genus as the closest relative in
the database.
</p>
<% end %></h4>
<%# Classification text is produced by the aai_intax helper. %>
<p><%= aai_intax(max_val, ref_db, max_val_ds) %></p>
<h4>Taxonomic novelty
<%= info_msg("Taxonomic novelty of query datasets") do %>
<p>
The taxonomic novelty of the query dataset is an analysis indicating the
taxonomic rank at which your dataset represents a novel taxon with respect
to the database. Note that the database does not contain genomes for all
described species, and the query dataset may belong to a previously
described taxon not represented in the database.
</p><p>
The taxonomic novelty is determined by the maximum Average Amino Acid
Identity (AAI) found against the genomes in the database. The p-value is
estimated from the empirical distribution observed in all the reference
genomes of NCBI's RefSeq at each taxonomic level, and indicates the
probability of the observed AAI between genomes in the same taxon.
</p><p>
More formally, the p-value indicates the probability of any two genomes
having an AAI lesser than or equal to the one observed, given that they
are classified in the same taxon at that taxonomic rank. For example, a
p-value of 0.01 for a novel family indicates that only 1% of all the
pairs of genomes in RefSeq classified in the same family have an AAI below
the observed value; therefore the query dataset most likely belongs to a
family not represented in the database.
</p>
<% end %></h4>
<%# Novelty text is produced by the aai_novel helper. %>
<p><%= aai_novel(max_val, ref_db) %></p>
<% end %>
<%# Heading and help text for the ANI/AAI tables that follow. %>
<h4>Genome relatedness
<%= info_msg "Genome relatedness to database references" do %>
<p>
Average sequence identities to reference datasets in the database. Displays
only the top-50 values.
</p>
<%# A space after the colon keeps the label and its expansion from running together. %>
<p><b>ANI:</b> Average Nucleotide Identity</p>
<p><b>AAI:</b> Average Amino Acid Identity</p>
<% end %></h4>
<%# The tree note is shown only when the result has a :ref_tree_pdf file. %>
<% if res.file_path(:ref_tree_pdf) %>
<p>
This dataset was placed in the context of some representative genomes from
the reference collection. See below the <b>ref tree</b> (in Newick format)
and <b>ref tree pdf</b> (in PDF).
</p>
<% end %>
<%# One collapsible panel per metric (ANI, AAI) listing up to 50 datasets, highest identity first. %>
<div class="panel-group" id=distances role=tablist
    aria-multiselectable=true>
  <% %w[ani aai].each do |metric| %>
    <%
      # Skip metrics whose database is missing or could not be opened.
      metric_db = db[metric.to_sym]
      next if metric_db.nil?
      # `metric` only takes the two literal values listed above, so
      # interpolating it as table/column name is not an injection risk; the
      # dataset name is still passed as a bound parameter.
      db_qry = metric_db.execute(
        "SELECT seq2, #{metric}, sd, 100.0*n/omega " \
        "FROM #{metric} WHERE seq2 != ? " \
        "GROUP BY seq2 ORDER BY #{metric} DESC " \
        "LIMIT 50", [dataset.name])
      # No panel is rendered for a metric without rows.
      next if db_qry.empty?
    %>
    <div class="panel panel-default">
      <div class=panel-heading role=tab id="<%= metric %>-h">
        <h4 class=panel-title>
          <a role=button data-toggle=collapse data-parent="#distances"
              class=btn href="#<%= metric %>-b" aria-expanded=false
              aria-controls="<%= metric %>-b">
            <%= metric.upcase %> table
          </a>
        </h4>
      </div>
      <div id="<%= metric %>-b" class="panel-collapse collapse"
          role=tabpanel aria-labelledby="<%= metric %>-h">
        <div class="panel-body table-responsive">
          <table class="table table-hover">
            <thead><tr>
              <th>Dataset</th>
              <th><%= metric.upcase %> (%)</th>
              <th>Standard deviation (<%= metric.upcase %>%)</th>
              <th>Fraction of <%= metric == 'ani' ? 'genome' : 'proteins' %>
                shared (%)</th>
            </tr></thead>
            <tbody>
              <% db_qry.each do |row| %>
                <tr>
                  <% row.each do |cell| %>
                    <td>
                      <%# Any number is rounded for display (full value in the tooltip); Integer cells previously fell through to `empty?` and raised. %>
                      <% if cell.is_a?(Numeric) %>
                        <span rel=tooltip title="<%= cell %>"><%=
                          cell.round(2) %></span>
                      <% elsif cell.nil? || cell.empty? %>
                        <span class=text-muted>(estimated)</span>
                      <% else %>
                        <%= link_to_reference_dataset(ref_db, cell) %>
                      <% end %>
                    </td>
                  <% end %>
                </tr>
              <% end %>
            </tbody>
          </table>
        </div>
      </div>
    </div>
  <% end %>
</div>
<%
  # Close the SQLite handles opened for this partial so they are not left for
  # the garbage collector to release.
  # NOTE(review): assumes nothing after this point reads `db` - confirm if the
  # partial continues beyond this section.
  db.each_value { |conn| conn.close if conn && !conn.closed? }
%>