bio-miga/miga-web

View on GitHub
app/views/shared/results/_distances.html.erb

Summary

Maintainability
Test Coverage
<%
  # Project whose reference datasets the links in this partial point to.
  # It is the current project, except for taxonomy jobs on reference
  # datasets, where it is looked up through the :ref_project metadata entry
  # and stays nil when that entry or the matching Project record is missing.
  ref_db = project
  if job == :taxonomy && dataset.ref?
    ref_db = nil
    pm = project.miga
    ref_path = pm.metadata[:ref_project] if pm
    ref_db = Project.find_by(path: File.basename(ref_path)) if ref_path
  end

  # db    -> SQLite handles by metric (:ani, :aai); nil or absent if unusable.
  # val   -> up to two [dataset name, identity] rows of the closest relatives.
  # val_m -> label of the metric that produced val ('ANI' or 'AAI').
  db = {}
  val = []
  val_m = nil
  %w[ani aai].each do |metric|
    key = metric.to_sym
    db_path = res.file_path("#{metric}_db".to_sym)
    # Skip metrics without a database file instead of depending on the failed
    # open being rescued.
    next if db_path.nil?

    begin
      db[key] = SQLite3::Database.new(db_path)
      # ANI comes first; AAI is only queried when ANI produced no hits.
      # metric only takes the two literal values above, so interpolating it
      # is safe; the dataset name is bound as a parameter.
      if val.empty?
        val = db[key].execute(
          "SELECT seq2, #{metric} FROM #{metric} WHERE seq2 != ? " \
          "GROUP BY seq2 ORDER BY #{metric} DESC LIMIT 2", [dataset.name])
        val_m = metric.upcase
      end
    rescue StandardError
      # Best effort: a database that cannot be opened or queried simply hides
      # that metric in the rest of the partial.
      db[key] = nil
    end
  end

  # For non-reference datasets, warn when the clade result of the project is
  # newer than these distances and offer to recalculate them.
  unless dataset.ref?
    miga_p = dataset.is_a?(MiGA::Dataset) ?
              dataset.project : dataset.project.miga
    clades_task = miga_p.is_clade? ? :subclades : :clade_finding
    clades = miga_p.result(clades_task)
    unless clades.nil?
      clades_t = DateTime.parse(clades.data[:created])
      if DateTime.parse(res.data[:created]) < clades_t
        %>
        <div class="alert alert-info">
          The database was updated <%= time_ago_in_words(clades_t) %> ago,
          <%= link_to 'recalculate distances',
                query_dataset_run_distances_url(dataset) %>.
        </div>
        <%
      end
    end
  end
%>
<p>
  <%
    # Identity of the closest relative and its MiGA dataset; both are read by
    # the taxonomy sections further down in this partial.
    max_val = 0.0
    max_val_ds = nil
    closest, runner_up = val
    if closest
      max_val = closest.second
      # ref_db is nil when the reference project could not be resolved; keep
      # max_val_ds nil in that case instead of raising NoMethodError.
      # NOTE(review): link_to_reference_dataset below still receives the nil
      # ref_db -- confirm that the helper tolerates it.
      ref_miga = ref_db && ref_db.miga
      max_val_ds = ref_miga && ref_miga.dataset(closest.first)
    end
  %>
  <% if runner_up %>
    The closest relatives found by MiGA in the database were
    <%= link_to_reference_dataset(ref_db, closest.first) %>
    (<%= closest.second.round(2) %>% <%= val_m %>) and
    <%= link_to_reference_dataset(ref_db, runner_up.first) %>
    (<%= runner_up.second.round(2) %>% <%= val_m %>).
  <% elsif closest %>
    The closest relative found by MiGA in the database was
    <%= link_to_reference_dataset(ref_db, closest.first) %>
    (<%= closest.second.round(2) %>% <%= val_m %>).
  <% else %>
    MiGA couldn't find any close relatives registered in the database.
  <% end %>
</p>
<%
  # The taxonomy sections are hidden for the distances job of a reference
  # dataset, when no closest-relative dataset was resolved, and for clade
  # projects.
  # NOTE(review): max_val comes from whichever metric produced val (val_m may
  # be 'ANI'), yet it is passed to aai_intax/aai_novel -- confirm intended.
  show_taxonomy = !(dataset.ref? && job == :distances) &&
                  !max_val_ds.nil? && !project.miga.is_clade?
%>
<% if show_taxonomy %>
  <h4>Taxonomic classification
  <%= info_msg "Taxonomic classification of query datasets" do %>
    <p>
      The taxonomic classification of your dataset is inferred by the maximum
      Average Amino Acid Identity (AAI) found against all the genomes in the
      database. The p-value is estimated from the empirical distribution
      observed in all the reference genomes of NCBI's RefSeq at each taxonomic 
      level, and indicates the probability of a different classification with
      the observed AAI.
    </p><p>
      More formally, the p-value indicates the probability of
      any two genomes not being classified in the same taxon at that taxonomic
      rank, given that the AAI between them is greater than or equal to the one
      observed. For example, a p-value of 0.01 for a genus classification
      indicates that only 1% of all the pairs of genomes in RefSeq with AAI
      above the observed value are from different genera; therefore the query
      dataset most likely belongs to the same genus as the closest relative in
      the database.
    </p>
  <% end %></h4>
  <p><%= aai_intax(max_val, ref_db, max_val_ds) %></p>
  <h4>Taxonomic novelty
  <%= info_msg "Taxonomic novelty of query datasets" do %>
    <p>
      The taxonomic novelty of the query dataset is an analysis indicating the
      taxonomic rank at which your dataset represents a novel taxon with respect
      to the database. Note that the database does not contain genomes for all
      described species, and the query dataset may belong to a previously
      described taxon not represented in the database.
    </p><p>
      The taxonomic novelty is determined by the maximum Average Amino Acid
      Identity (AAI) found against the genomes in the database. The p-value is
      estimated from the empirical distribution observed in all the reference
      genomes of NCBI's RefSeq at each taxonomic level, and indicates the
      probability of the observed AAI between genomes in the same taxon.
    </p><p>
      More formally, the p-value indicates the probability of any two genomes
      having an AAI lesser than or equal to the one observed, given that they
      are classified in the same taxon at that taxonomic rank. For example, a
      p-value of 0.01 for a novel family indicates that only 1% of all the
      pairs of genomes in RefSeq classified in the same family have an AAI below
      the observed value; therefore the query dataset most likely belongs to a
      family not represented in the database.
    </p>
  <% end %></h4>
  <p><%= aai_novel(max_val, ref_db) %></p>
<% end %>
<%# Heading of the distance tables, with a help message defining ANI and AAI. %>
<h4>Genome relatedness
<%= info_msg "Genome relatedness to database references" do %>
  <p>
    Average sequence identities to reference datasets in the database. Displays
    only the top-50 values.
  </p>
  <p><b>ANI:</b> Average Nucleotide Identity</p>
  <p><b>AAI:</b> Average Amino Acid Identity</p>
<% end %></h4>
<%# Only mention the reference tree when the result includes its PDF file. %>
<% if res.file_path(:ref_tree_pdf) %>
  <p>
    This dataset was placed in the context of some representative genomes from
    the reference collection. See below the <b>ref tree</b> (in Newick format)
    and <b>ref tree pdf</b> (in PDF).
  </p>
<% end %>
<div class="panel-group" id=distances role=tablist
      aria-multiselectable=true>
<% %w[ani aai].each do |metric| %>
  <%
    # One collapsible panel per metric that has a usable database and at
    # least one hit other than the dataset itself.
    metric_db = db[metric.to_sym]
    next if metric_db.nil?
    # metric only takes the two literal values above, so interpolating it
    # into the SQL is safe; the dataset name is bound as a parameter.
    db_qry = metric_db.execute(
      "SELECT seq2, #{metric}, sd, 100.0*n/omega " \
      "FROM #{metric} WHERE seq2 != ? " \
      "GROUP BY seq2 ORDER BY #{metric} DESC " \
      "LIMIT 50", [dataset.name])
    next if db_qry.empty?
  %>
  <div class="panel panel-default">
    <div class=panel-heading role=tab id="<%= metric %>-h">
      <h4 class=panel-title>
        <a role=button data-toggle=collapse data-parent="#distances"
              class=btn href="#<%= metric %>-b" aria-expanded=false
              aria-controls="<%= metric %>-b">
          <%= metric.upcase %> table
        </a>
      </h4>
    </div>
    <div id="<%= metric %>-b" class="panel-collapse collapse"
          role=tabpanel aria-labelledby="<%= metric %>-h">
      <div class="panel-body table-responsive">
        <table class="table table-hover">
          <thead><tr>
            <th>Dataset</th>
            <th><%= metric.upcase %> (%)</th>
            <th>Standard deviation (<%= metric.upcase %>%)</th>
            <th>Fraction of <%= metric == 'ani' ? 'genome' : 'proteins' %>
              shared (%)</th>
          </tr></thead>
          <tbody>
          <% db_qry.each do |row| %>
            <tr>
              <% row.each do |cell| %>
                <td>
                <%# Floats are rounded (full value in the tooltip), missing
                    values are flagged as estimated, and any other value is
                    rendered as a link to the reference dataset. %>
                <% if cell.is_a?(Float) %>
                  <span rel=tooltip title="<%= cell %>"><%=
                    cell.round(2) %></span>
                <% elsif cell.nil? || cell.empty? %>
                  <span class=text-muted>(estimated)</span>
                <% else %>
                  <%= link_to_reference_dataset(ref_db, cell) %>
                <% end %>
                </td>
              <% end %>
            </tr>
          <% end %>
          </tbody>
        </table>
      </div>
    </div>
  </div>
<% end %>
</div>
<%
  # The tables above are the last consumers of the SQLite handles opened for
  # this partial; close them rather than leaving it to garbage collection.
  db.each_value do |handle|
    handle.close unless handle.nil? || handle.closed?
  end
%>