bio-miga/miga

View on GitHub
lib/miga/dataset.rb

Summary

Maintainability
A
0 mins
Test Coverage
B
89%
# frozen_string_literal: true

# @package MiGA
# @license Artistic-2.0

require 'set'
require 'miga/metadata'
require 'miga/dataset/result'
require 'miga/dataset/status'
require 'miga/dataset/type'
require 'miga/dataset/hooks'

# This library is only required by +#closest_relatives+, so it is now
# being loaded on call instead to allow most of miga-base to work without
# issue in systems with problematic SQLite3 installations.
# require 'miga/sqlite'

##
# Dataset representation in MiGA
class MiGA::Dataset < MiGA::MiGA
  include MiGA::Dataset::Result
  include MiGA::Dataset::Status
  include MiGA::Dataset::Type
  include MiGA::Dataset::Hooks

  # Class-level
  class << self
    ##
    # Tells whether +project+ already holds a dataset called +name+.
    #
    # The lookup is delegated to +project.dataset_names_set+, so the result
    # is whatever that collection's +include?+ answers for +name+.
    def exist?(project, name)
      known_names = project.dataset_names_set
      known_names.include?(name)
    end

    ##
    # Names of the standard metadata fields reported for a dataset, in
    # display order. A new Array is built on every call.
    def INFO_FIELDS
      %w[
        name created updated type
        ref user description comments
      ]
    end
  end

  # Instance-level

  ##
  # MiGA::Project that contains the dataset. Read-only: only a reader is
  # declared here, and the value is assigned in the constructor
  attr_reader :project

  ##
  # Datasets are uniquely identified by +name+ in a project. Read-only: only
  # a reader is declared here, and the value is assigned in the constructor
  # (after conversion with +to_s+)
  attr_reader :name

  ##
  # Create a MiGA::Dataset object in a +project+ MiGA::Project with a
  # uniquely identifying +name+. +is_ref+ indicates if the dataset is to
  # be treated as reference (true, default) or query (false). Pass any
  # additional +metadata+ as a Hash.
  #
  # The +metadata+ Hash is copied (shallowly) before +:ref+, +:type+ and
  # +:status+ are filled in, so the caller's object is never modified.
  #
  # +name+ is converted with +to_s+ and must satisfy +miga_name?+; otherwise
  # a RuntimeError is raised.
  #
  # Metadata is not loaded here: the target path
  # (<project.path>/metadata/<name>.json) and the initial values are kept
  # until +metadata+ is first called. If that file does not exist yet, the
  # dataset is saved right away and the +:on_create+ hook is fired.
  def initialize(project, name, is_ref = true, metadata = {})
    name = name.to_s
    unless name.miga_name?
      raise 'Invalid name, please use only alphanumerics and underscores: ' \
            "#{name}"
    end

    @project = project
    @name = name
    @metadata = nil # Loaded lazily by #metadata

    # Work on a copy so the defaults below do not leak into the caller's Hash
    initial = metadata.dup
    initial[:ref] = is_ref
    initial[:type] ||= :empty
    initial[:status] ||= 'incomplete'
    @metadata_future = [
      File.join(project.path, 'metadata', "#{name}.json"),
      initial
    ]

    # An existing metadata file means the dataset was created before
    return if File.exist? @metadata_future[0]

    save
    pull_hook :on_create
  end

  ##
  # MiGA::Metadata with information about the dataset.
  #
  # The object is built on first access from the path and initial values
  # stored by the constructor, and the +:on_load+ hook is fired right after
  # it is created. Later calls return the same object without firing the
  # hook again.
  def metadata
    return @metadata unless @metadata.nil?

    @metadata = MiGA::Metadata.new(*@metadata_future)
    pull_hook :on_load
    @metadata
  end

  ##
  # Persist pending changes to the dataset.
  #
  # Logs a debug line, calls +save+ on the metadata, and then fires the
  # +:on_save+ hook. The return value is that of the hook call.
  def save
    MiGA.DEBUG("Dataset.save: #{name}")
    metadata.save
    pull_hook(:on_save)
  end

  ##
  # Persist the dataset unconditionally, i.e., even when the metadata
  # reports no changes.
  #
  # Logs a debug line, calls +save!+ on the metadata, and then fires the
  # +:on_save+ hook. The return value is that of the hook call.
  def save!
    MiGA.DEBUG("Dataset.save!: #{name}")
    metadata.save!
    pull_hook(:on_save)
  end

  ##
  # Delete the dataset with all its contents (including results).
  #
  # Every entry of +results+ is removed first, then the metadata, and
  # finally the +:on_remove+ hook is fired. The return value is that of the
  # hook call; callers should not rely on it.
  def remove!
    results.each { |res| res.remove! }
    metadata.remove!
    pull_hook(:on_remove)
  end

  ##
  # Inactivate a dataset. This halts automated processing by the daemon
  #
  # Sets the +:inactive+ metadata flag to +true+ and saves the metadata.
  # When +reason+ is not nil, it is also recorded as the +:warn+ entry with
  # an "Inactive: " prefix. For reference datasets the project is asked to
  # recalculate its tasks. The +:on_inactivate+ hook is fired last.
  def inactivate!(reason = nil)
    md = metadata
    md[:warn] = "Inactive: #{reason}" unless reason.nil?
    md[:inactive] = true
    md.save
    if ref?
      project.recalculate_tasks("Reference dataset inactivated: #{name}")
    end
    pull_hook(:on_inactivate)
  end

  ##
  # Activate a dataset. This removes the +:inactive+ flag
  #
  # The +:warn+ entry is cleared only when it begins with "Inactive: ", the
  # prefix written by +inactivate!+. Any other warning, including a
  # multi-line one that merely contains such a line, is preserved. The
  # metadata is then saved, the project is asked to recalculate its tasks
  # if this is a reference dataset, and the +:on_activate+ hook is fired.
  def activate!
    metadata[:inactive] = nil
    # Prefix test on the stringified value: anchors at the very start of the
    # text (unlike +^+, which matches at any line start) and tolerates
    # non-String values
    metadata[:warn] = nil if metadata[:warn].to_s.start_with?('Inactive: ')
    metadata.save
    project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
    pull_hook :on_activate
  end

  ##
  # Standard metadata values for the dataset, as an Array ordered like
  # +MiGA::Dataset.INFO_FIELDS+. The 'name' field is taken from +name+;
  # every other field is looked up in the metadata.
  def info
    MiGA::Dataset.INFO_FIELDS.map do |field|
      if field == 'name'
        name
      else
        metadata[field]
      end
    end
  end

  ##
  # Is this dataset a reference? Defined as the negation of +query?+
  def ref?
    return false if query?

    true
  end

  ##
  # Is this dataset a query (non-reference)? True whenever the +:ref+
  # metadata entry is nil or false
  def query?
    metadata[:ref] ? false : true
  end

  ##
  # Is this dataset active? True whenever the +:inactive+ metadata entry is
  # nil or false
  def active?
    !metadata[:inactive]
  end

  ##
  # Same as +ref?+ for backwards-compatibility
  alias is_ref? ref?

  ##
  # Same as +query?+ for backwards-compatibility
  alias is_query? query?

  ##
  # Same as +multi?+ for backwards-compatibility. +multi?+ is not defined in
  # this file; presumably it comes from one of the included modules
  alias is_multi? multi?

  ##
  # Same as +nonmulti?+ for backwards-compatibility. +nonmulti?+ is not
  # defined in this file; presumably it comes from one of the included
  # modules
  alias is_nonmulti? nonmulti?

  ##
  # Same as +active?+ for backwards-compatibility
  alias is_active? active?

  ##
  # Returns an Array of at most +how_many+ duples (Arrays) sorted by
  # decreasing AAI:
  # - +0+: A String with the name(s) of the reference dataset.
  # - +1+: A Float with the AAI.
  # This function is currently only supported for query datasets when
  # +ref_project+ is false (default), and only for reference dataset when
  # +ref_project+ is true. It returns +nil+ if this analysis is not
  # supported (including multi datasets), or if the required result
  # (+:taxonomy+ when +ref_project+ is true, +:distances+ otherwise) is not
  # available.
  #
  # Rows where +seq2+ equals this dataset's own name are excluded.
  def closest_relatives(how_many = 1, ref_project = false)
    return nil if ref? != ref_project
    return nil if multi?

    task = ref_project ? :taxonomy : :distances
    source = result(task)
    return nil if source.nil?

    # Loaded only here so the rest of the library works on systems with
    # problematic SQLite3 installations
    require 'miga/sqlite'
    query = 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
            'GROUP BY seq2 ORDER BY aai DESC LIMIT ?'
    MiGA::SQLite.new(source.file_path(:aai_db)).run(query, [name, how_many])
  end
end