wurmlab/GeneValidator

View on GitHub
lib/genevalidator/hsp.rb

Summary

Maintainability
A
1 hr
Test Coverage
A
96%
require 'forwardable'

require 'genevalidator/blast'
require 'genevalidator/exceptions'

module GeneValidator
  # Holds the attributes of a single BLAST HSP (high-scoring segment pair),
  # populated either from a parsed BLAST XML hsp object or from a hash of
  # BLAST tabular columns.
  class Hsp
    extend Forwardable
    # Instance-level +config+ forwards to +GeneValidator.config+.
    def_delegators GeneValidator, :config

    attr_accessor :hit_from # ref. from the unaligned hit sequence
    attr_accessor :hit_to
    attr_accessor :match_query_from # ref. from the unaligned query sequence
    attr_accessor :match_query_to
    attr_accessor :query_reading_frame
    attr_accessor :hit_alignment
    attr_accessor :query_alignment
    attr_accessor :middles # conserved residues are with letters,
    # positive (mis)matches with +, mismatches and gaps are with space

    attr_accessor :bit_score
    attr_accessor :hsp_score
    attr_accessor :hsp_evalue
    attr_accessor :identity # number of conserved residues
    attr_accessor :pidentity # percentage of identical matches
    attr_accessor :positive # positive score for the (mis)match
    attr_accessor :gaps
    attr_accessor :align_len

    ##
    # Builds an Hsp from an options hash.
    # Params:
    # +input+: Hash that may contain +:xml_input+ (an hsp object, see
    #          init_xml_attributes) and/or +:tabular_input+ (a Hash of
    #          tabular columns, see init_tabular_attribute). When both are
    #          given the tabular values are applied last and override the
    #          XML ones for every column that is present.
    def initialize(input = {})
      @query_alignment = nil
      @hit_alignment   = nil
      init_xml_attributes(input[:xml_input]) if input[:xml_input]
      init_tabular_attribute(input[:tabular_input]) if input[:tabular_input]
    end

    ##
    # Initializes the attributes from a parsed BLAST XML hsp.
    # Params:
    # +hsp+: object responding to +query_from+, +query_to+, +query_frame+,
    #        +hit_from+, +hit_to+, +qseq+, +hseq+, +align_len+, +identity+
    #        and +evalue+ (presumably a BioRuby BLAST report hsp - confirm
    #        against the caller).
    def init_xml_attributes(hsp)
      @match_query_from    = hsp.query_from.to_i
      @match_query_to      = hsp.query_to.to_i
      @query_reading_frame = hsp.query_frame.to_i
      @hit_from            = hsp.hit_from.to_i
      @hit_to              = hsp.hit_to.to_i
      @query_alignment     = hsp.qseq.to_s
      @hit_alignment       = hsp.hseq.to_s
      @align_len           = hsp.align_len.to_i
      # Percentage of identical positions, rounded to two decimals.
      @pidentity           = (100 * hsp.identity / hsp.align_len.to_f).round(2)
      @identity            = hsp.identity.to_i
      # NOTE: stored as a formatted String here (e.g. "1e-05"), whereas the
      # tabular path stores a Float.
      @hsp_evalue          = format('%.0e', hsp.evalue)
      assert_seq_type(@hit_alignment) if @hit_alignment
      assert_seq_type(@query_alignment) if @query_alignment
      return unless config[:type] == :nucleotide
      # For nucleotide input the query coordinates are integer-divided by 3
      # and shifted by one (presumably nucleotide -> amino-acid positions).
      @match_query_from = (@match_query_from / 3) + 1
      @match_query_to   = (@match_query_to / 3) + 1
    end

    ##
    # Initializes the attributes of the hsp from BLAST tabular output.
    # Only columns that are present (non-nil) in the hash are applied; every
    # other attribute keeps its current value.
    # Params:
    # +hash+: Hash keyed by tabular column name. Recognised keys are
    #         'qstart', 'qend', 'qframe', 'sstart', 'send', 'qseq', 'sseq',
    #         'length', 'pident', 'nident' and 'evalue'.
    def init_tabular_attribute(hash)
      @match_query_from    = hash['qstart'].to_i if hash['qstart']
      @match_query_to      = hash['qend'].to_i if hash['qend']
      @query_reading_frame = hash['qframe'].to_i if hash['qframe']
      @hit_from            = hash['sstart'].to_i if hash['sstart']
      @hit_to              = hash['send'].to_i if hash['send']
      @query_alignment     = hash['qseq'] if hash['qseq']
      @hit_alignment       = hash['sseq'] if hash['sseq']
      @align_len           = hash['length'].to_i if hash['length']
      @pidentity           = hash['pident'].to_f if hash['pident']
      # NOTE: Float here, Integer in the XML path; kept for compatibility.
      @identity            = hash['nident'].to_f if hash['nident']
      @hsp_evalue          = hash['evalue'].to_f if hash['evalue']
      # Each alignment is validated only when its own column was supplied, so
      # a row with 'sseq' but no 'qseq' never passes nil to the type guesser
      # and a row with only 'qseq' still gets its query checked.
      assert_seq_type(@query_alignment) if hash['qseq']
      assert_seq_type(@hit_alignment) if hash['sseq']
    end

    ##
    # Prints a two-line warning on stderr unless
    # +BlastUtils.guess_sequence_type+ classifies the sequence as +:protein+.
    # Never raises by itself; the analysis continues regardless.
    # Params:
    # +query+: aligned sequence to check (query or hit alignment)
    def assert_seq_type(query)
      seq_type = BlastUtils.guess_sequence_type(query)
      return if seq_type == :protein
      warn '*** A BLAST hit looks like a nucleotide sequence. Continuing' \
           ' with the analysis anyway.'
      warn '    Make sure that you used a protein database or the analysis' \
           ' results will be incorrect!'
    end
  end
end