lib/genevalidator/validation_length_rank.rb
require 'forwardable'
require 'genevalidator/exceptions'
require 'genevalidator/ext/array'
require 'genevalidator/validation_report'
require 'genevalidator/validation_test'
module GeneValidator
##
# Class that stores the validation output information
class LengthRankValidationOutput < ValidationReport
attr_reader :msg
attr_reader :query_length
attr_reader :no_of_hits
attr_reader :median
attr_reader :mean
attr_reader :smallest_hit
attr_reader :largest_hit
attr_reader :extreme_hits
attr_reader :percentage
attr_reader :result
def initialize(short_header, header, description, msg, query_length,
no_of_hits, median, mean, smallest_hit, largest_hit,
extreme_hits, percentage)
@short_header = short_header
@header = header
@description = description
@msg = msg
@query_length = query_length
@no_of_hits = no_of_hits
@median = median
@mean = mean
@smallest_hit = smallest_hit
@largest_hit = largest_hit
@extreme_hits = extreme_hits
@percentage = percentage
@result = validation
@expected = :yes
@approach = 'If the query sequence is well conserved and similar' \
' sequences (BLAST hits) are correct, we can expect' \
' query and hit sequences to have similar lengths.'
@explanation = explain
@conclusion = conclude
end
def explain
diff = @query_length > @median ? 'longer' : 'shorter'
exp1 = "The query sequence is #{@query_length} amino-acids long. BLAST" \
" identified #{@no_of_hits} hit sequences with lengths from" \
" #{@smallest_hit} to #{@largest_hit} amino-acids (median:" \
" #{@median}; mean: #{@mean})."
exp2 = if @extreme_hits != 0
" #{@extreme_hits} of these hit sequences (i.e." \
" #{@percentage}%) are #{diff} than the query sequence."
else
" All hit sequences are #{diff} than the query sequence."
end
exp1 + exp2
end
def conclude
if @result == :yes
'There is no reason to believe there is any problem with the length' \
' of the query sequence.'
else
"The sequence may be #{@msg.gsub(' ', ' ')}."
end
end
def print
@msg.empty? ? "#{@percentage}%" : "#{@percentage}% (#{@msg})"
end
def validation
@msg.empty? ? :yes : :no
end
end
##
# This class contains the methods necessary for
# length validation by ranking the hit lengths
class LengthRankValidation < ValidationTest
extend Forwardable
def_delegators GeneValidator, :opt
THRESHOLD = 20
##
# Initializes the object
# Params:
# +prediction+: a +Sequence+ object representing the blast query
# +hits+: a vector of +Sequence+ objects (representing blast hits)
def initialize(prediction, hits)
super
@short_header = 'LengthRank'
@header = 'Length Rank'
@description = 'Check whether the rank of the prediction length lies' \
' among 80% of all the BLAST hit lengths.'
@cli_name = 'lenr'
end
##
# Calculates a percentage based on the rank of the prediction among the
# hit lengths
# Params:
# +hits+ (optional): a vector of +Sequence+ objects
# +prediction+ (optional): a +Sequence+ object
# Output:
# +LengthRankValidationOutput+ object
def run(hits = @hits, prediction = @prediction)
raise NotEnoughHitsError if hits.length < opt[:min_blast_hits]
raise unless prediction.is_a?(Query) && hits[0].is_a?(Query)
start = Time.now
hits_lengths = hits.map { |x| x.length_protein.to_i }
.sort { |a, b| a <=> b }
no_of_hits = hits_lengths.length
median = hits_lengths.median.round
query_length = prediction.length_protein
mean = hits_lengths.mean.round
smallest_hit = hits_lengths[0]
largest_hit = hits_lengths[-1]
if hits_lengths.standard_deviation <= 5
msg = ''
percentage = 100
else
if query_length < median
extreme_hits = hits_lengths.find_all { |x| x < query_length }.length
percentage = ((extreme_hits.to_f / no_of_hits) * 100).round
msg = 'too short'
else
extreme_hits = hits_lengths.find_all { |x| x > query_length }.length
percentage = ((extreme_hits.to_f / no_of_hits) * 100).round
msg = 'too long'
end
end
msg = '' if percentage >= THRESHOLD
@validation_report = LengthRankValidationOutput.new(@short_header,
@header, @description,
msg, query_length,
no_of_hits, median,
mean, smallest_hit,
largest_hit,
extreme_hits,
percentage)
@validation_report.run_time = Time.now - start
@validation_report
rescue NotEnoughHitsError
@validation_report = ValidationReport.new('Not enough evidence', :warning,
@short_header, @header,
@description)
rescue StandardError
@validation_report = ValidationReport.new('Unexpected error', :error,
@short_header, @header,
@description)
@validation_report.errors.push 'Unexpected Error'
end
end
end