Ensembl/ensembl-hive

View on GitHub
modules/Bio/EnsEMBL/Hive/RunnableDB/SqlHealthcheck.pm

Summary

Maintainability
Test Coverage
=pod 

=head1 NAME

    Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck

=head1 SYNOPSIS

    standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck \
                    -db_conn mysql://ensro@compara1/mm14_compara_homology_71 \
                    -description 'We expect at least 20,000 human genes' \
                    -query 'SELECT * FROM member WHERE genome_db_id = 90 AND source_name = "ENSEMBLGENE"' \
                    -expected_size '>= 20000'

=head1 DESCRIPTION

    This is a generic RunnableDB module for testing the size of the resultset of any SQL query.

    The query is passed by the parameter 'query' (param-substituted). The older parameter name 'inputquery' is still accepted, but is deprecated.
    The expected size is passed by the parameter 'expected_size' as a string "CONDITION VALUE" (CONDITION defaults to equality, VALUE defaults to 0).
    Currently, CONDITION is one of: = == < <= > >= <> !=

    TODO: implement an "expected_value" test

=head1 LICENSE

    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
    Copyright [2016-2021] EMBL-European Bioinformatics Institute

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates

=cut


package Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck;

use strict;
use warnings;

use base ('Bio::EnsEMBL::Hive::Process');


sub param_defaults {
    # Returns a hashref of default values for the parameters read by this
    # module: a placeholder description, and nothing for 'expected_size' or
    # for 'inputquery' (the deprecated spelling of 'query').
    my %defaults = (
        'description'   => '/no description/',
        'expected_size' => undef,
        'inputquery'    => undef,
    );

    return \%defaults;
}



=head2 fetch_input

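    Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
                  Here it builds a single test from the 'description', 'query' (or the deprecated 'inputquery') and 'expected_size'
                  parameters, stores it in the 'tests' parameter, and validates it.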
                  
=cut

sub fetch_input {
    my ($self) = @_;

    # 'inputquery' is the legacy name of the 'query' parameter: it is still
    # honoured (and takes precedence when set), but its use is flagged.
    my $legacy_query = $self->param('inputquery');
    if ($legacy_query) {
        warn "'inputquery' is deprecated in SqlHealthcheck. Use 'query' instead\n";
    }

    # This module runs exactly one test, assembled from its own parameters.
    # 'query' is only required when the legacy parameter is not set.
    my %single_test;
    $single_test{description}   = $self->param('description');
    $single_test{query}         = $legacy_query || $self->param_required('query');
    $single_test{expected_size} = $self->param('expected_size');

    # Store it as a one-element list under 'tests', then validate / parse it
    $self->param('tests', [ \%single_test ]);
    $self->_validate_tests;
}


=head2 _validate_tests

    Description : Checks that the tests are properly defined, and parses the "expected_size"

=cut

sub _validate_tests {
    my ($self) = @_;

    my $all_tests = $self->param('tests');

    for my $test (@$all_tests) {
        # Both fields are mandatory (any false value counts as missing)
        $test->{query}       or die "The SQL query must be provided";
        $test->{description} or die "The description must be provided";

        # The substituted query is kept next to the raw one, under its own key
        $test->{subst_query} = $self->param_substitute($test->{query});

        # A false 'expected_size' (undef, '', '0') becomes an empty string,
        # which the pattern below accepts and which ends up meaning "= 0"
        my $expected_size = $self->param_substitute($test->{expected_size} || '');

        # On success the match yields two captures: the (optional) comparison
        # operator and the (optional) number of rows. On failure it yields nothing.
        my @parsed = ($expected_size =~ /^\s*(=|==|>|>=|<|<=|<>|!=|)\s*(\d*)\s*$/);
        if (!@parsed) {
            die "Cannot interpret the 'expected_size' parameter: '$expected_size'";
        }

        my ($operator, $size) = @parsed;
        $test->{logical_test}   = $operator || '=';
        $test->{reference_size} = $size || '0';
    }
}

=head2 run

    Description : Implements run() interface method of Bio::EnsEMBL::Hive::Process
                  Iterate through the tests and run them all. Report the failed tests at the end

=cut

sub run {
    my ($self) = @_;

    # Every test is executed, even once one of them has failed, so that the
    # final report lists all the failures at once.
    my @failed_tests;
    for my $test (@{ $self->param('tests') }) {
        next if $self->_run_test($test);
        push @failed_tests, $test;
    }

    if (@failed_tests) {
        # Transient errors like lost access to the database, etc, tend to
        # make the job die in _run_test(). If we've passed this point, the
        # test genuinely failed, and it will fail again anyway
        $self->input_job->transient_error(0);

        # One entry per failed test: its description, then the query that was run
        my $report = '';
        for my $failed (@failed_tests) {
            $report .= sprintf(" - %s\n   > %s\n", $failed->{description}, $failed->{subst_query});
        }
        die "The following tests have failed:\n".$report;
    }
}


=head2 _run_test

    Description : Runs a single test, defined in a hash with the following keys:
                   description, subst_query, reference_size, logical_test
                  Returns 1 if the test passes and 0 otherwise.

=cut

sub _run_test {
    # Runs one test and tells whether it passed.
    #
    # Arguments : $test - hashref with the keys 'description', 'subst_query',
    #                     'reference_size' and 'logical_test' (the last three
    #                     are the ones filled in by _validate_tests())
    # Returns   : 1 if the number of rows returned by the query satisfies
    #             "<logical_test> <reference_size>", 0 otherwise
    # Dies      : if 'logical_test' is not one of the supported operators
    #             (errors raised by the database handle are not caught here)
    my $self = shift @_;
    my $test = shift @_;

    my $description = $test->{description};
    my $query = $test->{subst_query};
    my $reference_size = $test->{reference_size};
    my $logical_test = $test->{logical_test};

    # Final semicolons are removed if present. The substitution is anchored at
    # the end of the string, so that only the terminator is removed and never
    # an earlier ';' (e.g. one inside a string literal)
    $query =~ s/;\s*$//;

    print "Test description: $description\n";
    print "Checking whether the number of rows $logical_test $reference_size\n";

    # There is no need to fetch the whole result set: $maxrow is the number of
    # rows that is enough to decide. For '<' and '>=', seeing $reference_size
    # rows settles the question; the other operators need one extra row to
    # tell "exactly $reference_size" apart from "more than $reference_size"
    my $maxrow = $reference_size;
    $maxrow++ if grep {$_ eq $logical_test} qw(= == > <= <> !=);

    # A LIMIT clause is only appended if the query does not already mention one.
    # The word boundaries prevent identifiers that merely contain "limit"
    # (e.g. a column named time_limit) from being mistaken for the keyword
    $query .= " LIMIT $maxrow" unless $query =~ /\bLIMIT\b/i;
    print "Query: $query\n";

    # 'mysql_use_result' is a DBD::mysql statement attribute, hence only passed for that driver
    # NOTE(review): presumably set to avoid buffering the whole result set client-side - confirm
    my $sth_attribs = ($self->data_dbc->driver eq 'mysql') ? { 'mysql_use_result' => 1 } : {};

    my $sth = $self->data_dbc()->prepare($query, $sth_attribs);
    $sth->execute();

    # Only the number of rows matters, their content is ignored
    my $nrow = 0;
    while (defined $sth->fetchrow_arrayref()) {
        $nrow++;
    }
    $sth->finish;

    print "$nrow rows returned".($nrow == $maxrow ? " (test aborted, there could be more rows)" : "")."\n";

    # The comparisons are written against $maxrow where the row cap makes it
    # equivalent to comparing the full row count against $reference_size
    my $success = 0;
    if ($logical_test eq '=' or $logical_test eq '==') {
        $success = 1 if $nrow == $reference_size;

    } elsif ($logical_test eq '<' or $logical_test eq '<=') {
        $success = 1 if $nrow < $maxrow;

    } elsif ($logical_test eq '>' or $logical_test eq '>=') {
        $success = 1 if $nrow >= $maxrow;

    } elsif ($logical_test eq '<>' or $logical_test eq '!=') {
        $success = 1 if $nrow != $reference_size;

    } else {
        die "This should not happen. A logical test is not checked";
    }
    warn $success ? "Success\n\n" : "Failure\n\n";
    return $success;
}


1;