modules/Bio/EnsEMBL/Hive/RunnableDB/DbCmd.pm
=pod
=head1 NAME
Bio::EnsEMBL::Hive::RunnableDB::DbCmd
=head1 SYNOPSIS
standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::DbCmd -db_conn mysql://ensro@ens-livemirror/ncbi_taxonomy -input_query "SELECT name FROM ncbi_taxa_name WHERE name_class = 'scientific name' AND taxon_id = 9606" --append '["-N"]' -output_file out.dat
=head1 DESCRIPTION
This RunnableDB module acts as a wrapper around a database connection. It interfaces with the database the same way as you would
on the command line (i.e. with redirections and / or pipes to other commands) but with hive parameters instead.
The database connection is created from the "data_dbc" parameter, if provided, or the current hive database.
=head1 CONFIGURATION EXAMPLE
#
# The following examples show how to configure SystemCmd in a PipeConfig module.
#
# The most common use-case is to apply a SQL script onto the current database
{ -logic_name => 'write_member_counts',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
-parameters => {
'input_file' => $self->o('ensembl_cvs_root_dir').'/ensembl-compara/scripts/production/populate_member_production_counts_table.sql',
},
-flow_into => [ 'notify_pipeline_completed' ],
},
# You can also use the advanced parameters to run a query on the
# database with the db_cmd.pl and pipe its output onto another command
# e.g. mysql -h... -u... curr_db_name -N -q -e 'select * from mcl_sparse_matrix' | #mcl_bin_dir#/mcxload -abc ...
{ -logic_name => 'mcxload_matrix',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
-parameters => {
'append' => [qw(-N -q)],
'input_query' => 'select * from mcl_sparse_matrix',
'command_out' => [qw(#mcl_bin_dir#/mcxload -abc - -ri max -o #work_dir#/#file_basename#.tcx -write-tab #work_dir#/#file_basename#.itab)],
},
-flow_into => [ 'mcl' ],
},
# Finally, you can run another executable (like mysqlimport) with its
# own parameters onto another database (specified by 'db_conn')
{ -logic_name => 'populate_method_links_from_file',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
-parameters => {
'db_conn' => '#rel_db#',
'method_link_dump_file' => $self->o('method_link_dump_file'),
'executable' => 'mysqlimport',
'append' => [ '#method_link_dump_file#' ],
},
-flow_into => [ 'load_all_genomedbs' ],
},
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2021] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
=head1 CONTACT
Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
=cut
package Bio::EnsEMBL::Hive::RunnableDB::DbCmd;
use strict;
use warnings;
use Bio::EnsEMBL::Hive::Utils qw(join_command_args);
# This runnable is simply a SystemCmd specialized for database commands
use base ('Bio::EnsEMBL::Hive::RunnableDB::SystemCmd');
sub param_defaults {
my $self = shift;
return {
%{$self->SUPER::param_defaults(@_)},
'executable' => undef,
'prepend' => [],
'append' => [],
'input_file' => undef,
'input_query' => undef,
'output_file' => undef,
'command_in' => undef,
'command_out' => undef,
}
}
=head2 fetch_input
Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
Here it deals with finding the command line, doing parameter substitution and storing the result in a predefined place.
param('cmd'): The recommended way of passing in the command line. It can be either a string, or an array-ref of strings. The later is safer if some of the
arguments contain white-spaces.
param('*'): Any other parameters can be freely used for parameter substitution.
=cut
sub fetch_input {
my $self = shift;
# Validate the arguments
# There can be only 1 input
if (not $self->param('executable') and not ($self->param('input_file') or $self->param('input_query') or $self->param('command_in'))) {
die "No input defined (missing 'input_file', 'input_query' and 'command_in')\n";
} elsif (($self->param('input_file') and ($self->param('input_query') or $self->param('command_in'))) or ($self->param('input_query') and $self->param('command_in'))) {
die "Only 1 input ('input_file', 'input_query' and 'command_in') can be defined\n";
}
# And 1 output
if ($self->param('output_file') and $self->param('command_out')) {
die "'output_file' and 'command_out' cannot be set together\n";
}
# If there is any of those, system() will need a shell to deal with
# the pipes / redirections, and we need to hide the passwords
my $need_a_shell = ($self->param('input_file') or $self->param('command_in') or $self->param('output_file') or $self->param('command_out')) ? 1 : 0;
my @cmd = @{ $self->data_dbc->to_cmd(
$self->param('executable'),
[grep {defined $_} @{$self->param('prepend')}],
[grep {defined $_} @{$self->param('append')}],
$self->param('input_query'),
$need_a_shell,
) };
# Add the input data
my $prefix = '';
if ($self->param('input_file')) {
push @cmd, '<', $self->param('input_file');
} elsif ($self->param('input_query')) {
# the query as already been fed into @cmd by to_cmd()
} elsif ($self->param('command_in')) {
$prefix = (join_command_args($self->param('command_in')))[1] . ' | ';
}
# Add the output data
my $postfix = '';
if ($self->param('output_file')) {
push @cmd, '>', $self->param('output_file');
} elsif ($self->param('command_out')) {
$postfix = ' | ' . (join_command_args($self->param('command_out')))[1];
}
if ($need_a_shell) {
my ($join_needed, $flat_cmd) = join_command_args(\@cmd);
$flat_cmd =~ s/ '(-p\$EHIVE_TMP_PASSWORD_\d+)' / $1 /g;
$self->param('cmd', $prefix.$flat_cmd.$postfix);
} else {
$self->param('cmd', \@cmd);
}
}
1;