Ensembl/ensembl-hive

View on GitHub
modules/Bio/EnsEMBL/Hive/Utils/Collection.pm

Summary

Maintainability
Test Coverage
=head1 LICENSE

Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2021] EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut

=pod

=head1 NAME

Bio::EnsEMBL::Hive::Utils::Collection - A collection object

=cut

package Bio::EnsEMBL::Hive::Utils::Collection;

use strict;
use warnings;


# Constructor.
#   Arg [1]     : (optional) arrayref of initial elements; it is stored as given, not copied.
#                 A missing (or otherwise false) argument yields a fresh empty list.
#   Returntype  : the newly blessed collection object
sub new {
    my ($class, $initial_listref) = @_;

    my $self = bless {}, $class;

    # go through the accessor rather than poking the hash directly
    $self->listref( $initial_listref || [] );

    return $self;
}


# Combined getter/setter for the underlying array reference.
#   Arg [1]     : (optional) new value; whenever an argument is passed (even undef) it replaces the stored one
#   Returntype  : the value currently stored under '_listref'
sub listref {
    my ($self, @new_value) = @_;

    $self->{'_listref'} = $new_value[0] if @new_value;

    return $self->{'_listref'};
}


# Returns the elements themselves (a flat list) rather than the reference that holds them.
#   Returntype  : list of elements; in scalar context this evaluates to the number of elements
sub list {
    my ($self) = @_;

    return @{ $self->listref };
}


# Tells whether the collection already holds the given candidate.
#   Arg [1]     : $candidate - the value to look for; elements are compared to it with string equality (eq)
#   Returntype  : 1 if at least one element matches, 0 otherwise
sub present {
    my ($self, $candidate) = @_;

    for my $member (@{ $self->listref }) {
        if( $member eq $candidate ) {
            return 1;   # the first hit is enough
        }
    }

    return 0;
}


# Appends all the given elements to the end of the collection (duplicates are not checked here).
#   Args        : list of elements to append
#   Returntype  : whatever push() returns, i.e. the number of elements in the list after the append
sub add {
    my ($self, @new_elements) = @_;

    return push @{ $self->listref }, @new_elements;
}


# Appends the candidate only if present() does not already report it in the collection.
#   Arg [1]     : $candidate - the element to add
#   Returntype  : the (true) result of present() when nothing was added, otherwise the result of add()
sub add_once {
    my ($self, $candidate) = @_;

    # short-circuit: add() is only reached when present() came back false
    return $self->present( $candidate ) || $self->add( $candidate );
}


# Removes, in place, every element that is string-equal (eq) to the candidate.
#   Arg [1]     : $candidate - the value to remove
sub forget {
    my ($self, $candidate) = @_;

    my $elements = $self->listref;

    # Walk from the tail towards the head, so that a splice never shifts an index that is still to be visited
    my $index = scalar(@$elements);
    while( $index-- > 0 ) {
        splice(@$elements, $index, 1) if( $elements->[$index] eq $candidate );
    }
}


# Returns the first element of the collection that satisfies ALL of the given filters.
#   Args        : flat list of ( $name => $filter ) pairs.
#                 For a plain HASH element $name is used as a hash key, otherwise it is called as a method on the element.
#                 $filter may be:
#                   - undef      : matches only if the element's value is undef too
#                   - a CODE ref : called with the (defined) value, matches if it returns true
#                   - otherwise  : matches if string-equal (eq) to the (defined) value
#   Returntype  : the first matching element (the very first element if no filters are given), or undef
sub find_one_by {
    my ($self, %method_to_filter_value) = @_;

    ELEMENT: foreach my $element (@{ $self->listref }) {

        foreach my $filter_name (keys %method_to_filter_value) {
            my $filter_value = $method_to_filter_value{$filter_name};
            my $actual_value = (ref($element) eq 'HASH') ? $element->{$filter_name} : $element->$filter_name();

            my $is_match;
            if( !defined($actual_value) ) {         # neither defined => match
                $is_match = !defined($filter_value);
            } elsif( !defined($filter_value) ) {    # only one of the two is defined => no match
                $is_match = 0;
            } elsif( ref($filter_value) eq 'CODE' ) {
                $is_match = $filter_value->( $actual_value );
            } else {
                $is_match = ( $filter_value eq $actual_value );
            }

            next ELEMENT unless( $is_match );       # one failed filter disqualifies the element
        }

        return $element;
    }

    return undef;   # have to be explicit here to avoid surprises
}


# Returns all elements of the collection that satisfy ALL of the given filters, in collection order.
#   Args        : flat list of ( $name => $filter ) pairs.
#                 For a plain HASH element $name is used as a hash key, otherwise it is called as a method on the element.
#                 $filter may be:
#                   - undef      : matches only if the element's value is undef too
#                   - a CODE ref : called with the (defined) value, matches if it returns true
#                   - otherwise  : matches if string-equal (eq) to the (defined) value
#   Returntype  : reference to a new array of matching elements (possibly empty; everything if no filters are given)
sub find_all_by {
    my ($self, %method_to_filter_value) = @_;

    my @matching_elements = ();

    ELEMENT: foreach my $element (@{ $self->listref }) {

        foreach my $filter_name (keys %method_to_filter_value) {
            my $filter_value = $method_to_filter_value{$filter_name};
            my $actual_value = (ref($element) eq 'HASH') ? $element->{$filter_name} : $element->$filter_name();

            my $is_match;
            if( !defined($actual_value) ) {         # neither defined => match
                $is_match = !defined($filter_value);
            } elsif( !defined($filter_value) ) {    # only one of the two is defined => no match
                $is_match = 0;
            } elsif( ref($filter_value) eq 'CODE' ) {
                $is_match = $filter_value->( $actual_value );
            } else {
                $is_match = ( $filter_value eq $actual_value );
            }

            next ELEMENT unless( $is_match );       # one failed filter disqualifies the element
        }

        push @matching_elements, $element;
    }

    return \@matching_elements;
}


# Translates ONE subpattern into a find_all_by() query and returns the resulting arrayref.
# The supported forms, tried in this order, are:
#       123             elements whose dbID is string-equal to '123'
#       5..10           elements whose dbID is numerically within the closed range
#       5..   /  ..10   elements whose dbID is numerically >= 5  /  <= 10
#       some_name       elements whose name is string-equal to 'some_name'
#       some%name       elements whose name matches the wildcard (each % stands for any substring)
#       field==value    elements whose field is string-equal to the value
#       field!=value    elements whose field is string-different (ne) from the value
#       field<=value, field>=value, field<value, field>value    numeric comparisons on the field
# An undefined, empty or unrecognized subpattern yields an empty arrayref.
#
# NB: $1/$2 are dynamically scoped and are only looked up when a closure actually runs,
#     so any successful regex match performed in between (e.g. inside find_all_by or an override of it)
#     would silently change what the filter compares against.
#     This is why every capture is copied into a lexical BEFORE the filter closure is created.
sub _find_all_by_subpattern {    # subpatterns can be combined into full patterns using +-,
    my ($self, $pattern) = @_;

    my $filtered_elements = [];
    $pattern //= '';

    if( $pattern=~/^\d+$/ ) {

        $filtered_elements = $self->find_all_by( 'dbID', $pattern );

    } elsif( $pattern=~/^(\d+)\.\.(\d+)$/ ) {

        my ($from, $to) = ($1, $2);
        $filtered_elements = $self->find_all_by( 'dbID', sub { return $from<=$_[0] && $_[0]<=$to; } );

    } elsif( $pattern=~/^(\d+)\.\.$/ ) {

        my $from = $1;
        $filtered_elements = $self->find_all_by( 'dbID', sub { return $from<=$_[0]; } );

    } elsif( $pattern=~/^\.\.(\d+)$/ ) {

        my $to = $1;
        $filtered_elements = $self->find_all_by( 'dbID', sub { return $_[0]<=$to; } );

    } elsif( $pattern=~/^\w+$/) {

        $filtered_elements = $self->find_all_by( 'name', $pattern );

    } elsif( $pattern=~/^[\w\%]+$/) {

        # only word characters and % can get here, so the pattern is safe to interpolate;
        # the regex is compiled once rather than on every call of the filter
        (my $regex_source = $pattern) =~ s/\%/.*/g;
        my $name_regex = qr/^${regex_source}$/;
        $filtered_elements = $self->find_all_by( 'name', sub { return $_[0]=~$name_regex; } );

    } elsif( $pattern=~/^(\w+)==(.*)$/) {

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, $value );

    } elsif( $pattern=~/^(\w+)!=(.*)$/) {

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, sub { return $_[0] ne $value; } );

    } elsif( $pattern=~/^(\w+)<=(.*)$/) {       # NB: the order is important - all digraphs should be parsed before their proper prefixes

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, sub { return $_[0] <= $value; } );

    } elsif( $pattern=~/^(\w+)>=(.*)$/) {

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, sub { return $_[0] >= $value; } );

    } elsif( $pattern=~/^(\w+)<(.*)$/) {

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, sub { return $_[0] < $value; } );

    } elsif( $pattern=~/^(\w+)>(.*)$/) {

        my ($field, $value) = ($1, $2);
        $filtered_elements = $self->find_all_by( $field, sub { return $_[0] > $value; } );
    }

    return $filtered_elements;
}


=head2 find_all_by_pattern

  Arg [1]    : (optional) string $pattern
  Example    : my $first_fifteen_analyses_and_two_more = $collection->find_all_by_pattern( '1..15,analysis_X,21' );
  Example    : my $two_open_ranges = $collection->find_all_by_pattern( '..7,10..' );
  Example    : my $double_exclusion = $collection->find_all_by_pattern( '1..15-3..5+4' );
  Example    : my $blast_related_with_exceptions = $collection->find_all_by_pattern( 'blast%-12-%funnel' );
  Description: Filters an arrayref of non-repeating objects from the given collection by interpreting a pattern.
                The pattern can contain individual analysis_ids, individual logic_names,
                open and closed ranges of analysis_ids, wildcard patterns of logic_names,
                merges (+ or ,) and exclusions (-) of the above subsets.
  Exceptions : none
  Caller     : both beekeeper.pl (for scheduling) and runWorker.pl (for specialization)

=cut

# Selects a non-repeating subset of the collection described by a pattern
# (subpatterns merged by + or , and excluded by -, applied left to right).
#   Arg [1]     : (optional) string $pattern; undef selects the whole collection
#   Returntype  : reference to a new array of selected elements.
#                 The elements are reported in the order in which they appear in the collection,
#                 so that the result does not depend on Perl's randomized hash ordering.
sub find_all_by_pattern {
    my ($self, $pattern) = @_;

    if( not defined($pattern) ) {

        return [ $self->list ];

    } else {

        # By using the grouping, we ask Perl to return the pattern and their delimiters
        my @syll = split(/([+\-,])/, $pattern);

        # Selected elements are keyed by their stringified form, which is what makes the result non-repeating
        my %uniq = map { ("$_" => $_) } @{ $self->_find_all_by_subpattern( shift @syll ) };   # initialize with the first syllable

        while(@syll) {
            my $operation   = shift @syll;  # by construction this is one of [+-,]
            my $subpattern  = shift @syll;  # can be an empty string (or undef if the pattern ends with a delimiter)

            foreach my $element (@{ $self->_find_all_by_subpattern( $subpattern ) }) {
                if($operation eq '-') {
                    delete $uniq{ "$element" };
                } else {
                    $uniq{ "$element" } = $element;
                }
            }
        }

        return [] unless( %uniq );  # nothing selected, no need to scan the collection

        # Pick the selected elements up in collection order; delete() makes sure each one is reported only once
        my @selected_elements = ();
        foreach my $element ($self->list) {
            my $key = "$element";
            push @selected_elements, delete $uniq{ $key } if( exists $uniq{ $key } );
        }

        # Anything left over was returned by _find_all_by_subpattern without being in the list
        # (not expected with the implementation in this file, but kept so that nothing selected is ever lost)
        push @selected_elements, values %uniq;

        return \@selected_elements;
    }
}


# Combined getter/setter for the companion ("dark") collection.
#   Arg [1]     : (optional) new value; whenever an argument is passed (even undef) it replaces the stored one
#   Returntype  : the value currently stored under '_dark_collection'
sub dark_collection {       # contain another collection of objects marked for deletion
    my ($self, @new_value) = @_;

    $self->{'_dark_collection'} = $new_value[0] if @new_value;

    return $self->{'_dark_collection'};
}


# Removes the candidate from this collection and records it in the dark collection,
# which is created on first use.
#   Arg [1]     : $candidate - the element to move over
#   Returntype  : whatever add() on the dark collection returns
sub forget_and_mark_for_deletion {
    my ($self, $candidate) = @_;

    $self->forget( $candidate );

    my $marked_for_deletion = $self->dark_collection;
    if( !$marked_for_deletion ) {   # lazily create the companion collection
        $marked_for_deletion = $self->dark_collection( Bio::EnsEMBL::Hive::Utils::Collection->new );
    }

    return $marked_for_deletion->add( $candidate );
}


# Destructor: lets go of everything the object holds by replacing the list with a fresh empty arrayref
# and clearing the dark collection.
# NOTE(review): presumably this is meant to help release contained objects promptly - the reason is not stated in this file.
sub DESTROY {
    my ($self) = @_;

    $self->listref( [] );
    $self->dark_collection( undef );
}

1;