Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/RangeI.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/RangeI.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,443 @@ +# $Id: RangeI.pm,v 1.30 2002/11/05 02:55:12 lapp Exp $ +# +# BioPerl module for Bio::RangeI +# +# Cared for by Lehvaslaiho <heikki@ebi.ac.uk> +# +# Copyright Matthew Pocock +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::RangeI - Range interface + +=head1 SYNOPSIS + + #Do not run this module directly + +=head1 DESCRIPTION + +This provides a standard BioPerl range interface that should be +implemented by any object that wants to be treated as a range. This +serves purely as an abstract base class for implementers and can not +be instantiated. + +Ranges are modeled as having (start, end, length, strand). They use +Bio-coordinates - all points E<gt>= start and E<lt>= end are within the +range. End is always greater-than or equal-to start, and length is +greather than or equal to 1. The behaviour of a range is undefined if +ranges with negative numbers or zero are used. + +So, in summary: + + length = end - start + 1 + end >= start + strand = (-1 | 0 | +1) + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Heikki Lehvaslaiho + +Email: heikki@ebi.ac.uk + +=head1 CONTRIBUTORS + +Juha Muilu (muilu@ebi.ac.uk) + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + +package Bio::RangeI; + +use strict; +use Carp; +use Bio::Root::RootI; +use vars qw(@ISA); +use integer; +use vars qw( @ISA %STRAND_OPTIONS ); + +@ISA = qw( Bio::Root::RootI ); + +BEGIN { +# STRAND_OPTIONS contains the legal values for the strand options + %STRAND_OPTIONS = map { $_, '_'.$_ } + ( + 'strong', # ranges must have the same strand + 'weak', # ranges must have the same strand or no strand + 'ignore', # ignore strand information + ); +} + +# utility methods +# + +# returns true if strands are equal and non-zero +sub _strong { + my ($r1, $r2) = @_; + my ($s1, $s2) = ($r1->strand(), $r2->strand()); + + return 1 if $s1 != 0 && $s1 == $s2; +} + +# returns true if strands are equal or either is zero +sub _weak { + my ($r1, $r2) = @_; + my ($s1, $s2) = ($r1->strand(), $r2->strand()); + return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2; +} + +# returns true for any strandedness +sub _ignore { + return 1; +} + +# works out what test to use for the strictness and returns true/false +# e.g. $r1->_testStrand($r2, 'strong') +sub _testStrand() { + my ($r1, $r2, $comp) = @_; + return 1 unless $comp; + my $func = $STRAND_OPTIONS{$comp}; + return $r1->$func($r2); +} + +=head1 Abstract methods + +These methods must be implemented in all subclasses. + +=head2 start + + Title : start + Usage : $start = $range->start(); + Function: get/set the start of this range + Returns : the start of this range + Args : optionaly allows the start to be set + using $range->start($start) + +=cut + +sub start { + shift->throw_not_implemented(); +} + +=head2 end + + Title : end + Usage : $end = $range->end(); + Function: get/set the end of this range + Returns : the end of this range + Args : optionaly allows the end to be set + using $range->end($end) + +=cut + +sub end { + shift->throw_not_implemented(); +} + +=head2 length + + Title : length + Usage : $length = $range->length(); + Function: get/set the length of this range + Returns : the length of this range + Args : optionaly allows the length to be set + using $range->length($length) + +=cut + +sub length { + shift->throw_not_implemented(); +} + +=head2 strand + + Title : strand + Usage : $strand = $range->strand(); + Function: get/set the strand of this range + Returns : the strandidness (-1, 0, +1) + Args : optionaly allows the strand to be set + using $range->strand($strand) + +=cut + +sub strand { + shift->throw_not_implemented(); +} + +=head1 Boolean Methods + +These methods return true or false. They throw an error if start and +end are not defined. + + $range->overlaps($otherRange) && print "Ranges overlap\n"; + +=head2 overlaps + + Title : overlaps + Usage : if($r1->overlaps($r2)) { do stuff } + Function: tests if $r2 overlaps $r1 + Args : arg #1 = a range to compare this one to (mandatory) + arg #2 = strand option ('strong', 'weak', 'ignore') (optional) + Returns : true if the ranges overlap, false otherwise + +=cut + +sub overlaps { + my ($self, $other, $so) = @_; + + $self->throw("start is undefined") unless defined $self->start; + $self->throw("end is undefined") unless defined $self->end; + $self->throw("not a Bio::RangeI object") unless defined $other && + $other->isa('Bio::RangeI'); + $other->throw("start is undefined") unless defined $other->start; + $other->throw("end is undefined") unless defined $other->end; + + return + ($self->_testStrand($other, $so) + and not ( + ($self->start() > $other->end() or + $self->end() < $other->start() ) + )); +} + +=head2 contains + + Title : contains + Usage : if($r1->contains($r2) { do stuff } + Function: tests whether $r1 totally contains $r2 + Args : arg #1 = a range to compare this one to (mandatory) + alternatively, integer scalar to test + arg #2 = strand option ('strong', 'weak', 'ignore') (optional) + Returns : true if the argument is totaly contained within this range + +=cut + +sub contains { + my ($self, $other, $so) = @_; + $self->throw("start is undefined") unless defined $self->start; + $self->throw("end is undefined") unless defined $self->end; + + if(defined $other && ref $other) { # a range object? + $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); + $other->throw("start is undefined") unless defined $other->start; + $other->throw("end is undefined") unless defined $other->end; + + return ($self->_testStrand($other, $so) and + $other->start() >= $self->start() and + $other->end() <= $self->end()); + } else { # a scalar? + $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/; + return ($other >= $self->start() and $other <= $self->end()); + } +} + +=head2 equals + + Title : equals + Usage : if($r1->equals($r2)) + Function: test whether $r1 has the same start, end, length as $r2 + Args : a range to test for equality + Returns : true if they are describing the same range + +=cut + +sub equals { + my ($self, $other, $so) = @_; + + $self->throw("start is undefined") unless defined $self->start; + $self->throw("end is undefined") unless defined $self->end; + $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); + $other->throw("start is undefined") unless defined $other->start; + $other->throw("end is undefined") unless defined $other->end; + + return ($self->_testStrand($other, $so) and + $self->start() == $other->start() and + $self->end() == $other->end() ); +} + +=head1 Geometrical methods + +These methods do things to the geometry of ranges, and return +Bio::RangeI compliant objects or triplets (start, stop, strand) from +which new ranges could be built. + + +=head2 intersection + + Title : intersection + Usage : ($start, $stop, $strand) = $r1->intersection($r2) + Function: gives the range that is contained by both ranges + Args : arg #1 = a range to compare this one to (mandatory) + arg #2 = strand option ('strong', 'weak', 'ignore') (optional) + Returns : undef if they do not overlap, + or the range that they do overlap + (in an objectlike the calling one) + +=cut + +sub intersection { + my ($self, $other, $so) = @_; + return unless $self->_testStrand($other, $so); + + $self->throw("start is undefined") unless defined $self->start; + $self->throw("end is undefined") unless defined $self->end; + $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); + $other->throw("start is undefined") unless defined $other->start; + $other->throw("end is undefined") unless defined $other->end; + + my @start = sort {$a<=>$b} + ($self->start(), $other->start()); + my @end = sort {$a<=>$b} + ($self->end(), $other->end()); + + my $start = pop @start; + my $end = shift @end; + + my $union_strand; # Strand for the union range object. + + if($self->strand == $other->strand) { + $union_strand = $other->strand; + } else { + $union_strand = 0; + } + + if($start > $end) { + return undef; + } else { + return $self->new('-start' => $start, + '-end' => $end, + '-strand' => $union_strand + ); + #return ($start, $end, $union_strand); + } +} + +=head2 union + + Title : union + Usage : ($start, $stop, $strand) = $r1->union($r2); + : ($start, $stop, $strand) = Bio::RangeI->union(@ranges); + my $newrange = Bio::RangeI->union(@ranges); + Function: finds the minimal range that contains all of the ranges + Args : a range or list of ranges to find the union of + Returns : the range object containing all of the ranges + +=cut + +sub union { + my $self = shift; + my @ranges = @_; + if(ref $self) { + unshift @ranges, $self; + } + + my @start = sort {$a<=>$b} + map( { $_->start() } @ranges); + my @end = sort {$a<=>$b} + map( { $_->end() } @ranges); + + my $start = shift @start; + while( !defined $start ) { + $start = shift @start; + } + + my $end = pop @end; + + my $union_strand; # Strand for the union range object. + + foreach(@ranges) { + if(! defined $union_strand) { + $union_strand = $_->strand; + next; + } else { + if($union_strand ne $_->strand) { + $union_strand = 0; + last; + } + } + } + return undef unless $start or $end; + if( wantarray() ) { + return ( $start,$end,$union_strand); + } else { + return $self->new('-start' => $start, + '-end' => $end, + '-strand' => $union_strand + ); + } +} + +=head2 overlap_extent + + Title : overlap_extent + Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b) + Function: Provides actual amount of overlap between two different + ranges. + Example : + Returns : array of values for + - the amount unique to a + - the amount common to both + - the amount unique to b + Args : a range + + +=cut + +sub overlap_extent{ + my ($a,$b) = @_; + + $a->throw("start is undefined") unless defined $a->start; + $a->throw("end is undefined") unless defined $a->end; + $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI'); + $b->throw("start is undefined") unless defined $b->start; + $b->throw("end is undefined") unless defined $b->end; + + my ($au,$bu,$is,$ie); + if( ! $a->overlaps($b) ) { + return ($a->length,0,$b->length); + } + + if( $a->start < $b->start ) { + $au = $b->start - $a->start; + } else { + $bu = $a->start - $b->start; + } + + if( $a->end > $b->end ) { + $au += $a->end - $b->end; + } else { + $bu += $b->end - $a->end; + } + my $intersect = $a->intersection($b); + $ie = $intersect->end; + $is = $intersect->start; + + return ($au,$ie-$is+1,$bu); +} + +1;