diff variant_effect_predictor/Bio/Location/Split.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Location/Split.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,579 @@
+# $Id: Split.pm,v 1.35 2002/12/28 03:26:32 lapp Exp $
+#
+# BioPerl module for Bio::Location::SplitLocation
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::Split - Implementation of a Location on a Sequence
+which has multiple locations (start/end points)
+
+=head1 SYNOPSIS
+
+    use Bio::Location::Split;
+
+    my $splitlocation = new Bio::Location::Split();
+    $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>1,
+							       -end=>30,
+							       -strand=>1));
+    $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>50,
+							       -end=>61,
+							       -strand=>1));   
+    my @sublocs = $splitlocation->sub_Location();
+
+    my $count = 1;
+    # print the start/end points of the sub locations
+    foreach my $location ( sort { $a->start <=> $b->start } 
+			   @sublocs ) {
+	printf "sub feature %d [%d..%d]\n", 
+	       $count, $location->start,$location->end, "\n";
+        $count++;
+    }
+
+=head1 DESCRIPTION
+
+This implementation handles locations which span more than one
+start/end location, or and/or lie on different sequences.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org             - General discussion
+  http://bio.perl.org/MailList.html - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::Split;
+use vars qw(@ISA @CORBALOCATIONOPERATOR);
+use strict;
+
+use Bio::Root::Root;
+use Bio::Location::SplitLocationI;
+use Bio::Location::Atomic;
+
+@ISA = qw(Bio::Location::Atomic Bio::Location::SplitLocationI );
+
+BEGIN { 
+    # as defined by BSANE 0.03
+    @CORBALOCATIONOPERATOR= ('NONE','JOIN', undef, 'ORDER');  
+}
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # initialize
+    $self->{'_sublocations'} = [];
+    my ( $type, $seqid, $locations ) = 
+	$self->_rearrange([qw(SPLITTYPE
+                              SEQ_ID
+			      LOCATIONS
+                              )], @args);
+    if( defined $locations && ref($locations) =~ /array/i ) {
+	$self->add_sub_Location(@$locations);
+    }
+    $seqid  && $self->seq_id($seqid);
+    $type = lc ($type);    
+    $self->splittype($type || 'JOIN');
+    return $self;
+}
+
+=head2 each_Location
+
+ Title   : each_Location
+ Usage   : @locations = $locObject->each_Location($order);
+ Function: Conserved function call across Location:: modules - will
+           return an array containing the component Location(s) in
+           that object, regardless if the calling object is itself a
+           single location or one containing sublocations.
+ Returns : an array of Bio::LocationI implementing objects
+ Args    : Optional sort order to be passed to sub_Location()
+
+=cut
+
+sub each_Location {
+    my ($self, $order) = @_;
+    my @locs = ();
+    foreach my $subloc ($self->sub_Location($order)) {
+	# Recursively check to get hierarchical split locations:
+	push @locs, $subloc->each_Location($order);
+    }
+    return @locs;
+}
+
+=head2 sub_Location
+
+ Title   : sub_Location
+ Usage   : @sublocs = $splitloc->sub_Location();
+ Function: Returns the array of sublocations making up this compound (split)
+           location. Those sublocations referring to the same sequence as
+           the root split location will be sorted by start position (forward
+           sort) or end position (reverse sort) and come first (before
+           those on other sequences).
+
+           The sort order can be optionally specified or suppressed by the
+           value of the first argument. The default is no sort.
+
+ Returns : an array of Bio::LocationI implementing objects
+ Args    : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
+           sort order
+
+=cut
+
+sub sub_Location {
+    my ($self, $order) = @_;
+    $order = 0 unless defined $order;
+    if( defined($order) && ($order !~ /^-?\d+$/) ) {
+	$self->throw("value $order passed in to sub_Location is $order, an invalid value");
+    } 
+    $order = 1 if($order > 1);
+    $order = -1 if($order < -1);
+
+    my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : ();
+
+    # return the array if no ordering requested
+    return @sublocs if( ($order == 0) || (! @sublocs) );
+    
+    # sort those locations that are on the same sequence as the top (`master')
+    # if the top seq is undefined, we take the first defined in a sublocation
+    my $seqid = $self->seq_id();
+    my $i = 0;
+    while((! defined($seqid)) && ($i <= $#sublocs)) {
+	$seqid = $sublocs[$i++]->seq_id();
+    }
+    if((! $self->seq_id()) && $seqid) {
+	$self->warn("sorted sublocation array requested but ".
+		    "root location doesn't define seq_id ".
+		    "(at least one sublocation does!)");
+    }
+    my @locs = ($seqid ?
+		grep { $_->seq_id() eq $seqid; } @sublocs :
+		@sublocs);
+    if(@locs) {
+      if($order == 1) {
+	  # Schwartzian transforms for performance boost	  
+	  @locs = map { $_->[0] }
+	  sort { (defined $a && defined $b) ? 
+		     $a->[1] <=> $b->[1] : $a ? -1 : 1 }
+	  map { [$_, $_->start] } @locs;
+
+      } else { # $order == -1
+	@locs = map {$_->[0]}
+	        sort { 
+		    (defined $a && defined $b) ? 
+			$b->[1] <=> $a->[1] : $a ? -1 : 1 }
+ 	        map { [$_, $_->end] } @locs;
+      }
+    }
+    # push the rest unsorted
+    if($seqid) {
+	push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
+    }
+    # done!
+    return @locs;
+}
+
+=head2 add_sub_Location
+
+ Title   : add_sub_Location
+ Usage   : $splitloc->add_sub_Location(@locationIobjs);
+ Function: add an additional sublocation
+ Returns : number of current sub locations
+ Args    : list of Bio::LocationI implementing object(s) to add
+
+=cut
+
+sub add_sub_Location {
+    my ($self,@args) = @_;
+    my @locs;    
+    foreach my $loc ( @args ) {
+	if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
+	    $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
+	    next;
+	}	
+	push @{$self->{'_sublocations'}}, $loc;
+    }
+
+    return scalar @{$self->{'_sublocations'}};
+}
+
+=head2 splittype
+
+  Title   : splittype
+  Usage   : $splittype = $fuzzy->splittype();
+  Function: get/set the split splittype
+  Returns : the splittype of split feature (join, order)
+  Args    : splittype to set
+
+=cut
+
+sub splittype {
+    my ($self, $value) = @_;
+    if( defined $value || ! defined $self->{'_splittype'} ) {
+	$value = 'JOIN' unless( defined $value );
+	$self->{'_splittype'} = uc ($value);
+    }
+    return $self->{'_splittype'};
+}
+
+=head2 is_single_sequence
+
+  Title   : is_single_sequence
+  Usage   : if($splitloc->is_single_sequence()) {
+                print "Location object $splitloc is split ".
+                      "but only across a single sequence\n";
+	    }
+  Function: Determine whether this location is split across a single or
+            multiple sequences.
+
+            This implementation ignores (sub-)locations that do not define
+            seq_id(). The same holds true for the root location.
+
+  Returns : TRUE if all sublocations lie on the same sequence as the root
+            location (feature), and FALSE otherwise.
+  Args    : none
+
+=cut
+
+sub is_single_sequence {
+    my ($self) = @_;
+
+    my $seqid = $self->seq_id();
+    foreach my $loc ($self->sub_Location(0)) {
+	$seqid = $loc->seq_id() if(! $seqid);
+	if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) {
+	    return 0;
+	}
+    }
+    return 1;
+}
+
+=head1 LocationI methods
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $obj->strand($newval)
+ Function: For SplitLocations, setting the strand of the container
+           (this object) is a short-cut for setting the strand of all
+           sublocations.
+
+           In get-mode, checks if no sub-location is remote, and if
+           all have the same strand. If so, it returns that shared
+           strand value. Otherwise it returns undef.
+
+ Example : 
+ Returns : on get, value of strand if identical between sublocations 
+           (-1, 1, or undef)
+ Args    : new value (-1 or 1, optional)
+
+
+=cut
+
+sub strand{
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'strand'} = $value;
+	# propagate to all sublocs
+	foreach my $loc ($self->sub_Location(0)) {
+	    $loc->strand($value) if ! $loc->is_remote();
+	}
+    } else {
+	my ($strand, $lstrand);
+	foreach my $loc ($self->sub_Location(0)) {
+	    # we give up upon any location that's remote or doesn't have
+	    # the strand specified, or has a differing one set than 
+	    # previously seen.
+	    # calling strand() is potentially expensive if the subloc is also
+	    # a split location, so we cache it
+	    $lstrand = $loc->strand();
+	    if((! $lstrand) ||
+	       ($strand && ($strand != $lstrand)) ||
+	       $loc->is_remote()) {
+		$strand = undef;
+		last;
+	    } elsif(! $strand) {
+		$strand = $lstrand;
+	    }
+	}
+	return $strand;
+    }
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $location->start();
+  Function: get the starting point of the first (sorted) sublocation
+  Returns : integer
+  Args    : none
+
+=cut
+
+sub start {
+    my ($self,$value) = @_;    
+    if( defined $value ) {
+	$self->throw("Trying to set the starting point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    return $self->SUPER::start();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $location->end();
+  Function: get the ending point of the last (sorted) sublocation
+  Returns : integer
+  Args    : none
+
+=cut
+
+sub end {
+    my ($self,$value) = @_;    
+    if( defined $value ) {
+	$self->throw("Trying to set the ending point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    return $self->SUPER::end();
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : $min_start = $location->min_start();
+  Function: get the minimum starting point
+  Returns : the minimum starting point from the contained sublocations
+  Args    : none
+
+=cut
+
+sub min_start {
+    my ($self, $value) = @_;    
+
+    if( defined $value ) {
+	$self->throw("Trying to set the minimum starting point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    my @locs = $self->sub_Location(1);
+    return $locs[0]->min_start() if @locs; 
+    return undef;
+}
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint  
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+sub max_start {
+    my ($self,$value) = @_;
+
+    if( defined $value ) {
+	$self->throw("Trying to set the maximum starting point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    my @locs = $self->sub_Location(1);
+    return $locs[0]->max_start() if @locs; 
+    return undef;
+}
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub start_pos_type {
+    my ($self,$value) = @_;
+
+    if( defined $value ) {
+	$self->throw("Trying to set the start_pos_type of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    my @locs = $self->sub_Location();
+    return ( @locs ) ? $locs[0]->start_pos_type() : undef;    
+}
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+sub min_end {
+    my ($self,$value) = @_;
+
+    if( defined $value ) {
+	$self->throw("Trying to set the minimum end point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    # reverse sort locations by largest ending to smallest ending
+    my @locs = $self->sub_Location(-1);
+    return $locs[0]->min_end() if @locs; 
+    return undef;
+}
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+sub max_end {
+    my ($self,$value) = @_;
+
+    if( defined $value ) {
+	$self->throw("Trying to set the maximum end point of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    # reverse sort locations by largest ending to smallest ending
+    my @locs = $self->sub_Location(-1);
+    return $locs[0]->max_end() if @locs; 
+    return undef;
+}
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub end_pos_type {
+    my ($self,$value) = @_;
+
+    if( defined $value ) {
+	$self->throw("Trying to set end_pos_type of a split location, that is not possible, try manipulating the sub Locations");
+    }
+    my @locs = $self->sub_Location();
+    return ( @locs ) ? $locs[0]->end_pos_type() : undef;    
+}
+
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+
+            We override this here in order to propagate to all sublocations
+            which are not remote (provided this root is not remote either)
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+
+=cut
+
+sub seq_id {
+    my ($self, $seqid) = @_;
+
+    if(! $self->is_remote()) {
+	foreach my $subloc ($self->sub_Location(0)) {
+	    $subloc->seq_id($seqid) if ! $subloc->is_remote();
+	}
+    }
+    return $self->SUPER::seq_id($seqid);
+}
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See Bio::Location::CoordinatePolicyI for documentation about
+            the policy object and its use.
+
+            The interface *does not* require implementing classes to accept
+            setting of a different policy. The implementation provided here
+            does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize every
+            new instance with a CoordinatePolicyI object. The implementation
+            provided here will return a default policy object if none has
+            been set yet. To change this default policy object call this
+            method as a class method with an appropriate argument. Note that
+            in this case only subsequently created Location objects will be
+            affected.
+
+  Returns : A Bio::Location::CoordinatePolicyI implementing object.
+  Args    : On set, a Bio::Location::CoordinatePolicyI implementing object.
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+sub to_FTstring {
+    my ($self) = @_;
+    my @strs;
+    foreach my $loc ( $self->sub_Location() ) {	
+	my $str = $loc->to_FTstring();
+	# we only append the remote seq_id if it hasn't been done already
+	# by the sub-location (which it should if it knows it's remote)
+	# (and of course only if it's necessary)
+	if( (! $loc->is_remote) &&
+	    defined($self->seq_id) && defined($loc->seq_id) &&
+	    ($loc->seq_id ne $self->seq_id) ) {
+	    $str = sprintf("%s:%s", $loc->seq_id, $str);
+	} 
+	push @strs, $str;
+    }    
+
+    my $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
+    return $str;
+}
+
+# we'll probably need to override the RangeI methods since our locations will
+# not be contiguous.
+
+1;