Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Coordinate/Pair.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Coordinate/Pair.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,448 @@ +# $Id: Pair.pm,v 1.9.2.1 2003/02/20 05:11:45 heikki Exp $ +# +# bioperl module for Bio::Coordinate::Pair +# +# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> +# +# Copyright Heikki Lehvaslaiho +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Coordinate::Pair - Continuous match between two coordinate sets + +=head1 SYNOPSIS + + use Bio::Location::Simple; + use Bio::Coordinate::Pair; + + my $match1 = Bio::Location::Simple->new + (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 ); + my $match2 = Bio::Location::Simple->new + (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 ); + my $pair = Bio::Coordinate::Pair->new(-in => $match1, + -out => $match2 + ); + # location to match + $pos = Bio::Location::Simple->new + (-start => 25, -end => 25, -strand=> -1 ); + + # results are in a Bio::Coordinate::Result + # they can be Matches and Gaps; are Bio::LocationIs + $res = $pair->map($pos); + $res->isa('Bio::Coordinate::Result'); + $res->each_match == 1; + $res->each_gap == 0; + $res->each_Location == 1; + $res->match->start == 5; + $res->match->end == 5; + $res->match->strand == -1; + $res->match->seq_id eq 'peptide'; + + +=head1 DESCRIPTION + +This class represents a one continuous match between two coordinate +systems represented by Bio::Location::Simple objects. The relationship +is directed and reversible. It implements methods to ensure internal +consistency, and map continuous and split locations from one +coordinate system to another. + +The map() method returns Bio::Coordinate::Results with +Bio::Coordinate::Result::Gaps. The calling code have to deal (process +or ignore) them. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing lists Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Heikki Lehvaslaiho + +Email: heikki@ebi.ac.uk +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + +package Bio::Coordinate::Pair; +use vars qw(@ISA ); +use strict; + +# Object preamble - inherits from Bio::Root::Root +use Bio::Root::Root; +use Bio::Coordinate::MapperI; +use Bio::Coordinate::Result; +use Bio::Coordinate::Result::Match; +use Bio::Coordinate::Result::Gap; + +@ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI); + + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + + my($in, $out) = + $self->_rearrange([qw(IN + OUT + )], + @args); + + $in && $self->in($in); + $out && $self->out($out); + return $self; # success - we hope! +} + +=head2 in + + Title : in + Usage : $obj->in('peptide'); + Function: Set and read the input coordinate system. + Example : + Returns : value of input system + Args : new value (optional), Bio::LocationI + +=cut + +sub in { + my ($self,$value) = @_; + if( defined $value) { + $self->throw("Not a valid input Bio::Location [$value] ") + unless $value->isa('Bio::LocationI'); + $self->{'_in'} = $value; + } + return $self->{'_in'}; +} + + +=head2 out + + Title : out + Usage : $obj->out('peptide'); + Function: Set and read the output coordinate system. + Example : + Returns : value of output system + Args : new value (optional), Bio::LocationI + +=cut + +sub out { + my ($self,$value) = @_; + if( defined $value) { + $self->throw("Not a valid output coordinate Bio::Location [$value] ") + unless $value->isa('Bio::LocationI'); + $self->{'_out'} = $value; + } + return $self->{'_out'}; +} + + +=head2 swap + + Title : swap + Usage : $obj->swap; + Function: Swap the direction of mapping; input <-> output + Example : + Returns : 1 + Args : + +=cut + +sub swap { + my ($self) = @_; + ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'}); + return 1; +} + +=head2 strand + + Title : strand + Usage : $obj->strand; + Function: Get strand value for the pair + Example : + Returns : ( 1 | 0 | -1 ) + Args : + +=cut + +sub strand { + my ($self) = @_; + $self->warn("Outgoing coordinates are not defined") + unless $self->out; + $self->warn("Incoming coordinates are not defined") + unless $self->in; + + return $self->in->strand * $self->out->strand; +} + +=head2 test + + Title : test + Usage : $obj->test; + Function: test that both components are of the same length + Example : + Returns : ( 1 | undef ) + Args : + +=cut + +sub test { + my ($self) = @_; + $self->warn("Outgoing coordinates are not defined") + unless $self->out; + $self->warn("Incoming coordinates are not defined") + unless $self->in; + + 1 if $self->in->end - $self->in->start == $self->out->end - $self->out->start; +} + + +=head2 map + + Title : map + Usage : $newpos = $obj->map($pos); + Function: Map the location from the input coordinate system + to a new value in the output coordinate system. + Example : + Returns : new Bio::LocationI in the output coordinate system or undef + Args : Bio::LocationI object + +=cut + +sub map { + my ($self,$value) = @_; + + $self->throw("Need to pass me a value.") + unless defined $value; + $self->throw("I need a Bio::Location, not [$value]") + unless $value->isa('Bio::LocationI'); + $self->throw("Input coordinate system not set") + unless $self->in; + $self->throw("Output coordinate system not set") + unless $self->out; + + + if ($value->isa("Bio::Location::SplitLocationI")) { + + my $result = new Bio::Coordinate::Result; + my $split = new Bio::Location::Split(-seq_id=>$self->out->seq_id); + foreach my $loc ( $value->sub_Location(1) ) { + + my $res = $self->_map($loc); + map { $result->add_sub_Location($_) } $res->each_Location; + + } + return $result; + + } else { + return $self->_map($value); + } + +} + + +=head2 _map + + Title : _map + Usage : $newpos = $obj->_map($simpleloc); + Function: Internal method that does the actual mapping. Called + multiple times by map() if the location to be mapped is a + split location + Example : + Returns : new location in the output coordinate system or undef + Args : Bio::Location::Simple + +=cut + +sub _map { + my ($self,$value) = @_; + + my $result = new Bio::Coordinate::Result; + + my $offset = $self->in->start - $self->out->start; + my $start = $value->start - $offset; + my $end = $value->end - $offset; + + my $match = Bio::Location::Simple->new; + $match->location_type($value->location_type); + $match->strand($self->strand); + + #within + # |-------------------------| + # |-| + if ($start >= $self->out->start and $end <= $self->out->end) { + + $match->seq_id($self->out->seq_id); + $result->seq_id($self->out->seq_id); + + if ($self->strand == 1) { + $match->start($start); + $match->end($end); + } else { + $match->start($self->out->end - $end + $self->out->start); + $match->end($self->out->end - $start + $self->out->start); + } + if ($value->strand) { + $match->strand($match->strand * $value->strand); + $result->strand($match->strand); + } + bless $match, 'Bio::Coordinate::Result::Match'; + $result->add_sub_Location($match); + } + #out + # |-------------------------| + # |-| or |-| + elsif ( ($end < $self->out->start or $start > $self->out->end ) or + #insertions just outside the range need special settings + ($value->location_type eq 'IN-BETWEEN' and + ($end = $self->out->start or $start = $self->out->end))) { + + $match->seq_id($self->in->seq_id); + $result->seq_id($self->in->seq_id); + $match->start($value->start); + $match->end($value->end); + $match->strand($value->strand); + + bless $match, 'Bio::Coordinate::Result::Gap'; + $result->add_sub_Location($match); + } + #partial I + # |-------------------------| + # |-----| + elsif ($start < $self->out->start and $end <= $self->out->end ) { + + $result->seq_id($self->out->seq_id); + if ($value->strand) { + $match->strand($match->strand * $value->strand); + $result->strand($match->strand); + } + my $gap = Bio::Location::Simple->new; + $gap->start($value->start); + $gap->end($self->in->start - 1); + $gap->strand($value->strand); + $gap->seq_id($self->in->seq_id); + + bless $gap, 'Bio::Coordinate::Result::Gap'; + $result->add_sub_Location($gap); + + # match + $match->seq_id($self->out->seq_id); + + if ($self->strand == 1) { + $match->start($self->out->start); + $match->end($end); + } else { + $match->start($self->out->end - $end + $self->out->start); + $match->end($self->out->end); + } + bless $match, 'Bio::Coordinate::Result::Match'; + $result->add_sub_Location($match); + } + #partial II + # |-------------------------| + # |------| + elsif ($start >= $self->out->start and $end > $self->out->end ) { + + $match->seq_id($self->out->seq_id); + $result->seq_id($self->out->seq_id); + if ($value->strand) { + $match->strand($match->strand * $value->strand); + $result->strand($match->strand); + } + if ($self->strand == 1) { + $match->start($start); + $match->end($self->out->end); + } else { + $match->start($self->out->start); + $match->end($self->out->end - $start + $self->out->start); + } + bless $match, 'Bio::Coordinate::Result::Match'; + $result->add_sub_Location($match); + + my $gap = Bio::Location::Simple->new; + $gap->start($self->in->end + 1); + $gap->end($value->end); + $gap->strand($value->strand); + $gap->seq_id($self->in->seq_id); + bless $gap, 'Bio::Coordinate::Result::Gap'; + $result->add_sub_Location($gap); + + } + #enveloping + # |-------------------------| + # |---------------------------------| + elsif ($start < $self->out->start and $end > $self->out->end ) { + + $result->seq_id($self->out->seq_id); + if ($value->strand) { + $match->strand($match->strand * $value->strand); + $result->strand($match->strand); + } + # gap1 + my $gap1 = Bio::Location::Simple->new; + $gap1->start($value->start); + $gap1->end($self->in->start - 1); + $gap1->strand($value->strand); + $gap1->seq_id($self->in->seq_id); + bless $gap1, 'Bio::Coordinate::Result::Gap'; + $result->add_sub_Location($gap1); + + # match + $match->seq_id($self->out->seq_id); + + $match->start($self->out->start); + $match->end($self->out->end); + bless $match, 'Bio::Coordinate::Result::Match'; + $result->add_sub_Location($match); + + # gap2 + my $gap2 = Bio::Location::Simple->new; + $gap2->start($self->in->end + 1); + $gap2->end($value->end); + $gap2->strand($value->strand); + $gap2->seq_id($self->in->seq_id); + bless $gap2, 'Bio::Coordinate::Result::Gap'; + $result->add_sub_Location($gap2); + + } else { + $self->throw("Should not be here!"); + } + return $result; +} + + +1;