Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Coordinate/Collection.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Coordinate/Collection.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,405 @@ +# $Id: Collection.pm,v 1.11.2.1 2003/02/20 05:11:45 heikki Exp $ +# +# bioperl module for Bio::Coordinate::Collection +# +# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> +# +# Copyright Heikki Lehvaslaiho +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets + +=head1 SYNOPSIS + + # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow + $pair1; $pair2; + + # add them into a Collection + $collection = Bio::Coordinate::Collection->new; + $collection->add_mapper($pair1); + $collection->add_mapper($pair2); + + # create a position and map it + $pos = Bio::Location::Simple->new (-start => 5, -end => 9 ); + $res = $collection->map($pos); + $res->match->start == 1; + $res->match-> == 5; + + # if mapping is many to one (*>1) or many-to-many (*>*) + # you have to give seq_id not get unrelevant entries + $pos = Bio::Location::Simple->new + (-start => 5, -end => 9 -seq_id=>'clone1'); + +=head1 DESCRIPTION + +Generic, context neutral mapper to provide coordinate transforms +between two B<disjoint> coordinate systems. It brings into Bioperl the +functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into +current bioperl. + +This class is aimed for representing mapping between whole chromosomes +and contigs, or between contigs and clones, or between sequencing +reads and assembly. The submaps are automatically sorted, so they can +be added in any order. + +To map coordinates to the other direction, you have to swap() the +collection. Keeping track of the direction and ID restrictions +are left to the calling code. + + + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing lists Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Heikki Lehvaslaiho + +Email: heikki@ebi.ac.uk +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 CONTRIBUTORS + +Ewan Birney, birney@ebi.ac.uk + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + +package Bio::Coordinate::Collection; +use vars qw(@ISA ); +use strict; + +# Object preamble - inherits from Bio::Root::Root +use Bio::Root::Root; +use Bio::Coordinate::MapperI; +use Bio::Coordinate::Result; +use Bio::Coordinate::Result::Gap; + +@ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI); + + +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + + $self->{'_mappers'} = []; + + my($in, $out, $strict, $mappers, $return_match) = + $self->_rearrange([qw(IN + OUT + STRICT + MAPPERS + RETURN_MATCH + )], + @args); + + $in && $self->in($in); + $out && $self->out($out); + $mappers && $self->mappers($mappers); + $return_match && $self->return_match('return_match'); + return $self; # success - we hope! +} + + +=head2 add_mapper + + Title : add_mapper + Usage : $obj->add_mapper($mapper) + Function: Pushes one Bio::Coodinate::MapperI into the list of mappers. + Sets _is_sorted() to false. + Example : + Returns : 1 when succeeds, 0 for failure. + Args : mapper object + +=cut + +sub add_mapper { + my ($self,$value) = @_; + + $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]") + unless defined $value && $value->isa('Bio::Coordinate::MapperI'); + + # test pair range lengths + $self->warn("Coodinates in pair [". $value . ":" . + $value->in->seq_id . "/". $value->in->seq_id . + "] are not right.") + unless $value->test; + + $self->_is_sorted(0); + push(@{$self->{'_mappers'}},$value); +} + +=head2 mappers + + Title : mappers + Usage : $obj->mappers(); + Function: Returns or sets a list of mappers. + Example : + Returns : array of mappers + Args : array of mappers + +=cut + +sub mappers{ + my ($self,@args) = @_; + + if (@args) { + + $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]") + unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI'); + push(@{$self->{'_mappers'}}, @args); + } + + return @{$self->{'_mappers'}}; +} + + +=head2 each_mapper + + Title : each_mapper + Usage : $obj->each_mapper(); + Function: Returns a list of mappers. + Example : + Returns : list of mappers + Args : none + +=cut + +sub each_mapper{ + my ($self) = @_; + return @{$self->{'_mappers'}}; +} + + +=head2 swap + + Title : swap + Usage : $obj->swap; + Function: Swap the direction of mapping;input <-> output + Example : + Returns : 1 + Args : + +=cut + +sub swap { + my ($self) = @_; + use Data::Dumper; + + $self->sort unless $self->_is_sorted; + map {$_->swap;} @{$self->{'_mappers'}}; + ($self->{'_in_ids'}, $self->{'_out_ids'}) = + ($self->{'_out_ids'}, $self->{'_in_ids'}); + 1; +} + +=head2 test + + Title : test + Usage : $obj->test; + Function: test that both components of all pairs are of the same length. + Ran automatically. + Example : + Returns : boolean + Args : + +=cut + +sub test { + my ($self) = @_; + + my $res = 1; + + foreach my $mapper ($self->each_mapper) { + $self->warn("Coodinates in pair [". $mapper . ":" . + $mapper->in->seq_id . "/". $mapper->in->seq_id . + "] are not right.") && ($res = 0) + unless $mapper->test; + } + $res; +} + + +=head2 map + + Title : map + Usage : $newpos = $obj->map($pos); + Function: Map the location from the input coordinate system + to a new value in the output coordinate system. + Example : + Returns : new value in the output coordinate system + Args : integer + +=cut + +sub map { + my ($self,$value) = @_; + + $self->throw("Need to pass me a value.") + unless defined $value; + $self->throw("I need a Bio::Location, not [$value]") + unless $value->isa('Bio::LocationI'); + $self->throw("No coordinate mappers!") + unless $self->each_mapper; + + $self->sort unless $self->_is_sorted; + + + if ($value->isa("Bio::Location::SplitLocationI")) { + + my $result = new Bio::Coordinate::Result; + foreach my $loc ( $value->sub_Location(1) ) { + + my $res = $self->_map($loc); + map { $result->add_sub_Location($_) } $res->each_Location; + + } + return $result; + + } else { + return $self->_map($value); + } + + +} + + +=head2 _map + + Title : _map + Usage : $newpos = $obj->_map($simpleloc); + Function: Internal method that does the actual mapping. Called multiple times + by map() if the location to be mapped is a split location + + Example : + Returns : new location in the output coordinate system or undef + Args : Bio::Location::Simple + +=cut + +sub _map { + my ($self,$value) = @_; + + my $result = Bio::Coordinate::Result->new(-is_remote=>1); + +IDMATCH: { + + # bail out now we if are forcing the use of an ID + # and it is not in this collection + last IDMATCH if defined $value->seq_id && + ! $self->{'_in_ids'}->{$value->seq_id}; + + foreach my $pair ($self->each_mapper) { + + # if we are limiting input to a certain ID + next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id; + + # if we haven't even reached the start, move on + next if $pair->in->end < $value->start; + # if we have over run, break + last if $pair->in->start > $value->end; + + my $subres = $pair->map($value); + $result->add_result($subres); + } + } + + $result->seq_id($result->match->seq_id) if $result->match; + unless ($result->each_Location) { + #build one gap; + my $gap = Bio::Location::Simple->new(-start => $value->start, + -end => $value->end, + -strand => $value->strand, + -location_type => $value->location_type + ); + $gap->seq_id($value->seq_id) if defined $value->seq_id; + bless $gap, 'Bio::Coordinate::Result::Gap'; + $result->seq_id($value->seq_id) if defined $value->seq_id; + $result->add_sub_Location($gap); + } + return $result; +} + + +=head2 sort + + Title : sort + Usage : $obj->sort; + Function: Sort function so that all mappings are sorted by + input coordinate start + Example : + Returns : 1 + Args : + +=cut + +sub sort{ + my ($self) = @_; + + @{$self->{'_mappers'}} = map { $_->[0] } + sort { $a->[1] <=> $b->[1] } + map { [ $_, $_->in->start] } + @{$self->{'_mappers'}}; + + #create hashes for sequence ids + $self->{'_in_ids'} = (); + $self->{'_out_ids'} = (); + foreach ($self->each_mapper) { + $self->{'_in_ids'}->{$_->in->seq_id} = 1; + $self->{'_out_ids'}->{$_->out->seq_id} = 1; + } + + $self->_is_sorted(1); +} + +=head2 _is_sorted + + Title : _is_sorted + Usage : $newpos = $obj->_is_sorted; + Function: toggle for whether the (internal) coodinate mapper data are sorted + Example : + Returns : boolean + Args : boolean + +=cut + +sub _is_sorted{ + my ($self,$value) = @_; + + $self->{'_is_sorted'} = 1 if defined $value && $value; + return $self->{'_is_sorted'}; +} + +1; +