comparison variant_effect_predictor/Bio/Coordinate/Collection.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Collection.pm,v 1.11.2.1 2003/02/20 05:11:45 heikki Exp $
2 #
3 # bioperl module for Bio::Coordinate::Collection
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets
16
17 =head1 SYNOPSIS
18
19 # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow
20 $pair1; $pair2;
21
22 # add them into a Collection
23 $collection = Bio::Coordinate::Collection->new;
24 $collection->add_mapper($pair1);
25 $collection->add_mapper($pair2);
26
27 # create a position and map it
28 $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
29 $res = $collection->map($pos);
30 $res->match->start == 1;
31 $res->match-> == 5;
32
33 # if mapping is many to one (*>1) or many-to-many (*>*)
34 # you have to give seq_id not get unrelevant entries
35 $pos = Bio::Location::Simple->new
36 (-start => 5, -end => 9 -seq_id=>'clone1');
37
38 =head1 DESCRIPTION
39
40 Generic, context neutral mapper to provide coordinate transforms
41 between two B<disjoint> coordinate systems. It brings into Bioperl the
42 functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into
43 current bioperl.
44
45 This class is aimed for representing mapping between whole chromosomes
46 and contigs, or between contigs and clones, or between sequencing
47 reads and assembly. The submaps are automatically sorted, so they can
48 be added in any order.
49
50 To map coordinates to the other direction, you have to swap() the
51 collection. Keeping track of the direction and ID restrictions
52 are left to the calling code.
53
54
55
56 =head1 FEEDBACK
57
58 =head2 Mailing Lists
59
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to the
62 Bioperl mailing lists Your participation is much appreciated.
63
64 bioperl-l@bioperl.org - General discussion
65 http://bio.perl.org/MailList.html - About the mailing lists
66
67 =head2 Reporting Bugs
68
69 report bugs to the Bioperl bug tracking system to help us keep track
70 the bugs and their resolution. Bug reports can be submitted via
71 email or the web:
72
73 bioperl-bugs@bio.perl.org
74 http://bugzilla.bioperl.org/
75
76 =head1 AUTHOR - Heikki Lehvaslaiho
77
78 Email: heikki@ebi.ac.uk
79 Address:
80
81 EMBL Outstation, European Bioinformatics Institute
82 Wellcome Trust Genome Campus, Hinxton
83 Cambs. CB10 1SD, United Kingdom
84
85 =head1 CONTRIBUTORS
86
87 Ewan Birney, birney@ebi.ac.uk
88
89 =head1 APPENDIX
90
91 The rest of the documentation details each of the object
92 methods. Internal methods are usually preceded with a _
93
94 =cut
95
96
97 # Let the code begin...
98
99 package Bio::Coordinate::Collection;
100 use vars qw(@ISA );
101 use strict;
102
103 # Object preamble - inherits from Bio::Root::Root
104 use Bio::Root::Root;
105 use Bio::Coordinate::MapperI;
106 use Bio::Coordinate::Result;
107 use Bio::Coordinate::Result::Gap;
108
109 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
110
111
112 sub new {
113 my($class,@args) = @_;
114 my $self = $class->SUPER::new(@args);
115
116 $self->{'_mappers'} = [];
117
118 my($in, $out, $strict, $mappers, $return_match) =
119 $self->_rearrange([qw(IN
120 OUT
121 STRICT
122 MAPPERS
123 RETURN_MATCH
124 )],
125 @args);
126
127 $in && $self->in($in);
128 $out && $self->out($out);
129 $mappers && $self->mappers($mappers);
130 $return_match && $self->return_match('return_match');
131 return $self; # success - we hope!
132 }
133
134
135 =head2 add_mapper
136
137 Title : add_mapper
138 Usage : $obj->add_mapper($mapper)
139 Function: Pushes one Bio::Coodinate::MapperI into the list of mappers.
140 Sets _is_sorted() to false.
141 Example :
142 Returns : 1 when succeeds, 0 for failure.
143 Args : mapper object
144
145 =cut
146
147 sub add_mapper {
148 my ($self,$value) = @_;
149
150 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
151 unless defined $value && $value->isa('Bio::Coordinate::MapperI');
152
153 # test pair range lengths
154 $self->warn("Coodinates in pair [". $value . ":" .
155 $value->in->seq_id . "/". $value->in->seq_id .
156 "] are not right.")
157 unless $value->test;
158
159 $self->_is_sorted(0);
160 push(@{$self->{'_mappers'}},$value);
161 }
162
163 =head2 mappers
164
165 Title : mappers
166 Usage : $obj->mappers();
167 Function: Returns or sets a list of mappers.
168 Example :
169 Returns : array of mappers
170 Args : array of mappers
171
172 =cut
173
174 sub mappers{
175 my ($self,@args) = @_;
176
177 if (@args) {
178
179 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
180 unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI');
181 push(@{$self->{'_mappers'}}, @args);
182 }
183
184 return @{$self->{'_mappers'}};
185 }
186
187
188 =head2 each_mapper
189
190 Title : each_mapper
191 Usage : $obj->each_mapper();
192 Function: Returns a list of mappers.
193 Example :
194 Returns : list of mappers
195 Args : none
196
197 =cut
198
199 sub each_mapper{
200 my ($self) = @_;
201 return @{$self->{'_mappers'}};
202 }
203
204
205 =head2 swap
206
207 Title : swap
208 Usage : $obj->swap;
209 Function: Swap the direction of mapping;input <-> output
210 Example :
211 Returns : 1
212 Args :
213
214 =cut
215
216 sub swap {
217 my ($self) = @_;
218 use Data::Dumper;
219
220 $self->sort unless $self->_is_sorted;
221 map {$_->swap;} @{$self->{'_mappers'}};
222 ($self->{'_in_ids'}, $self->{'_out_ids'}) =
223 ($self->{'_out_ids'}, $self->{'_in_ids'});
224 1;
225 }
226
227 =head2 test
228
229 Title : test
230 Usage : $obj->test;
231 Function: test that both components of all pairs are of the same length.
232 Ran automatically.
233 Example :
234 Returns : boolean
235 Args :
236
237 =cut
238
239 sub test {
240 my ($self) = @_;
241
242 my $res = 1;
243
244 foreach my $mapper ($self->each_mapper) {
245 $self->warn("Coodinates in pair [". $mapper . ":" .
246 $mapper->in->seq_id . "/". $mapper->in->seq_id .
247 "] are not right.") && ($res = 0)
248 unless $mapper->test;
249 }
250 $res;
251 }
252
253
254 =head2 map
255
256 Title : map
257 Usage : $newpos = $obj->map($pos);
258 Function: Map the location from the input coordinate system
259 to a new value in the output coordinate system.
260 Example :
261 Returns : new value in the output coordinate system
262 Args : integer
263
264 =cut
265
266 sub map {
267 my ($self,$value) = @_;
268
269 $self->throw("Need to pass me a value.")
270 unless defined $value;
271 $self->throw("I need a Bio::Location, not [$value]")
272 unless $value->isa('Bio::LocationI');
273 $self->throw("No coordinate mappers!")
274 unless $self->each_mapper;
275
276 $self->sort unless $self->_is_sorted;
277
278
279 if ($value->isa("Bio::Location::SplitLocationI")) {
280
281 my $result = new Bio::Coordinate::Result;
282 foreach my $loc ( $value->sub_Location(1) ) {
283
284 my $res = $self->_map($loc);
285 map { $result->add_sub_Location($_) } $res->each_Location;
286
287 }
288 return $result;
289
290 } else {
291 return $self->_map($value);
292 }
293
294
295 }
296
297
298 =head2 _map
299
300 Title : _map
301 Usage : $newpos = $obj->_map($simpleloc);
302 Function: Internal method that does the actual mapping. Called multiple times
303 by map() if the location to be mapped is a split location
304
305 Example :
306 Returns : new location in the output coordinate system or undef
307 Args : Bio::Location::Simple
308
309 =cut
310
311 sub _map {
312 my ($self,$value) = @_;
313
314 my $result = Bio::Coordinate::Result->new(-is_remote=>1);
315
316 IDMATCH: {
317
318 # bail out now we if are forcing the use of an ID
319 # and it is not in this collection
320 last IDMATCH if defined $value->seq_id &&
321 ! $self->{'_in_ids'}->{$value->seq_id};
322
323 foreach my $pair ($self->each_mapper) {
324
325 # if we are limiting input to a certain ID
326 next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id;
327
328 # if we haven't even reached the start, move on
329 next if $pair->in->end < $value->start;
330 # if we have over run, break
331 last if $pair->in->start > $value->end;
332
333 my $subres = $pair->map($value);
334 $result->add_result($subres);
335 }
336 }
337
338 $result->seq_id($result->match->seq_id) if $result->match;
339 unless ($result->each_Location) {
340 #build one gap;
341 my $gap = Bio::Location::Simple->new(-start => $value->start,
342 -end => $value->end,
343 -strand => $value->strand,
344 -location_type => $value->location_type
345 );
346 $gap->seq_id($value->seq_id) if defined $value->seq_id;
347 bless $gap, 'Bio::Coordinate::Result::Gap';
348 $result->seq_id($value->seq_id) if defined $value->seq_id;
349 $result->add_sub_Location($gap);
350 }
351 return $result;
352 }
353
354
355 =head2 sort
356
357 Title : sort
358 Usage : $obj->sort;
359 Function: Sort function so that all mappings are sorted by
360 input coordinate start
361 Example :
362 Returns : 1
363 Args :
364
365 =cut
366
367 sub sort{
368 my ($self) = @_;
369
370 @{$self->{'_mappers'}} = map { $_->[0] }
371 sort { $a->[1] <=> $b->[1] }
372 map { [ $_, $_->in->start] }
373 @{$self->{'_mappers'}};
374
375 #create hashes for sequence ids
376 $self->{'_in_ids'} = ();
377 $self->{'_out_ids'} = ();
378 foreach ($self->each_mapper) {
379 $self->{'_in_ids'}->{$_->in->seq_id} = 1;
380 $self->{'_out_ids'}->{$_->out->seq_id} = 1;
381 }
382
383 $self->_is_sorted(1);
384 }
385
386 =head2 _is_sorted
387
388 Title : _is_sorted
389 Usage : $newpos = $obj->_is_sorted;
390 Function: toggle for whether the (internal) coodinate mapper data are sorted
391 Example :
392 Returns : boolean
393 Args : boolean
394
395 =cut
396
397 sub _is_sorted{
398 my ($self,$value) = @_;
399
400 $self->{'_is_sorted'} = 1 if defined $value && $value;
401 return $self->{'_is_sorted'};
402 }
403
404 1;
405