Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Coordinate/Collection.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Collection.pm,v 1.11.2.1 2003/02/20 05:11:45 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::Coordinate::Collection | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow | |
20 $pair1; $pair2; | |
21 | |
22 # add them into a Collection | |
23 $collection = Bio::Coordinate::Collection->new; | |
24 $collection->add_mapper($pair1); | |
25 $collection->add_mapper($pair2); | |
26 | |
27 # create a position and map it | |
28 $pos = Bio::Location::Simple->new (-start => 5, -end => 9 ); | |
29 $res = $collection->map($pos); | |
30 $res->match->start == 1; | |
31 $res->match-> == 5; | |
32 | |
33 # if mapping is many to one (*>1) or many-to-many (*>*) | |
34 # you have to give seq_id not get unrelevant entries | |
35 $pos = Bio::Location::Simple->new | |
36 (-start => 5, -end => 9 -seq_id=>'clone1'); | |
37 | |
38 =head1 DESCRIPTION | |
39 | |
40 Generic, context neutral mapper to provide coordinate transforms | |
41 between two B<disjoint> coordinate systems. It brings into Bioperl the | |
42 functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into | |
43 current bioperl. | |
44 | |
45 This class is aimed for representing mapping between whole chromosomes | |
46 and contigs, or between contigs and clones, or between sequencing | |
47 reads and assembly. The submaps are automatically sorted, so they can | |
48 be added in any order. | |
49 | |
50 To map coordinates to the other direction, you have to swap() the | |
51 collection. Keeping track of the direction and ID restrictions | |
52 are left to the calling code. | |
53 | |
54 | |
55 | |
56 =head1 FEEDBACK | |
57 | |
58 =head2 Mailing Lists | |
59 | |
60 User feedback is an integral part of the evolution of this and other | |
61 Bioperl modules. Send your comments and suggestions preferably to the | |
62 Bioperl mailing lists Your participation is much appreciated. | |
63 | |
64 bioperl-l@bioperl.org - General discussion | |
65 http://bio.perl.org/MailList.html - About the mailing lists | |
66 | |
67 =head2 Reporting Bugs | |
68 | |
69 report bugs to the Bioperl bug tracking system to help us keep track | |
70 the bugs and their resolution. Bug reports can be submitted via | |
71 email or the web: | |
72 | |
73 bioperl-bugs@bio.perl.org | |
74 http://bugzilla.bioperl.org/ | |
75 | |
76 =head1 AUTHOR - Heikki Lehvaslaiho | |
77 | |
78 Email: heikki@ebi.ac.uk | |
79 Address: | |
80 | |
81 EMBL Outstation, European Bioinformatics Institute | |
82 Wellcome Trust Genome Campus, Hinxton | |
83 Cambs. CB10 1SD, United Kingdom | |
84 | |
85 =head1 CONTRIBUTORS | |
86 | |
87 Ewan Birney, birney@ebi.ac.uk | |
88 | |
89 =head1 APPENDIX | |
90 | |
91 The rest of the documentation details each of the object | |
92 methods. Internal methods are usually preceded with a _ | |
93 | |
94 =cut | |
95 | |
96 | |
97 # Let the code begin... | |
98 | |
99 package Bio::Coordinate::Collection; | |
100 use vars qw(@ISA ); | |
101 use strict; | |
102 | |
103 # Object preamble - inherits from Bio::Root::Root | |
104 use Bio::Root::Root; | |
105 use Bio::Coordinate::MapperI; | |
106 use Bio::Coordinate::Result; | |
107 use Bio::Coordinate::Result::Gap; | |
108 | |
109 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI); | |
110 | |
111 | |
112 sub new { | |
113 my($class,@args) = @_; | |
114 my $self = $class->SUPER::new(@args); | |
115 | |
116 $self->{'_mappers'} = []; | |
117 | |
118 my($in, $out, $strict, $mappers, $return_match) = | |
119 $self->_rearrange([qw(IN | |
120 OUT | |
121 STRICT | |
122 MAPPERS | |
123 RETURN_MATCH | |
124 )], | |
125 @args); | |
126 | |
127 $in && $self->in($in); | |
128 $out && $self->out($out); | |
129 $mappers && $self->mappers($mappers); | |
130 $return_match && $self->return_match('return_match'); | |
131 return $self; # success - we hope! | |
132 } | |
133 | |
134 | |
135 =head2 add_mapper | |
136 | |
137 Title : add_mapper | |
138 Usage : $obj->add_mapper($mapper) | |
139 Function: Pushes one Bio::Coodinate::MapperI into the list of mappers. | |
140 Sets _is_sorted() to false. | |
141 Example : | |
142 Returns : 1 when succeeds, 0 for failure. | |
143 Args : mapper object | |
144 | |
145 =cut | |
146 | |
147 sub add_mapper { | |
148 my ($self,$value) = @_; | |
149 | |
150 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]") | |
151 unless defined $value && $value->isa('Bio::Coordinate::MapperI'); | |
152 | |
153 # test pair range lengths | |
154 $self->warn("Coodinates in pair [". $value . ":" . | |
155 $value->in->seq_id . "/". $value->in->seq_id . | |
156 "] are not right.") | |
157 unless $value->test; | |
158 | |
159 $self->_is_sorted(0); | |
160 push(@{$self->{'_mappers'}},$value); | |
161 } | |
162 | |
163 =head2 mappers | |
164 | |
165 Title : mappers | |
166 Usage : $obj->mappers(); | |
167 Function: Returns or sets a list of mappers. | |
168 Example : | |
169 Returns : array of mappers | |
170 Args : array of mappers | |
171 | |
172 =cut | |
173 | |
174 sub mappers{ | |
175 my ($self,@args) = @_; | |
176 | |
177 if (@args) { | |
178 | |
179 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]") | |
180 unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI'); | |
181 push(@{$self->{'_mappers'}}, @args); | |
182 } | |
183 | |
184 return @{$self->{'_mappers'}}; | |
185 } | |
186 | |
187 | |
188 =head2 each_mapper | |
189 | |
190 Title : each_mapper | |
191 Usage : $obj->each_mapper(); | |
192 Function: Returns a list of mappers. | |
193 Example : | |
194 Returns : list of mappers | |
195 Args : none | |
196 | |
197 =cut | |
198 | |
199 sub each_mapper{ | |
200 my ($self) = @_; | |
201 return @{$self->{'_mappers'}}; | |
202 } | |
203 | |
204 | |
205 =head2 swap | |
206 | |
207 Title : swap | |
208 Usage : $obj->swap; | |
209 Function: Swap the direction of mapping;input <-> output | |
210 Example : | |
211 Returns : 1 | |
212 Args : | |
213 | |
214 =cut | |
215 | |
216 sub swap { | |
217 my ($self) = @_; | |
218 use Data::Dumper; | |
219 | |
220 $self->sort unless $self->_is_sorted; | |
221 map {$_->swap;} @{$self->{'_mappers'}}; | |
222 ($self->{'_in_ids'}, $self->{'_out_ids'}) = | |
223 ($self->{'_out_ids'}, $self->{'_in_ids'}); | |
224 1; | |
225 } | |
226 | |
227 =head2 test | |
228 | |
229 Title : test | |
230 Usage : $obj->test; | |
231 Function: test that both components of all pairs are of the same length. | |
232 Ran automatically. | |
233 Example : | |
234 Returns : boolean | |
235 Args : | |
236 | |
237 =cut | |
238 | |
239 sub test { | |
240 my ($self) = @_; | |
241 | |
242 my $res = 1; | |
243 | |
244 foreach my $mapper ($self->each_mapper) { | |
245 $self->warn("Coodinates in pair [". $mapper . ":" . | |
246 $mapper->in->seq_id . "/". $mapper->in->seq_id . | |
247 "] are not right.") && ($res = 0) | |
248 unless $mapper->test; | |
249 } | |
250 $res; | |
251 } | |
252 | |
253 | |
254 =head2 map | |
255 | |
256 Title : map | |
257 Usage : $newpos = $obj->map($pos); | |
258 Function: Map the location from the input coordinate system | |
259 to a new value in the output coordinate system. | |
260 Example : | |
261 Returns : new value in the output coordinate system | |
262 Args : integer | |
263 | |
264 =cut | |
265 | |
266 sub map { | |
267 my ($self,$value) = @_; | |
268 | |
269 $self->throw("Need to pass me a value.") | |
270 unless defined $value; | |
271 $self->throw("I need a Bio::Location, not [$value]") | |
272 unless $value->isa('Bio::LocationI'); | |
273 $self->throw("No coordinate mappers!") | |
274 unless $self->each_mapper; | |
275 | |
276 $self->sort unless $self->_is_sorted; | |
277 | |
278 | |
279 if ($value->isa("Bio::Location::SplitLocationI")) { | |
280 | |
281 my $result = new Bio::Coordinate::Result; | |
282 foreach my $loc ( $value->sub_Location(1) ) { | |
283 | |
284 my $res = $self->_map($loc); | |
285 map { $result->add_sub_Location($_) } $res->each_Location; | |
286 | |
287 } | |
288 return $result; | |
289 | |
290 } else { | |
291 return $self->_map($value); | |
292 } | |
293 | |
294 | |
295 } | |
296 | |
297 | |
298 =head2 _map | |
299 | |
300 Title : _map | |
301 Usage : $newpos = $obj->_map($simpleloc); | |
302 Function: Internal method that does the actual mapping. Called multiple times | |
303 by map() if the location to be mapped is a split location | |
304 | |
305 Example : | |
306 Returns : new location in the output coordinate system or undef | |
307 Args : Bio::Location::Simple | |
308 | |
309 =cut | |
310 | |
311 sub _map { | |
312 my ($self,$value) = @_; | |
313 | |
314 my $result = Bio::Coordinate::Result->new(-is_remote=>1); | |
315 | |
316 IDMATCH: { | |
317 | |
318 # bail out now we if are forcing the use of an ID | |
319 # and it is not in this collection | |
320 last IDMATCH if defined $value->seq_id && | |
321 ! $self->{'_in_ids'}->{$value->seq_id}; | |
322 | |
323 foreach my $pair ($self->each_mapper) { | |
324 | |
325 # if we are limiting input to a certain ID | |
326 next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id; | |
327 | |
328 # if we haven't even reached the start, move on | |
329 next if $pair->in->end < $value->start; | |
330 # if we have over run, break | |
331 last if $pair->in->start > $value->end; | |
332 | |
333 my $subres = $pair->map($value); | |
334 $result->add_result($subres); | |
335 } | |
336 } | |
337 | |
338 $result->seq_id($result->match->seq_id) if $result->match; | |
339 unless ($result->each_Location) { | |
340 #build one gap; | |
341 my $gap = Bio::Location::Simple->new(-start => $value->start, | |
342 -end => $value->end, | |
343 -strand => $value->strand, | |
344 -location_type => $value->location_type | |
345 ); | |
346 $gap->seq_id($value->seq_id) if defined $value->seq_id; | |
347 bless $gap, 'Bio::Coordinate::Result::Gap'; | |
348 $result->seq_id($value->seq_id) if defined $value->seq_id; | |
349 $result->add_sub_Location($gap); | |
350 } | |
351 return $result; | |
352 } | |
353 | |
354 | |
355 =head2 sort | |
356 | |
357 Title : sort | |
358 Usage : $obj->sort; | |
359 Function: Sort function so that all mappings are sorted by | |
360 input coordinate start | |
361 Example : | |
362 Returns : 1 | |
363 Args : | |
364 | |
365 =cut | |
366 | |
367 sub sort{ | |
368 my ($self) = @_; | |
369 | |
370 @{$self->{'_mappers'}} = map { $_->[0] } | |
371 sort { $a->[1] <=> $b->[1] } | |
372 map { [ $_, $_->in->start] } | |
373 @{$self->{'_mappers'}}; | |
374 | |
375 #create hashes for sequence ids | |
376 $self->{'_in_ids'} = (); | |
377 $self->{'_out_ids'} = (); | |
378 foreach ($self->each_mapper) { | |
379 $self->{'_in_ids'}->{$_->in->seq_id} = 1; | |
380 $self->{'_out_ids'}->{$_->out->seq_id} = 1; | |
381 } | |
382 | |
383 $self->_is_sorted(1); | |
384 } | |
385 | |
386 =head2 _is_sorted | |
387 | |
388 Title : _is_sorted | |
389 Usage : $newpos = $obj->_is_sorted; | |
390 Function: toggle for whether the (internal) coodinate mapper data are sorted | |
391 Example : | |
392 Returns : boolean | |
393 Args : boolean | |
394 | |
395 =cut | |
396 | |
397 sub _is_sorted{ | |
398 my ($self,$value) = @_; | |
399 | |
400 $self->{'_is_sorted'} = 1 if defined $value && $value; | |
401 return $self->{'_is_sorted'}; | |
402 } | |
403 | |
404 1; | |
405 |