0
|
1 # $Id: Collection.pm,v 1.11.2.1 2003/02/20 05:11:45 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::Coordinate::Collection
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow
|
|
20 $pair1; $pair2;
|
|
21
|
|
22 # add them into a Collection
|
|
23 $collection = Bio::Coordinate::Collection->new;
|
|
24 $collection->add_mapper($pair1);
|
|
25 $collection->add_mapper($pair2);
|
|
26
|
|
27 # create a position and map it
|
|
28 $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
|
|
29 $res = $collection->map($pos);
|
|
30 $res->match->start == 1;
|
|
31 $res->match-> == 5;
|
|
32
|
|
33 # if mapping is many to one (*>1) or many-to-many (*>*)
|
|
34 # you have to give seq_id not get unrelevant entries
|
|
35 $pos = Bio::Location::Simple->new
|
|
36 (-start => 5, -end => 9 -seq_id=>'clone1');
|
|
37
|
|
38 =head1 DESCRIPTION
|
|
39
|
|
40 Generic, context neutral mapper to provide coordinate transforms
|
|
41 between two B<disjoint> coordinate systems. It brings into Bioperl the
|
|
42 functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into
|
|
43 current bioperl.
|
|
44
|
|
45 This class is aimed for representing mapping between whole chromosomes
|
|
46 and contigs, or between contigs and clones, or between sequencing
|
|
47 reads and assembly. The submaps are automatically sorted, so they can
|
|
48 be added in any order.
|
|
49
|
|
50 To map coordinates to the other direction, you have to swap() the
|
|
51 collection. Keeping track of the direction and ID restrictions
|
|
52 are left to the calling code.
|
|
53
|
|
54
|
|
55
|
|
56 =head1 FEEDBACK
|
|
57
|
|
58 =head2 Mailing Lists
|
|
59
|
|
60 User feedback is an integral part of the evolution of this and other
|
|
61 Bioperl modules. Send your comments and suggestions preferably to the
|
|
62 Bioperl mailing lists Your participation is much appreciated.
|
|
63
|
|
64 bioperl-l@bioperl.org - General discussion
|
|
65 http://bio.perl.org/MailList.html - About the mailing lists
|
|
66
|
|
67 =head2 Reporting Bugs
|
|
68
|
|
69 report bugs to the Bioperl bug tracking system to help us keep track
|
|
70 the bugs and their resolution. Bug reports can be submitted via
|
|
71 email or the web:
|
|
72
|
|
73 bioperl-bugs@bio.perl.org
|
|
74 http://bugzilla.bioperl.org/
|
|
75
|
|
76 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
77
|
|
78 Email: heikki@ebi.ac.uk
|
|
79 Address:
|
|
80
|
|
81 EMBL Outstation, European Bioinformatics Institute
|
|
82 Wellcome Trust Genome Campus, Hinxton
|
|
83 Cambs. CB10 1SD, United Kingdom
|
|
84
|
|
85 =head1 CONTRIBUTORS
|
|
86
|
|
87 Ewan Birney, birney@ebi.ac.uk
|
|
88
|
|
89 =head1 APPENDIX
|
|
90
|
|
91 The rest of the documentation details each of the object
|
|
92 methods. Internal methods are usually preceded with a _
|
|
93
|
|
94 =cut
|
|
95
|
|
96
|
|
97 # Let the code begin...
|
|
98
|
|
99 package Bio::Coordinate::Collection;
|
|
100 use vars qw(@ISA );
|
|
101 use strict;
|
|
102
|
|
103 # Object preamble - inherits from Bio::Root::Root
|
|
104 use Bio::Root::Root;
|
|
105 use Bio::Coordinate::MapperI;
|
|
106 use Bio::Coordinate::Result;
|
|
107 use Bio::Coordinate::Result::Gap;
|
|
108
|
|
109 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
|
|
110
|
|
111
|
|
112 sub new {
|
|
113 my($class,@args) = @_;
|
|
114 my $self = $class->SUPER::new(@args);
|
|
115
|
|
116 $self->{'_mappers'} = [];
|
|
117
|
|
118 my($in, $out, $strict, $mappers, $return_match) =
|
|
119 $self->_rearrange([qw(IN
|
|
120 OUT
|
|
121 STRICT
|
|
122 MAPPERS
|
|
123 RETURN_MATCH
|
|
124 )],
|
|
125 @args);
|
|
126
|
|
127 $in && $self->in($in);
|
|
128 $out && $self->out($out);
|
|
129 $mappers && $self->mappers($mappers);
|
|
130 $return_match && $self->return_match('return_match');
|
|
131 return $self; # success - we hope!
|
|
132 }
|
|
133
|
|
134
|
|
135 =head2 add_mapper
|
|
136
|
|
137 Title : add_mapper
|
|
138 Usage : $obj->add_mapper($mapper)
|
|
139 Function: Pushes one Bio::Coodinate::MapperI into the list of mappers.
|
|
140 Sets _is_sorted() to false.
|
|
141 Example :
|
|
142 Returns : 1 when succeeds, 0 for failure.
|
|
143 Args : mapper object
|
|
144
|
|
145 =cut
|
|
146
|
|
147 sub add_mapper {
|
|
148 my ($self,$value) = @_;
|
|
149
|
|
150 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
|
|
151 unless defined $value && $value->isa('Bio::Coordinate::MapperI');
|
|
152
|
|
153 # test pair range lengths
|
|
154 $self->warn("Coodinates in pair [". $value . ":" .
|
|
155 $value->in->seq_id . "/". $value->in->seq_id .
|
|
156 "] are not right.")
|
|
157 unless $value->test;
|
|
158
|
|
159 $self->_is_sorted(0);
|
|
160 push(@{$self->{'_mappers'}},$value);
|
|
161 }
|
|
162
|
|
163 =head2 mappers
|
|
164
|
|
165 Title : mappers
|
|
166 Usage : $obj->mappers();
|
|
167 Function: Returns or sets a list of mappers.
|
|
168 Example :
|
|
169 Returns : array of mappers
|
|
170 Args : array of mappers
|
|
171
|
|
172 =cut
|
|
173
|
|
174 sub mappers{
|
|
175 my ($self,@args) = @_;
|
|
176
|
|
177 if (@args) {
|
|
178
|
|
179 $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
|
|
180 unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI');
|
|
181 push(@{$self->{'_mappers'}}, @args);
|
|
182 }
|
|
183
|
|
184 return @{$self->{'_mappers'}};
|
|
185 }
|
|
186
|
|
187
|
|
188 =head2 each_mapper
|
|
189
|
|
190 Title : each_mapper
|
|
191 Usage : $obj->each_mapper();
|
|
192 Function: Returns a list of mappers.
|
|
193 Example :
|
|
194 Returns : list of mappers
|
|
195 Args : none
|
|
196
|
|
197 =cut
|
|
198
|
|
199 sub each_mapper{
|
|
200 my ($self) = @_;
|
|
201 return @{$self->{'_mappers'}};
|
|
202 }
|
|
203
|
|
204
|
|
205 =head2 swap
|
|
206
|
|
207 Title : swap
|
|
208 Usage : $obj->swap;
|
|
209 Function: Swap the direction of mapping;input <-> output
|
|
210 Example :
|
|
211 Returns : 1
|
|
212 Args :
|
|
213
|
|
214 =cut
|
|
215
|
|
216 sub swap {
|
|
217 my ($self) = @_;
|
|
218 use Data::Dumper;
|
|
219
|
|
220 $self->sort unless $self->_is_sorted;
|
|
221 map {$_->swap;} @{$self->{'_mappers'}};
|
|
222 ($self->{'_in_ids'}, $self->{'_out_ids'}) =
|
|
223 ($self->{'_out_ids'}, $self->{'_in_ids'});
|
|
224 1;
|
|
225 }
|
|
226
|
|
227 =head2 test
|
|
228
|
|
229 Title : test
|
|
230 Usage : $obj->test;
|
|
231 Function: test that both components of all pairs are of the same length.
|
|
232 Ran automatically.
|
|
233 Example :
|
|
234 Returns : boolean
|
|
235 Args :
|
|
236
|
|
237 =cut
|
|
238
|
|
239 sub test {
|
|
240 my ($self) = @_;
|
|
241
|
|
242 my $res = 1;
|
|
243
|
|
244 foreach my $mapper ($self->each_mapper) {
|
|
245 $self->warn("Coodinates in pair [". $mapper . ":" .
|
|
246 $mapper->in->seq_id . "/". $mapper->in->seq_id .
|
|
247 "] are not right.") && ($res = 0)
|
|
248 unless $mapper->test;
|
|
249 }
|
|
250 $res;
|
|
251 }
|
|
252
|
|
253
|
|
254 =head2 map
|
|
255
|
|
256 Title : map
|
|
257 Usage : $newpos = $obj->map($pos);
|
|
258 Function: Map the location from the input coordinate system
|
|
259 to a new value in the output coordinate system.
|
|
260 Example :
|
|
261 Returns : new value in the output coordinate system
|
|
262 Args : integer
|
|
263
|
|
264 =cut
|
|
265
|
|
266 sub map {
|
|
267 my ($self,$value) = @_;
|
|
268
|
|
269 $self->throw("Need to pass me a value.")
|
|
270 unless defined $value;
|
|
271 $self->throw("I need a Bio::Location, not [$value]")
|
|
272 unless $value->isa('Bio::LocationI');
|
|
273 $self->throw("No coordinate mappers!")
|
|
274 unless $self->each_mapper;
|
|
275
|
|
276 $self->sort unless $self->_is_sorted;
|
|
277
|
|
278
|
|
279 if ($value->isa("Bio::Location::SplitLocationI")) {
|
|
280
|
|
281 my $result = new Bio::Coordinate::Result;
|
|
282 foreach my $loc ( $value->sub_Location(1) ) {
|
|
283
|
|
284 my $res = $self->_map($loc);
|
|
285 map { $result->add_sub_Location($_) } $res->each_Location;
|
|
286
|
|
287 }
|
|
288 return $result;
|
|
289
|
|
290 } else {
|
|
291 return $self->_map($value);
|
|
292 }
|
|
293
|
|
294
|
|
295 }
|
|
296
|
|
297
|
|
298 =head2 _map
|
|
299
|
|
300 Title : _map
|
|
301 Usage : $newpos = $obj->_map($simpleloc);
|
|
302 Function: Internal method that does the actual mapping. Called multiple times
|
|
303 by map() if the location to be mapped is a split location
|
|
304
|
|
305 Example :
|
|
306 Returns : new location in the output coordinate system or undef
|
|
307 Args : Bio::Location::Simple
|
|
308
|
|
309 =cut
|
|
310
|
|
311 sub _map {
|
|
312 my ($self,$value) = @_;
|
|
313
|
|
314 my $result = Bio::Coordinate::Result->new(-is_remote=>1);
|
|
315
|
|
316 IDMATCH: {
|
|
317
|
|
318 # bail out now we if are forcing the use of an ID
|
|
319 # and it is not in this collection
|
|
320 last IDMATCH if defined $value->seq_id &&
|
|
321 ! $self->{'_in_ids'}->{$value->seq_id};
|
|
322
|
|
323 foreach my $pair ($self->each_mapper) {
|
|
324
|
|
325 # if we are limiting input to a certain ID
|
|
326 next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id;
|
|
327
|
|
328 # if we haven't even reached the start, move on
|
|
329 next if $pair->in->end < $value->start;
|
|
330 # if we have over run, break
|
|
331 last if $pair->in->start > $value->end;
|
|
332
|
|
333 my $subres = $pair->map($value);
|
|
334 $result->add_result($subres);
|
|
335 }
|
|
336 }
|
|
337
|
|
338 $result->seq_id($result->match->seq_id) if $result->match;
|
|
339 unless ($result->each_Location) {
|
|
340 #build one gap;
|
|
341 my $gap = Bio::Location::Simple->new(-start => $value->start,
|
|
342 -end => $value->end,
|
|
343 -strand => $value->strand,
|
|
344 -location_type => $value->location_type
|
|
345 );
|
|
346 $gap->seq_id($value->seq_id) if defined $value->seq_id;
|
|
347 bless $gap, 'Bio::Coordinate::Result::Gap';
|
|
348 $result->seq_id($value->seq_id) if defined $value->seq_id;
|
|
349 $result->add_sub_Location($gap);
|
|
350 }
|
|
351 return $result;
|
|
352 }
|
|
353
|
|
354
|
|
355 =head2 sort
|
|
356
|
|
357 Title : sort
|
|
358 Usage : $obj->sort;
|
|
359 Function: Sort function so that all mappings are sorted by
|
|
360 input coordinate start
|
|
361 Example :
|
|
362 Returns : 1
|
|
363 Args :
|
|
364
|
|
365 =cut
|
|
366
|
|
367 sub sort{
|
|
368 my ($self) = @_;
|
|
369
|
|
370 @{$self->{'_mappers'}} = map { $_->[0] }
|
|
371 sort { $a->[1] <=> $b->[1] }
|
|
372 map { [ $_, $_->in->start] }
|
|
373 @{$self->{'_mappers'}};
|
|
374
|
|
375 #create hashes for sequence ids
|
|
376 $self->{'_in_ids'} = ();
|
|
377 $self->{'_out_ids'} = ();
|
|
378 foreach ($self->each_mapper) {
|
|
379 $self->{'_in_ids'}->{$_->in->seq_id} = 1;
|
|
380 $self->{'_out_ids'}->{$_->out->seq_id} = 1;
|
|
381 }
|
|
382
|
|
383 $self->_is_sorted(1);
|
|
384 }
|
|
385
|
|
386 =head2 _is_sorted
|
|
387
|
|
388 Title : _is_sorted
|
|
389 Usage : $newpos = $obj->_is_sorted;
|
|
390 Function: toggle for whether the (internal) coodinate mapper data are sorted
|
|
391 Example :
|
|
392 Returns : boolean
|
|
393 Args : boolean
|
|
394
|
|
395 =cut
|
|
396
|
|
397 sub _is_sorted{
|
|
398 my ($self,$value) = @_;
|
|
399
|
|
400 $self->{'_is_sorted'} = 1 if defined $value && $value;
|
|
401 return $self->{'_is_sorted'};
|
|
402 }
|
|
403
|
|
404 1;
|
|
405
|