0
|
1 # $Id: Pair.pm,v 1.9.2.1 2003/02/20 05:11:45 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::Coordinate::Pair
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Coordinate::Pair - Continuous match between two coordinate sets
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Location::Simple;
|
|
20 use Bio::Coordinate::Pair;
|
|
21
|
|
22 my $match1 = Bio::Location::Simple->new
|
|
23 (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
|
|
24 my $match2 = Bio::Location::Simple->new
|
|
25 (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
|
|
26 my $pair = Bio::Coordinate::Pair->new(-in => $match1,
|
|
27 -out => $match2
|
|
28 );
|
|
29 # location to match
|
|
30 $pos = Bio::Location::Simple->new
|
|
31 (-start => 25, -end => 25, -strand=> -1 );
|
|
32
|
|
33 # results are in a Bio::Coordinate::Result
|
|
34 # they can be Matches and Gaps; are Bio::LocationIs
|
|
35 $res = $pair->map($pos);
|
|
36 $res->isa('Bio::Coordinate::Result');
|
|
37 $res->each_match == 1;
|
|
38 $res->each_gap == 0;
|
|
39 $res->each_Location == 1;
|
|
40 $res->match->start == 5;
|
|
41 $res->match->end == 5;
|
|
42 $res->match->strand == -1;
|
|
43 $res->match->seq_id eq 'peptide';
|
|
44
|
|
45
|
|
46 =head1 DESCRIPTION
|
|
47
|
|
48 This class represents a one continuous match between two coordinate
|
|
49 systems represented by Bio::Location::Simple objects. The relationship
|
|
50 is directed and reversible. It implements methods to ensure internal
|
|
51 consistency, and map continuous and split locations from one
|
|
52 coordinate system to another.
|
|
53
|
|
54 The map() method returns Bio::Coordinate::Results with
|
|
55 Bio::Coordinate::Result::Gaps. The calling code have to deal (process
|
|
56 or ignore) them.
|
|
57
|
|
58 =head1 FEEDBACK
|
|
59
|
|
60 =head2 Mailing Lists
|
|
61
|
|
62 User feedback is an integral part of the evolution of this and other
|
|
63 Bioperl modules. Send your comments and suggestions preferably to the
|
|
64 Bioperl mailing lists Your participation is much appreciated.
|
|
65
|
|
66 bioperl-l@bioperl.org - General discussion
|
|
67 http://bio.perl.org/MailList.html - About the mailing lists
|
|
68
|
|
69 =head2 Reporting Bugs
|
|
70
|
|
71 report bugs to the Bioperl bug tracking system to help us keep track
|
|
72 the bugs and their resolution. Bug reports can be submitted via
|
|
73 email or the web:
|
|
74
|
|
75 bioperl-bugs@bio.perl.org
|
|
76 http://bugzilla.bioperl.org/
|
|
77
|
|
78 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
79
|
|
80 Email: heikki@ebi.ac.uk
|
|
81 Address:
|
|
82
|
|
83 EMBL Outstation, European Bioinformatics Institute
|
|
84 Wellcome Trust Genome Campus, Hinxton
|
|
85 Cambs. CB10 1SD, United Kingdom
|
|
86
|
|
87 =head1 CONTRIBUTORS
|
|
88
|
|
89 Additional contributors names and emails here
|
|
90
|
|
91 =head1 APPENDIX
|
|
92
|
|
93 The rest of the documentation details each of the object
|
|
94 methods. Internal methods are usually preceded with a _
|
|
95
|
|
96 =cut
|
|
97
|
|
98
|
|
99 # Let the code begin...
|
|
100
|
|
101 package Bio::Coordinate::Pair;
|
|
102 use vars qw(@ISA );
|
|
103 use strict;
|
|
104
|
|
105 # Object preamble - inherits from Bio::Root::Root
|
|
106 use Bio::Root::Root;
|
|
107 use Bio::Coordinate::MapperI;
|
|
108 use Bio::Coordinate::Result;
|
|
109 use Bio::Coordinate::Result::Match;
|
|
110 use Bio::Coordinate::Result::Gap;
|
|
111
|
|
112 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
|
|
113
|
|
114
|
|
115 sub new {
|
|
116 my($class,@args) = @_;
|
|
117 my $self = $class->SUPER::new(@args);
|
|
118
|
|
119 my($in, $out) =
|
|
120 $self->_rearrange([qw(IN
|
|
121 OUT
|
|
122 )],
|
|
123 @args);
|
|
124
|
|
125 $in && $self->in($in);
|
|
126 $out && $self->out($out);
|
|
127 return $self; # success - we hope!
|
|
128 }
|
|
129
|
|
130 =head2 in
|
|
131
|
|
132 Title : in
|
|
133 Usage : $obj->in('peptide');
|
|
134 Function: Set and read the input coordinate system.
|
|
135 Example :
|
|
136 Returns : value of input system
|
|
137 Args : new value (optional), Bio::LocationI
|
|
138
|
|
139 =cut
|
|
140
|
|
141 sub in {
|
|
142 my ($self,$value) = @_;
|
|
143 if( defined $value) {
|
|
144 $self->throw("Not a valid input Bio::Location [$value] ")
|
|
145 unless $value->isa('Bio::LocationI');
|
|
146 $self->{'_in'} = $value;
|
|
147 }
|
|
148 return $self->{'_in'};
|
|
149 }
|
|
150
|
|
151
|
|
152 =head2 out
|
|
153
|
|
154 Title : out
|
|
155 Usage : $obj->out('peptide');
|
|
156 Function: Set and read the output coordinate system.
|
|
157 Example :
|
|
158 Returns : value of output system
|
|
159 Args : new value (optional), Bio::LocationI
|
|
160
|
|
161 =cut
|
|
162
|
|
163 sub out {
|
|
164 my ($self,$value) = @_;
|
|
165 if( defined $value) {
|
|
166 $self->throw("Not a valid output coordinate Bio::Location [$value] ")
|
|
167 unless $value->isa('Bio::LocationI');
|
|
168 $self->{'_out'} = $value;
|
|
169 }
|
|
170 return $self->{'_out'};
|
|
171 }
|
|
172
|
|
173
|
|
174 =head2 swap
|
|
175
|
|
176 Title : swap
|
|
177 Usage : $obj->swap;
|
|
178 Function: Swap the direction of mapping; input <-> output
|
|
179 Example :
|
|
180 Returns : 1
|
|
181 Args :
|
|
182
|
|
183 =cut
|
|
184
|
|
185 sub swap {
|
|
186 my ($self) = @_;
|
|
187 ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'});
|
|
188 return 1;
|
|
189 }
|
|
190
|
|
191 =head2 strand
|
|
192
|
|
193 Title : strand
|
|
194 Usage : $obj->strand;
|
|
195 Function: Get strand value for the pair
|
|
196 Example :
|
|
197 Returns : ( 1 | 0 | -1 )
|
|
198 Args :
|
|
199
|
|
200 =cut
|
|
201
|
|
202 sub strand {
|
|
203 my ($self) = @_;
|
|
204 $self->warn("Outgoing coordinates are not defined")
|
|
205 unless $self->out;
|
|
206 $self->warn("Incoming coordinates are not defined")
|
|
207 unless $self->in;
|
|
208
|
|
209 return $self->in->strand * $self->out->strand;
|
|
210 }
|
|
211
|
|
212 =head2 test
|
|
213
|
|
214 Title : test
|
|
215 Usage : $obj->test;
|
|
216 Function: test that both components are of the same length
|
|
217 Example :
|
|
218 Returns : ( 1 | undef )
|
|
219 Args :
|
|
220
|
|
221 =cut
|
|
222
|
|
223 sub test {
|
|
224 my ($self) = @_;
|
|
225 $self->warn("Outgoing coordinates are not defined")
|
|
226 unless $self->out;
|
|
227 $self->warn("Incoming coordinates are not defined")
|
|
228 unless $self->in;
|
|
229
|
|
230 1 if $self->in->end - $self->in->start == $self->out->end - $self->out->start;
|
|
231 }
|
|
232
|
|
233
|
|
234 =head2 map
|
|
235
|
|
236 Title : map
|
|
237 Usage : $newpos = $obj->map($pos);
|
|
238 Function: Map the location from the input coordinate system
|
|
239 to a new value in the output coordinate system.
|
|
240 Example :
|
|
241 Returns : new Bio::LocationI in the output coordinate system or undef
|
|
242 Args : Bio::LocationI object
|
|
243
|
|
244 =cut
|
|
245
|
|
246 sub map {
|
|
247 my ($self,$value) = @_;
|
|
248
|
|
249 $self->throw("Need to pass me a value.")
|
|
250 unless defined $value;
|
|
251 $self->throw("I need a Bio::Location, not [$value]")
|
|
252 unless $value->isa('Bio::LocationI');
|
|
253 $self->throw("Input coordinate system not set")
|
|
254 unless $self->in;
|
|
255 $self->throw("Output coordinate system not set")
|
|
256 unless $self->out;
|
|
257
|
|
258
|
|
259 if ($value->isa("Bio::Location::SplitLocationI")) {
|
|
260
|
|
261 my $result = new Bio::Coordinate::Result;
|
|
262 my $split = new Bio::Location::Split(-seq_id=>$self->out->seq_id);
|
|
263 foreach my $loc ( $value->sub_Location(1) ) {
|
|
264
|
|
265 my $res = $self->_map($loc);
|
|
266 map { $result->add_sub_Location($_) } $res->each_Location;
|
|
267
|
|
268 }
|
|
269 return $result;
|
|
270
|
|
271 } else {
|
|
272 return $self->_map($value);
|
|
273 }
|
|
274
|
|
275 }
|
|
276
|
|
277
|
|
278 =head2 _map
|
|
279
|
|
280 Title : _map
|
|
281 Usage : $newpos = $obj->_map($simpleloc);
|
|
282 Function: Internal method that does the actual mapping. Called
|
|
283 multiple times by map() if the location to be mapped is a
|
|
284 split location
|
|
285 Example :
|
|
286 Returns : new location in the output coordinate system or undef
|
|
287 Args : Bio::Location::Simple
|
|
288
|
|
289 =cut
|
|
290
|
|
291 sub _map {
|
|
292 my ($self,$value) = @_;
|
|
293
|
|
294 my $result = new Bio::Coordinate::Result;
|
|
295
|
|
296 my $offset = $self->in->start - $self->out->start;
|
|
297 my $start = $value->start - $offset;
|
|
298 my $end = $value->end - $offset;
|
|
299
|
|
300 my $match = Bio::Location::Simple->new;
|
|
301 $match->location_type($value->location_type);
|
|
302 $match->strand($self->strand);
|
|
303
|
|
304 #within
|
|
305 # |-------------------------|
|
|
306 # |-|
|
|
307 if ($start >= $self->out->start and $end <= $self->out->end) {
|
|
308
|
|
309 $match->seq_id($self->out->seq_id);
|
|
310 $result->seq_id($self->out->seq_id);
|
|
311
|
|
312 if ($self->strand == 1) {
|
|
313 $match->start($start);
|
|
314 $match->end($end);
|
|
315 } else {
|
|
316 $match->start($self->out->end - $end + $self->out->start);
|
|
317 $match->end($self->out->end - $start + $self->out->start);
|
|
318 }
|
|
319 if ($value->strand) {
|
|
320 $match->strand($match->strand * $value->strand);
|
|
321 $result->strand($match->strand);
|
|
322 }
|
|
323 bless $match, 'Bio::Coordinate::Result::Match';
|
|
324 $result->add_sub_Location($match);
|
|
325 }
|
|
326 #out
|
|
327 # |-------------------------|
|
|
328 # |-| or |-|
|
|
329 elsif ( ($end < $self->out->start or $start > $self->out->end ) or
|
|
330 #insertions just outside the range need special settings
|
|
331 ($value->location_type eq 'IN-BETWEEN' and
|
|
332 ($end = $self->out->start or $start = $self->out->end))) {
|
|
333
|
|
334 $match->seq_id($self->in->seq_id);
|
|
335 $result->seq_id($self->in->seq_id);
|
|
336 $match->start($value->start);
|
|
337 $match->end($value->end);
|
|
338 $match->strand($value->strand);
|
|
339
|
|
340 bless $match, 'Bio::Coordinate::Result::Gap';
|
|
341 $result->add_sub_Location($match);
|
|
342 }
|
|
343 #partial I
|
|
344 # |-------------------------|
|
|
345 # |-----|
|
|
346 elsif ($start < $self->out->start and $end <= $self->out->end ) {
|
|
347
|
|
348 $result->seq_id($self->out->seq_id);
|
|
349 if ($value->strand) {
|
|
350 $match->strand($match->strand * $value->strand);
|
|
351 $result->strand($match->strand);
|
|
352 }
|
|
353 my $gap = Bio::Location::Simple->new;
|
|
354 $gap->start($value->start);
|
|
355 $gap->end($self->in->start - 1);
|
|
356 $gap->strand($value->strand);
|
|
357 $gap->seq_id($self->in->seq_id);
|
|
358
|
|
359 bless $gap, 'Bio::Coordinate::Result::Gap';
|
|
360 $result->add_sub_Location($gap);
|
|
361
|
|
362 # match
|
|
363 $match->seq_id($self->out->seq_id);
|
|
364
|
|
365 if ($self->strand == 1) {
|
|
366 $match->start($self->out->start);
|
|
367 $match->end($end);
|
|
368 } else {
|
|
369 $match->start($self->out->end - $end + $self->out->start);
|
|
370 $match->end($self->out->end);
|
|
371 }
|
|
372 bless $match, 'Bio::Coordinate::Result::Match';
|
|
373 $result->add_sub_Location($match);
|
|
374 }
|
|
375 #partial II
|
|
376 # |-------------------------|
|
|
377 # |------|
|
|
378 elsif ($start >= $self->out->start and $end > $self->out->end ) {
|
|
379
|
|
380 $match->seq_id($self->out->seq_id);
|
|
381 $result->seq_id($self->out->seq_id);
|
|
382 if ($value->strand) {
|
|
383 $match->strand($match->strand * $value->strand);
|
|
384 $result->strand($match->strand);
|
|
385 }
|
|
386 if ($self->strand == 1) {
|
|
387 $match->start($start);
|
|
388 $match->end($self->out->end);
|
|
389 } else {
|
|
390 $match->start($self->out->start);
|
|
391 $match->end($self->out->end - $start + $self->out->start);
|
|
392 }
|
|
393 bless $match, 'Bio::Coordinate::Result::Match';
|
|
394 $result->add_sub_Location($match);
|
|
395
|
|
396 my $gap = Bio::Location::Simple->new;
|
|
397 $gap->start($self->in->end + 1);
|
|
398 $gap->end($value->end);
|
|
399 $gap->strand($value->strand);
|
|
400 $gap->seq_id($self->in->seq_id);
|
|
401 bless $gap, 'Bio::Coordinate::Result::Gap';
|
|
402 $result->add_sub_Location($gap);
|
|
403
|
|
404 }
|
|
405 #enveloping
|
|
406 # |-------------------------|
|
|
407 # |---------------------------------|
|
|
408 elsif ($start < $self->out->start and $end > $self->out->end ) {
|
|
409
|
|
410 $result->seq_id($self->out->seq_id);
|
|
411 if ($value->strand) {
|
|
412 $match->strand($match->strand * $value->strand);
|
|
413 $result->strand($match->strand);
|
|
414 }
|
|
415 # gap1
|
|
416 my $gap1 = Bio::Location::Simple->new;
|
|
417 $gap1->start($value->start);
|
|
418 $gap1->end($self->in->start - 1);
|
|
419 $gap1->strand($value->strand);
|
|
420 $gap1->seq_id($self->in->seq_id);
|
|
421 bless $gap1, 'Bio::Coordinate::Result::Gap';
|
|
422 $result->add_sub_Location($gap1);
|
|
423
|
|
424 # match
|
|
425 $match->seq_id($self->out->seq_id);
|
|
426
|
|
427 $match->start($self->out->start);
|
|
428 $match->end($self->out->end);
|
|
429 bless $match, 'Bio::Coordinate::Result::Match';
|
|
430 $result->add_sub_Location($match);
|
|
431
|
|
432 # gap2
|
|
433 my $gap2 = Bio::Location::Simple->new;
|
|
434 $gap2->start($self->in->end + 1);
|
|
435 $gap2->end($value->end);
|
|
436 $gap2->strand($value->strand);
|
|
437 $gap2->seq_id($self->in->seq_id);
|
|
438 bless $gap2, 'Bio::Coordinate::Result::Gap';
|
|
439 $result->add_sub_Location($gap2);
|
|
440
|
|
441 } else {
|
|
442 $self->throw("Should not be here!");
|
|
443 }
|
|
444 return $result;
|
|
445 }
|
|
446
|
|
447
|
|
448 1;
|