Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Coordinate/Pair.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Pair.pm,v 1.9.2.1 2003/02/20 05:11:45 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::Coordinate::Pair | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Coordinate::Pair - Continuous match between two coordinate sets | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Location::Simple; | |
20 use Bio::Coordinate::Pair; | |
21 | |
22 my $match1 = Bio::Location::Simple->new | |
23 (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 ); | |
24 my $match2 = Bio::Location::Simple->new | |
25 (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 ); | |
26 my $pair = Bio::Coordinate::Pair->new(-in => $match1, | |
27 -out => $match2 | |
28 ); | |
29 # location to match | |
30 $pos = Bio::Location::Simple->new | |
31 (-start => 25, -end => 25, -strand=> -1 ); | |
32 | |
33 # results are in a Bio::Coordinate::Result | |
34 # they can be Matches and Gaps; are Bio::LocationIs | |
35 $res = $pair->map($pos); | |
36 $res->isa('Bio::Coordinate::Result'); | |
37 $res->each_match == 1; | |
38 $res->each_gap == 0; | |
39 $res->each_Location == 1; | |
40 $res->match->start == 5; | |
41 $res->match->end == 5; | |
42 $res->match->strand == -1; | |
43 $res->match->seq_id eq 'peptide'; | |
44 | |
45 | |
46 =head1 DESCRIPTION | |
47 | |
48 This class represents a one continuous match between two coordinate | |
49 systems represented by Bio::Location::Simple objects. The relationship | |
50 is directed and reversible. It implements methods to ensure internal | |
51 consistency, and map continuous and split locations from one | |
52 coordinate system to another. | |
53 | |
54 The map() method returns Bio::Coordinate::Results with | |
55 Bio::Coordinate::Result::Gaps. The calling code have to deal (process | |
56 or ignore) them. | |
57 | |
58 =head1 FEEDBACK | |
59 | |
60 =head2 Mailing Lists | |
61 | |
62 User feedback is an integral part of the evolution of this and other | |
63 Bioperl modules. Send your comments and suggestions preferably to the | |
64 Bioperl mailing lists Your participation is much appreciated. | |
65 | |
66 bioperl-l@bioperl.org - General discussion | |
67 http://bio.perl.org/MailList.html - About the mailing lists | |
68 | |
69 =head2 Reporting Bugs | |
70 | |
71 report bugs to the Bioperl bug tracking system to help us keep track | |
72 the bugs and their resolution. Bug reports can be submitted via | |
73 email or the web: | |
74 | |
75 bioperl-bugs@bio.perl.org | |
76 http://bugzilla.bioperl.org/ | |
77 | |
78 =head1 AUTHOR - Heikki Lehvaslaiho | |
79 | |
80 Email: heikki@ebi.ac.uk | |
81 Address: | |
82 | |
83 EMBL Outstation, European Bioinformatics Institute | |
84 Wellcome Trust Genome Campus, Hinxton | |
85 Cambs. CB10 1SD, United Kingdom | |
86 | |
87 =head1 CONTRIBUTORS | |
88 | |
89 Additional contributors names and emails here | |
90 | |
91 =head1 APPENDIX | |
92 | |
93 The rest of the documentation details each of the object | |
94 methods. Internal methods are usually preceded with a _ | |
95 | |
96 =cut | |
97 | |
98 | |
99 # Let the code begin... | |
100 | |
101 package Bio::Coordinate::Pair; | |
102 use vars qw(@ISA ); | |
103 use strict; | |
104 | |
105 # Object preamble - inherits from Bio::Root::Root | |
106 use Bio::Root::Root; | |
107 use Bio::Coordinate::MapperI; | |
108 use Bio::Coordinate::Result; | |
109 use Bio::Coordinate::Result::Match; | |
110 use Bio::Coordinate::Result::Gap; | |
111 | |
112 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI); | |
113 | |
114 | |
115 sub new { | |
116 my($class,@args) = @_; | |
117 my $self = $class->SUPER::new(@args); | |
118 | |
119 my($in, $out) = | |
120 $self->_rearrange([qw(IN | |
121 OUT | |
122 )], | |
123 @args); | |
124 | |
125 $in && $self->in($in); | |
126 $out && $self->out($out); | |
127 return $self; # success - we hope! | |
128 } | |
129 | |
130 =head2 in | |
131 | |
132 Title : in | |
133 Usage : $obj->in('peptide'); | |
134 Function: Set and read the input coordinate system. | |
135 Example : | |
136 Returns : value of input system | |
137 Args : new value (optional), Bio::LocationI | |
138 | |
139 =cut | |
140 | |
141 sub in { | |
142 my ($self,$value) = @_; | |
143 if( defined $value) { | |
144 $self->throw("Not a valid input Bio::Location [$value] ") | |
145 unless $value->isa('Bio::LocationI'); | |
146 $self->{'_in'} = $value; | |
147 } | |
148 return $self->{'_in'}; | |
149 } | |
150 | |
151 | |
152 =head2 out | |
153 | |
154 Title : out | |
155 Usage : $obj->out('peptide'); | |
156 Function: Set and read the output coordinate system. | |
157 Example : | |
158 Returns : value of output system | |
159 Args : new value (optional), Bio::LocationI | |
160 | |
161 =cut | |
162 | |
163 sub out { | |
164 my ($self,$value) = @_; | |
165 if( defined $value) { | |
166 $self->throw("Not a valid output coordinate Bio::Location [$value] ") | |
167 unless $value->isa('Bio::LocationI'); | |
168 $self->{'_out'} = $value; | |
169 } | |
170 return $self->{'_out'}; | |
171 } | |
172 | |
173 | |
174 =head2 swap | |
175 | |
176 Title : swap | |
177 Usage : $obj->swap; | |
178 Function: Swap the direction of mapping; input <-> output | |
179 Example : | |
180 Returns : 1 | |
181 Args : | |
182 | |
183 =cut | |
184 | |
185 sub swap { | |
186 my ($self) = @_; | |
187 ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'}); | |
188 return 1; | |
189 } | |
190 | |
191 =head2 strand | |
192 | |
193 Title : strand | |
194 Usage : $obj->strand; | |
195 Function: Get strand value for the pair | |
196 Example : | |
197 Returns : ( 1 | 0 | -1 ) | |
198 Args : | |
199 | |
200 =cut | |
201 | |
202 sub strand { | |
203 my ($self) = @_; | |
204 $self->warn("Outgoing coordinates are not defined") | |
205 unless $self->out; | |
206 $self->warn("Incoming coordinates are not defined") | |
207 unless $self->in; | |
208 | |
209 return $self->in->strand * $self->out->strand; | |
210 } | |
211 | |
212 =head2 test | |
213 | |
214 Title : test | |
215 Usage : $obj->test; | |
216 Function: test that both components are of the same length | |
217 Example : | |
218 Returns : ( 1 | undef ) | |
219 Args : | |
220 | |
221 =cut | |
222 | |
223 sub test { | |
224 my ($self) = @_; | |
225 $self->warn("Outgoing coordinates are not defined") | |
226 unless $self->out; | |
227 $self->warn("Incoming coordinates are not defined") | |
228 unless $self->in; | |
229 | |
230 1 if $self->in->end - $self->in->start == $self->out->end - $self->out->start; | |
231 } | |
232 | |
233 | |
234 =head2 map | |
235 | |
236 Title : map | |
237 Usage : $newpos = $obj->map($pos); | |
238 Function: Map the location from the input coordinate system | |
239 to a new value in the output coordinate system. | |
240 Example : | |
241 Returns : new Bio::LocationI in the output coordinate system or undef | |
242 Args : Bio::LocationI object | |
243 | |
244 =cut | |
245 | |
246 sub map { | |
247 my ($self,$value) = @_; | |
248 | |
249 $self->throw("Need to pass me a value.") | |
250 unless defined $value; | |
251 $self->throw("I need a Bio::Location, not [$value]") | |
252 unless $value->isa('Bio::LocationI'); | |
253 $self->throw("Input coordinate system not set") | |
254 unless $self->in; | |
255 $self->throw("Output coordinate system not set") | |
256 unless $self->out; | |
257 | |
258 | |
259 if ($value->isa("Bio::Location::SplitLocationI")) { | |
260 | |
261 my $result = new Bio::Coordinate::Result; | |
262 my $split = new Bio::Location::Split(-seq_id=>$self->out->seq_id); | |
263 foreach my $loc ( $value->sub_Location(1) ) { | |
264 | |
265 my $res = $self->_map($loc); | |
266 map { $result->add_sub_Location($_) } $res->each_Location; | |
267 | |
268 } | |
269 return $result; | |
270 | |
271 } else { | |
272 return $self->_map($value); | |
273 } | |
274 | |
275 } | |
276 | |
277 | |
278 =head2 _map | |
279 | |
280 Title : _map | |
281 Usage : $newpos = $obj->_map($simpleloc); | |
282 Function: Internal method that does the actual mapping. Called | |
283 multiple times by map() if the location to be mapped is a | |
284 split location | |
285 Example : | |
286 Returns : new location in the output coordinate system or undef | |
287 Args : Bio::Location::Simple | |
288 | |
289 =cut | |
290 | |
291 sub _map { | |
292 my ($self,$value) = @_; | |
293 | |
294 my $result = new Bio::Coordinate::Result; | |
295 | |
296 my $offset = $self->in->start - $self->out->start; | |
297 my $start = $value->start - $offset; | |
298 my $end = $value->end - $offset; | |
299 | |
300 my $match = Bio::Location::Simple->new; | |
301 $match->location_type($value->location_type); | |
302 $match->strand($self->strand); | |
303 | |
304 #within | |
305 # |-------------------------| | |
306 # |-| | |
307 if ($start >= $self->out->start and $end <= $self->out->end) { | |
308 | |
309 $match->seq_id($self->out->seq_id); | |
310 $result->seq_id($self->out->seq_id); | |
311 | |
312 if ($self->strand == 1) { | |
313 $match->start($start); | |
314 $match->end($end); | |
315 } else { | |
316 $match->start($self->out->end - $end + $self->out->start); | |
317 $match->end($self->out->end - $start + $self->out->start); | |
318 } | |
319 if ($value->strand) { | |
320 $match->strand($match->strand * $value->strand); | |
321 $result->strand($match->strand); | |
322 } | |
323 bless $match, 'Bio::Coordinate::Result::Match'; | |
324 $result->add_sub_Location($match); | |
325 } | |
326 #out | |
327 # |-------------------------| | |
328 # |-| or |-| | |
329 elsif ( ($end < $self->out->start or $start > $self->out->end ) or | |
330 #insertions just outside the range need special settings | |
331 ($value->location_type eq 'IN-BETWEEN' and | |
332 ($end = $self->out->start or $start = $self->out->end))) { | |
333 | |
334 $match->seq_id($self->in->seq_id); | |
335 $result->seq_id($self->in->seq_id); | |
336 $match->start($value->start); | |
337 $match->end($value->end); | |
338 $match->strand($value->strand); | |
339 | |
340 bless $match, 'Bio::Coordinate::Result::Gap'; | |
341 $result->add_sub_Location($match); | |
342 } | |
343 #partial I | |
344 # |-------------------------| | |
345 # |-----| | |
346 elsif ($start < $self->out->start and $end <= $self->out->end ) { | |
347 | |
348 $result->seq_id($self->out->seq_id); | |
349 if ($value->strand) { | |
350 $match->strand($match->strand * $value->strand); | |
351 $result->strand($match->strand); | |
352 } | |
353 my $gap = Bio::Location::Simple->new; | |
354 $gap->start($value->start); | |
355 $gap->end($self->in->start - 1); | |
356 $gap->strand($value->strand); | |
357 $gap->seq_id($self->in->seq_id); | |
358 | |
359 bless $gap, 'Bio::Coordinate::Result::Gap'; | |
360 $result->add_sub_Location($gap); | |
361 | |
362 # match | |
363 $match->seq_id($self->out->seq_id); | |
364 | |
365 if ($self->strand == 1) { | |
366 $match->start($self->out->start); | |
367 $match->end($end); | |
368 } else { | |
369 $match->start($self->out->end - $end + $self->out->start); | |
370 $match->end($self->out->end); | |
371 } | |
372 bless $match, 'Bio::Coordinate::Result::Match'; | |
373 $result->add_sub_Location($match); | |
374 } | |
375 #partial II | |
376 # |-------------------------| | |
377 # |------| | |
378 elsif ($start >= $self->out->start and $end > $self->out->end ) { | |
379 | |
380 $match->seq_id($self->out->seq_id); | |
381 $result->seq_id($self->out->seq_id); | |
382 if ($value->strand) { | |
383 $match->strand($match->strand * $value->strand); | |
384 $result->strand($match->strand); | |
385 } | |
386 if ($self->strand == 1) { | |
387 $match->start($start); | |
388 $match->end($self->out->end); | |
389 } else { | |
390 $match->start($self->out->start); | |
391 $match->end($self->out->end - $start + $self->out->start); | |
392 } | |
393 bless $match, 'Bio::Coordinate::Result::Match'; | |
394 $result->add_sub_Location($match); | |
395 | |
396 my $gap = Bio::Location::Simple->new; | |
397 $gap->start($self->in->end + 1); | |
398 $gap->end($value->end); | |
399 $gap->strand($value->strand); | |
400 $gap->seq_id($self->in->seq_id); | |
401 bless $gap, 'Bio::Coordinate::Result::Gap'; | |
402 $result->add_sub_Location($gap); | |
403 | |
404 } | |
405 #enveloping | |
406 # |-------------------------| | |
407 # |---------------------------------| | |
408 elsif ($start < $self->out->start and $end > $self->out->end ) { | |
409 | |
410 $result->seq_id($self->out->seq_id); | |
411 if ($value->strand) { | |
412 $match->strand($match->strand * $value->strand); | |
413 $result->strand($match->strand); | |
414 } | |
415 # gap1 | |
416 my $gap1 = Bio::Location::Simple->new; | |
417 $gap1->start($value->start); | |
418 $gap1->end($self->in->start - 1); | |
419 $gap1->strand($value->strand); | |
420 $gap1->seq_id($self->in->seq_id); | |
421 bless $gap1, 'Bio::Coordinate::Result::Gap'; | |
422 $result->add_sub_Location($gap1); | |
423 | |
424 # match | |
425 $match->seq_id($self->out->seq_id); | |
426 | |
427 $match->start($self->out->start); | |
428 $match->end($self->out->end); | |
429 bless $match, 'Bio::Coordinate::Result::Match'; | |
430 $result->add_sub_Location($match); | |
431 | |
432 # gap2 | |
433 my $gap2 = Bio::Location::Simple->new; | |
434 $gap2->start($self->in->end + 1); | |
435 $gap2->end($value->end); | |
436 $gap2->strand($value->strand); | |
437 $gap2->seq_id($self->in->seq_id); | |
438 bless $gap2, 'Bio::Coordinate::Result::Gap'; | |
439 $result->add_sub_Location($gap2); | |
440 | |
441 } else { | |
442 $self->throw("Should not be here!"); | |
443 } | |
444 return $result; | |
445 } | |
446 | |
447 | |
448 1; |