Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Coordinate/GeneMapper.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: GeneMapper.pm,v 1.13.2.2 2003/03/13 11:56:30 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::Coordinate::GeneMapper | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Coordinate::GeneMapper; | |
20 | |
21 # get a Bio::RangeI representing the start, end and strand of the CDS | |
22 # in chromosomal (or entry) coordinates | |
23 my $cds; | |
24 | |
25 # get a Bio::Location::Split or an array of Bio::LocationI objects | |
26 # holding the start, end and strand of all the exons in chromosomal | |
27 # (or entry) coordinates | |
28 my $exons; | |
29 | |
30 # create a gene mapper and set it to map from chromosomal to cds coordinates | |
31 my $gene = Bio::Coordinate::GeneMapper->new(-in=>'chr', | |
32 -out=>'cds', | |
33 -cds=>$cds, | |
34 -exons=>$exons | |
35 ); | |
36 | |
37 # get a a Bio::Location or sequence feature in input (chr) coordinates | |
38 my $loc; | |
39 | |
40 # map the location into output coordinates and get a new location object | |
41 $newloc = $gene->map($loc); | |
42 | |
43 | |
44 =head1 DESCRIPTION | |
45 | |
46 Bio::Coordinate::GeneMapper is a module for simplifying the mappings | |
47 of coodinate locations between various gene related locations in human | |
48 genetics. It also adds a special human genetics twist to coordinate | |
49 systems by making it possible to disable the use of zero | |
50 (0). Locations before position one start from -1. See method | |
51 L<nozero>. | |
52 | |
53 It understands by name the following coordinate systems and mapping | |
54 between them: | |
55 | |
56 peptide (peptide length) | |
57 ^ | |
58 | -peptide_offset | |
59 | | |
60 frame propeptide (propeptide length) | |
61 ^ ^ | |
62 \ | | |
63 translate \ | | |
64 \ | | |
65 cds (transcript start and end) | |
66 ^ | |
67 negative_intron | \ | |
68 ^ | \ transcribe | |
69 \ | \ | |
70 intron exon \ | |
71 ^ ^ ^ / | |
72 splice \ \ / | / | |
73 \ \ / | / | |
74 \ inex | / | |
75 \ ^ | / | |
76 \ \ |/ | |
77 ----- gene (gene_length) | |
78 ^ | |
79 | - gene_offset | |
80 | | |
81 chr (or entry) | |
82 | |
83 | |
84 This structure is kept in the global variable $DAG which is a | |
85 representation of a Directed Acyclic Graph. The path calculations | |
86 traversing this graph are done in a helper class. See | |
87 L<Bio::Coordinate::Graph>. | |
88 | |
89 Of these, two operations are special cases, translate and splice. | |
90 Translating and reverse translating are implemented as internal | |
91 methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs | |
92 additional information that is provided by method L<exons> which takes | |
93 in an array of Bio::LocationI objects. | |
94 | |
95 Most of the coordinate system names should be selfexplanatory to | |
96 anyone familiar with genes. Negative intron coordinate system is | |
97 starts counting backwards from -1 as the last nucleotide in the | |
98 intron. This used when only exon and a few flanking intron nucleotides | |
99 are known. | |
100 | |
101 | |
102 This class models coordinates within one transcript of a gene, so to | |
103 tackle multiple transcripts you need several instances of the | |
104 class. It is therefore valid to argue that the name of the class | |
105 should be TranscriptMapper. GeneMapper is a catchier name, so it | |
106 stuck. | |
107 | |
108 | |
109 =head1 FEEDBACK | |
110 | |
111 =head2 Mailing Lists | |
112 | |
113 User feedback is an integral part of the evolution of this and other | |
114 Bioperl modules. Send your comments and suggestions preferably to the | |
115 Bioperl mailing lists Your participation is much appreciated. | |
116 | |
117 bioperl-l@bioperl.org - General discussion | |
118 http://bio.perl.org/MailList.html - About the mailing lists | |
119 | |
120 =head2 Reporting Bugs | |
121 | |
122 Report bugs to the Bioperl bug tracking system to help us keep track | |
123 the bugs and their resolution. Bug reports can be submitted via email | |
124 or the web: | |
125 | |
126 bioperl-bugs@bio.perl.org | |
127 http://bugzilla.bioperl.org/ | |
128 | |
129 =head1 AUTHOR - Heikki Lehvaslaiho | |
130 | |
131 Email: heikki@ebi.ac.uk | |
132 Address: | |
133 | |
134 EMBL Outstation, European Bioinformatics Institute | |
135 Wellcome Trust Genome Campus, Hinxton | |
136 Cambs. CB10 1SD, United Kingdom | |
137 | |
138 =head1 APPENDIX | |
139 | |
140 The rest of the documentation details each of the object | |
141 methods. Internal methods are usually preceded with a _ | |
142 | |
143 =cut | |
144 | |
145 | |
146 # Let the code begin... | |
147 | |
148 package Bio::Coordinate::GeneMapper; | |
149 use vars qw(@ISA %COORDINATE_SYSTEMS %COORDINATE_INTS $TRANSLATION $DAG | |
150 $NOZERO_VALUES $NOZERO_KEYS); | |
151 use strict; | |
152 | |
153 # Object preamble - inherits from Bio::Root::Root | |
154 | |
155 use Bio::Root::Root; | |
156 use Bio::Coordinate::Result; | |
157 use Bio::Location::Simple; | |
158 use Bio::Coordinate::Graph; | |
159 use Bio::Coordinate::Collection; | |
160 use Bio::Coordinate::Pair; | |
161 use Bio::Coordinate::ExtrapolatingPair; | |
162 | |
163 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI); | |
164 | |
165 # first set internal values for all translation tables | |
166 | |
167 %COORDINATE_SYSTEMS = ( | |
168 peptide => 10, | |
169 propeptide => 9, | |
170 frame => 8, | |
171 cds => 7, | |
172 negative_intron => 6, | |
173 intron => 5, | |
174 exon => 4, | |
175 inex => 3, | |
176 gene => 2, | |
177 chr => 1 | |
178 ); | |
179 | |
180 %COORDINATE_INTS = ( | |
181 10 => 'peptide', | |
182 9 => 'propeptide', | |
183 8 => 'frame', | |
184 7 => 'cds', | |
185 6 => 'negative_intron', | |
186 5 => 'intron', | |
187 4 => 'exon', | |
188 3 => 'inex', | |
189 2 => 'gene', | |
190 1 => 'chr' | |
191 ); | |
192 | |
193 $TRANSLATION = $COORDINATE_SYSTEMS{'cds'}. "-". | |
194 $COORDINATE_SYSTEMS{'propeptide'}; | |
195 | |
196 $DAG = { | |
197 10 => [], | |
198 9 => [10], | |
199 8 => [], | |
200 7 => [8, 9], | |
201 6 => [], | |
202 5 => [6], | |
203 4 => [7], | |
204 3 => [4, 5], | |
205 2 => [3, 4, 5, 7], | |
206 1 => [2] | |
207 }; | |
208 | |
209 $NOZERO_VALUES = {0 => 0, 'in' => 1, 'out' => 2, 'in&out' => 3 }; | |
210 $NOZERO_KEYS = { 0 => 0, 1 => 'in', 2 => 'out', 3 => 'in&out' }; | |
211 | |
212 | |
213 sub new { | |
214 my($class,@args) = @_; | |
215 my $self = $class->SUPER::new(@args); | |
216 | |
217 # prime the graph | |
218 my $graph = new Bio::Coordinate::Graph; | |
219 $graph->hash_of_arrays($DAG); | |
220 $self->graph($graph); | |
221 | |
222 my($in, $out, $peptide_offset, $exons, | |
223 $cds, $nozero, $strict) = | |
224 $self->_rearrange([qw(IN | |
225 OUT | |
226 PEPTIDE_OFFSET | |
227 EXONS | |
228 CDS | |
229 NOZERO | |
230 STRICT | |
231 )], | |
232 @args); | |
233 | |
234 # direction of mapping when going chr to protein | |
235 $self->{_direction} = 1; | |
236 | |
237 $in && $self->in($in); | |
238 $out && $self->out($out); | |
239 $cds && $self->cds($cds); | |
240 $exons && ref($exons) =~ /ARRAY/i && $self->exons(@$exons); | |
241 $peptide_offset && $self->peptide_offset($peptide_offset); | |
242 $nozero && $self->nozero($nozero); | |
243 $strict && $self->strict($strict); | |
244 | |
245 return $self; # success - we hope! | |
246 } | |
247 | |
248 =head2 in | |
249 | |
250 Title : in | |
251 Usage : $obj->in('peptide'); | |
252 Function: Set and read the input coordinate system. | |
253 Example : | |
254 Returns : value of input system | |
255 Args : new value (optional) | |
256 | |
257 =cut | |
258 | |
259 sub in { | |
260 my ($self,$value) = @_; | |
261 if( defined $value) { | |
262 $self->throw("Not a valid input coordinate system name [$value]\n". | |
263 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS )) | |
264 unless defined $COORDINATE_SYSTEMS{$value}; | |
265 | |
266 $self->{'_in'} = $COORDINATE_SYSTEMS{$value}; | |
267 } | |
268 return $COORDINATE_INTS{ $self->{'_in'} }; | |
269 } | |
270 | |
271 | |
272 =head2 out | |
273 | |
274 Title : out | |
275 Usage : $obj->out('peptide'); | |
276 Function: Set and read the output coordinate system. | |
277 Example : | |
278 Returns : value of output system | |
279 Args : new value (optional) | |
280 | |
281 =cut | |
282 | |
283 sub out { | |
284 my ($self,$value) = @_; | |
285 if( defined $value) { | |
286 $self->throw("Not a valid input coordinate system name [$value]\n". | |
287 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS )) | |
288 unless defined $COORDINATE_SYSTEMS{$value}; | |
289 | |
290 $self->{'_out'} = $COORDINATE_SYSTEMS{$value}; | |
291 } | |
292 return $COORDINATE_INTS{ $self->{'_out'} }; | |
293 } | |
294 | |
295 =head2 strict | |
296 | |
297 Title : strict | |
298 Usage : $obj->strict('peptide'); | |
299 Function: Set and read weather strict boundaried of coordinate | |
300 systems are enforced. | |
301 When strict is on, the end of the coordinate range must be defined. | |
302 Example : | |
303 Returns : boolean | |
304 Args : boolean (optional) | |
305 | |
306 =cut | |
307 | |
308 sub strict { | |
309 my ($self,$value) = @_; | |
310 if( defined $value) { | |
311 $value ? ( $self->{'_strict'} = 1 ) : ( $self->{'_strict'} = 0 ); | |
312 ## update in each mapper !! | |
313 } | |
314 return $self->{'_strict'} || 0 ; | |
315 } | |
316 | |
317 | |
318 =head2 nozero | |
319 | |
320 Title : nozero | |
321 Usage : $obj->nozero(1); | |
322 Function: Flag to disable the use of zero in the input, | |
323 output or both coordinate systems. Use of coordinate | |
324 systems without zero is a peculiarity common in | |
325 human genetics community. | |
326 Example : | |
327 Returns : 0 (default), or 'in', 'out', 'in&out' | |
328 Args : 0 (default), or 'in', 'out', 'in&out' | |
329 | |
330 =cut | |
331 | |
332 sub nozero { | |
333 my ($self,$value) = @_; | |
334 | |
335 if (defined $value) { | |
336 $self->throw("Not a valid value for nozero [$value]\n". | |
337 "Valid values are ". join(", ", keys %{$NOZERO_VALUES} )) | |
338 unless defined $NOZERO_VALUES->{$value}; | |
339 $self->{'_nozero'} = $NOZERO_VALUES->{$value}; | |
340 } | |
341 | |
342 my $res = $self->{'_nozero'} || 0; | |
343 return $NOZERO_KEYS->{$res}; | |
344 } | |
345 | |
346 =head2 graph | |
347 | |
348 Title : graph | |
349 Usage : $obj->graph($new_graph); | |
350 Function: Set and read the graph object representing relationships | |
351 between coordinate systems | |
352 Example : | |
353 Returns : Bio::Coordinate::Graph object | |
354 Args : new Bio::Coordinate::Graph object (optional) | |
355 | |
356 =cut | |
357 | |
358 sub graph { | |
359 my ($self,$value) = @_; | |
360 if( defined $value) { | |
361 $self->throw("Not a valid graph [$value]\n") | |
362 unless $value->isa('Bio::Coordinate::Graph'); | |
363 $self->{'_graph'} = $value; | |
364 } | |
365 return $self->{'_graph'}; | |
366 } | |
367 | |
368 =head2 peptide | |
369 | |
370 Title : peptide | |
371 Usage : $obj->peptide_offset($peptide_coord); | |
372 Function: Read and write the offset of peptide from the start of propeptide | |
373 and peptide length | |
374 Returns : a Bio::Location::Simple object | |
375 Args : a Bio::LocationI object | |
376 | |
377 =cut | |
378 | |
379 sub peptide { | |
380 my ($self, $value) = @_; | |
381 if( defined $value) { | |
382 $self->throw("I need a Bio::LocationI, not [". $value. "]") | |
383 unless $value->isa('Bio::LocationI'); | |
384 | |
385 $self->throw("Peptide start not defined") | |
386 unless defined $value->start; | |
387 $self->{'_peptide_offset'} = $value->start - 1; | |
388 | |
389 $self->throw("Peptide end not defined") | |
390 unless defined $value->end; | |
391 $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'}; | |
392 | |
393 | |
394 my $a = $self->_create_pair | |
395 ('propeptide', 'peptide', $self->strict, | |
396 $self->{'_peptide_offset'}, $self->{'_peptide_length'} ); | |
397 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'}; | |
398 $self->{'_mappers'}->{$mapper} = $a; | |
399 } | |
400 return Bio::Location::Simple->new | |
401 (-seq_id => 'propeptide', | |
402 -start => $self->{'_peptide_offset'} + 1 , | |
403 -end => $self->{'_peptide_length'} + $self->{'_peptide_offset'}, | |
404 -strand => 1 | |
405 ); | |
406 } | |
407 | |
408 =head2 peptide_offset | |
409 | |
410 Title : peptide_offset | |
411 Usage : $obj->peptide_offset(20); | |
412 Function: Set and read the offset of peptide from the start of propeptide | |
413 Returns : set value or 0 | |
414 Args : new value (optional) | |
415 | |
416 =cut | |
417 | |
418 sub peptide_offset { | |
419 my ($self,$offset, $len) = @_; | |
420 if( defined $offset) { | |
421 $self->throw("I need an integer, not [$offset]") | |
422 unless $offset =~ /^[+-]?\d+$/; | |
423 $self->{'_peptide_offset'} = $offset; | |
424 | |
425 if (defined $len) { | |
426 $self->throw("I need an integer, not [$len]") | |
427 unless $len =~ /^[+-]?\d+$/; | |
428 $self->{'_peptide_length'} = $len; | |
429 } | |
430 | |
431 my $a = $self->_create_pair | |
432 ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} ); | |
433 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'}; | |
434 $self->{'_mappers'}->{$mapper} = $a; | |
435 } | |
436 return $self->{'_peptide_offset'} || 0; | |
437 } | |
438 | |
439 =head2 peptide_length | |
440 | |
441 Title : peptide_length | |
442 Usage : $obj->peptide_length(20); | |
443 Function: Set and read the offset of peptide from the start of propeptide | |
444 Returns : set value or 0 | |
445 Args : new value (optional) | |
446 | |
447 =cut | |
448 | |
449 | |
450 sub peptide_length { | |
451 my ($self, $len) = @_; | |
452 if( defined $len) { | |
453 $self->throw("I need an integer, not [$len]") | |
454 if defined $len && $len !~ /^[+-]?\d+$/; | |
455 $self->{'_peptide_length'} = $len; | |
456 } | |
457 return $self->{'_peptide_length'}; | |
458 } | |
459 | |
460 | |
461 =head2 exons | |
462 | |
463 Title : exons | |
464 Usage : $obj->exons(@exons); | |
465 Function: Set and read the offset of CDS from the start of transcipt | |
466 You do not have to sort the exons before calling this method as | |
467 they will be sorted automatically. | |
468 If you have not defined the CDS, is will be set to span all | |
469 exons here. | |
470 Returns : array of Bio::LocationI exons in genome coordinates or 0 | |
471 Args : array of Bio::LocationI exons in genome (or entry) coordinates | |
472 | |
473 =cut | |
474 | |
475 sub exons { | |
476 my ($self,@value) = @_; | |
477 my $cds_mapper = $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'}; | |
478 my $inex_mapper = | |
479 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'}; | |
480 my $exon_mapper = | |
481 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'}; | |
482 my $intron_mapper = | |
483 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'}; | |
484 my $negative_intron_mapper = | |
485 $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'}; | |
486 my $exon_cds_mapper = $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'}; | |
487 | |
488 if(@value) { | |
489 if (ref($value[0]) && | |
490 $value[0]->isa('Bio::SeqFeatureI') and | |
491 $value[0]->location->isa('Bio::Location::SplitLocationI')) { | |
492 @value = $value[0]->location->each_Location; | |
493 } else { | |
494 $self->throw("I need an array , not [@value]") | |
495 unless ref \@value eq 'ARRAY'; | |
496 $self->throw("I need a reference to an array of Bio::LocationIs, not to [". | |
497 $value[0]. "]") | |
498 unless ref $value[0] and $value[0]->isa('Bio::LocationI'); | |
499 } | |
500 | |
501 # | |
502 # sort the input array | |
503 # | |
504 # and if the used has not defined CDS assume it is the complete exonic range | |
505 if (defined $value[0]->strand && $value[0]->strand == - 1) { #reverse strand | |
506 @value = map { $_->[0] } | |
507 sort { $b->[1] <=> $a->[1] } | |
508 map { [ $_, $_->start] } | |
509 @value; | |
510 | |
511 unless ($self->cds) { | |
512 $self->cds(new Bio::Location::Simple(-start => $value[-1]->start, | |
513 -end => $value[0]->end, | |
514 -strand=> $value[0]->strand, | |
515 -seq_id=> $value[0]->seq_id, | |
516 ) | |
517 ); | |
518 } | |
519 } else { #undef or forward strand | |
520 @value = map { $_->[0] } | |
521 sort { $a->[1] <=> $b->[1] } | |
522 map { [ $_, $_->start] } | |
523 @value; | |
524 unless ($self->cds) { | |
525 $self->cds(new Bio::Location::Simple(-start => $value[0]->start, | |
526 -end => $value[-1]->end, | |
527 -strand=> $value[0]->strand, | |
528 -seq_id=> $value[0]->seq_id, | |
529 ) | |
530 ); | |
531 } | |
532 | |
533 } | |
534 | |
535 $self->{'_chr_exons'} = \@value; | |
536 | |
537 # transform exons from chromosome to gene coordinates | |
538 # but only if gene coordinate system has been set | |
539 my @exons ; | |
540 #my $gene_mapper = $self->$COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'}; | |
541 my $gene_mapper = "1-2"; | |
542 if (defined $self->{'_mappers'}->{$gene_mapper} ) { | |
543 | |
544 my $tmp_in = $self->{'_in'}; | |
545 my $tmp_out = $self->{'_out'}; | |
546 my $tmp_verb = $self->verbose; | |
547 $self->verbose(0); | |
548 | |
549 $self->in('chr'); | |
550 $self->out('gene'); | |
551 | |
552 @exons = map {$self->map($_)} @value; | |
553 | |
554 $self->{'_in'} = ($tmp_in); | |
555 $self->{'_out'} = ($tmp_out); | |
556 $self->verbose($tmp_verb); | |
557 } else { | |
558 @exons = @value; | |
559 } | |
560 | |
561 my $cds_map = Bio::Coordinate::Collection->new; | |
562 my $inex_map = Bio::Coordinate::Collection->new; | |
563 my $exon_map = Bio::Coordinate::Collection->new; | |
564 my $exon_cds_map = Bio::Coordinate::Collection->new; | |
565 my $intron_map = Bio::Coordinate::Collection->new; | |
566 my $negative_intron_map = Bio::Coordinate::Collection->new; | |
567 | |
568 my $tr_end = 0; | |
569 my $coffset; | |
570 my $exon_counter; | |
571 my $prev_exon_end; | |
572 | |
573 for my $exon ( @exons ) { | |
574 | |
575 $exon_counter++; | |
576 | |
577 # | |
578 # gene -> cds | |
579 # | |
580 | |
581 my $match1 = Bio::Location::Simple->new | |
582 (-seq_id =>'gene' , | |
583 -start => $exon->start, | |
584 -end => $exon->end, -strand=>1 ); | |
585 my $match2 = Bio::Location::Simple->new | |
586 (-seq_id => 'cds', | |
587 -start => $tr_end + 1, | |
588 -end => $tr_end + $exon->end - $exon->start +1, | |
589 -strand=>$exon->strand ); | |
590 | |
591 $cds_map->add_mapper(Bio::Coordinate::Pair->new | |
592 (-in => $match1, | |
593 -out => $match2, | |
594 ) | |
595 ); | |
596 | |
597 if ($exon->start <= 1 and $exon->end >= 1) { | |
598 $coffset = $tr_end - $exon->start + 1; | |
599 } | |
600 $tr_end = $tr_end + $exon->end - $exon->start + 1; | |
601 | |
602 # | |
603 # gene -> intron | |
604 # | |
605 | |
606 if (defined $prev_exon_end) { | |
607 my $match3 = Bio::Location::Simple->new | |
608 (-seq_id =>'gene', | |
609 -start => $prev_exon_end + 1, | |
610 -end => $exon->start -1, -strand=>$exon->strand ); | |
611 | |
612 my $match4 = Bio::Location::Simple->new | |
613 (-seq_id => 'intron'. ($exon_counter -1), | |
614 -start => 1, | |
615 -end => $exon->start - 1 - $prev_exon_end, | |
616 -strand=>$exon->strand ); | |
617 | |
618 # negative intron coordinates | |
619 my $match5 = Bio::Location::Simple->new | |
620 (-seq_id => 'intron'. ($exon_counter -1), | |
621 -start => -1 * ($exon->start - 2 - $prev_exon_end) -1, | |
622 -end => -1, | |
623 -strand=>$exon->strand ); | |
624 | |
625 $inex_map->add_mapper(Bio::Coordinate::Pair->new | |
626 (-in => $match3, | |
627 -out => $match4 | |
628 ) | |
629 ); | |
630 $intron_map->add_mapper(Bio::Coordinate::Pair->new | |
631 (-in => $self->_clone_loc($match3), | |
632 -out => $self->_clone_loc($match4) | |
633 ) | |
634 ); | |
635 $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new | |
636 (-in => $self->_clone_loc($match4), | |
637 -out => $match5 | |
638 )); | |
639 | |
640 } | |
641 | |
642 # store the value | |
643 $prev_exon_end = $exon->end; | |
644 | |
645 # | |
646 # gene -> exon | |
647 # | |
648 my $match6 = Bio::Location::Simple->new | |
649 (-seq_id => 'exon'. $exon_counter, | |
650 -start => 1, | |
651 -end => $exon->end - $exon->start +1, | |
652 -strand=> $exon->strand ); | |
653 | |
654 my $pair2 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match1), | |
655 -out => $match6 | |
656 ); | |
657 my $pair3 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match6), | |
658 -out => $self->_clone_loc($match2) | |
659 ); | |
660 $inex_map->add_mapper(Bio::Coordinate::Pair->new | |
661 (-in => $self->_clone_loc($match1), | |
662 -out => $match6 | |
663 ) | |
664 ); | |
665 $exon_map->add_mapper(Bio::Coordinate::Pair->new | |
666 (-in => $self->_clone_loc($match1), | |
667 -out => $self->_clone_loc($match6) | |
668 ) | |
669 ); | |
670 $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new | |
671 (-in => $self->_clone_loc($match6), | |
672 -out => $self->_clone_loc($match2) | |
673 ) | |
674 ); | |
675 | |
676 } | |
677 | |
678 # move coordinate start if exons have negative values | |
679 if ($coffset) { | |
680 foreach my $m ($cds_map->each_mapper) { | |
681 $m->out->start($m->out->start - $coffset); | |
682 $m->out->end($m->out->end - $coffset); | |
683 } | |
684 | |
685 } | |
686 | |
687 $self->{'_mappers'}->{$cds_mapper} = $cds_map; | |
688 $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map; | |
689 $self->{'_mappers'}->{$inex_mapper} = $inex_map; | |
690 $self->{'_mappers'}->{$exon_mapper} = $exon_map; | |
691 $self->{'_mappers'}->{$intron_mapper} = $intron_map; | |
692 $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map; | |
693 } | |
694 return @{$self->{'_chr_exons'}} || 0; | |
695 } | |
696 | |
697 =head2 _clone_loc | |
698 | |
699 Title : _clone_loc | |
700 Usage : $copy_of_loc = $obj->_clone_loc($loc); | |
701 Function: Make a deep copy of a simple location | |
702 Returns : a Bio::Location::Simple object | |
703 Args : a Bio::Location::Simple object to be cloned | |
704 | |
705 =cut | |
706 | |
707 | |
708 sub _clone_loc { # clone a simple location | |
709 my ($self,$loc) = @_; | |
710 | |
711 $self->throw("I need a Bio::Location::Simple , not [". ref $loc. "]") | |
712 unless $loc->isa('Bio::Location::Simple'); | |
713 | |
714 return Bio::Location::Simple->new | |
715 (-seq_id => $loc->seq_id, | |
716 -start => $loc->start, | |
717 -end => $loc->end, | |
718 -strand=> $loc->strand, | |
719 -location_type => $loc->location_type | |
720 ); | |
721 } | |
722 | |
723 | |
724 =head2 cds | |
725 | |
726 Title : cds | |
727 Usage : $obj->cds(20); | |
728 Function: Set and read the offset of CDS from the start of transcipt | |
729 | |
730 Simple input can be an integer which gives the start of the | |
731 coding region in genomic coordinate. If you want to provide | |
732 the end of the coding region or indicate the use of the | |
733 opposite strand, you have to pass a Bio::RangeI | |
734 (e.g. Bio::Location::Simple or Bio::SegFeature::Generic) | |
735 object to this method. | |
736 | |
737 Returns : set value or 0 | |
738 Args : new value (optional) | |
739 | |
740 =cut | |
741 | |
742 sub cds { | |
743 my ($self,$value) = @_; | |
744 if( defined $value) { | |
745 if ($value =~ /^[+-]?\d+$/ ) { | |
746 my $loc = Bio::Location::Simple->new(-start=>$value); | |
747 $self->{'_cds'} = $loc; | |
748 } | |
749 elsif (ref $value && $value->isa('Bio::RangeI') ) { | |
750 $self->{'_cds'} = $value; | |
751 } else { | |
752 $self->throw("I need an integer or Bio::RangeI, not [$value]") | |
753 } | |
754 # strand !! | |
755 my $len; | |
756 | |
757 $len = $self->{'_cds'}->end - $self->{'_cds'}->start +1 | |
758 if defined $self->{'_cds'}->end; | |
759 | |
760 my $a = $self->_create_pair | |
761 ('chr', 'gene', 0, | |
762 $self->{'_cds'}->start-1, | |
763 $len, | |
764 $self->{'_cds'}->strand); | |
765 my $mapper = $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'}; | |
766 $self->{'_mappers'}->{$mapper} = $a; | |
767 | |
768 # recalculate exon-based mappers | |
769 if ( defined $self->{'_chr_exons'} ) { | |
770 $self->exons(@{$self->{'_chr_exons'}}); | |
771 } | |
772 | |
773 } | |
774 return $self->{'_cds'} || 0; | |
775 } | |
776 | |
777 | |
778 =head2 map | |
779 | |
780 Title : map | |
781 Usage : $newpos = $obj->map(5); | |
782 Function: Map the location from the input coordinate system | |
783 to a new value in the output coordinate system. | |
784 Example : | |
785 Returns : new value in the output coordiante system | |
786 Args : a Bio::Location::Simple | |
787 | |
788 =cut | |
789 | |
790 sub map { | |
791 my ($self,$value) = @_; | |
792 my ($res); | |
793 | |
794 $self->throw("Need to pass me a Bio::Location::Simple or ". | |
795 "Bio::Location::Simple or Bio::SeqFeatureI, not [". | |
796 ref($value). "]") | |
797 unless ref($value) && ($value->isa('Bio::Location::Simple') or | |
798 $value->isa('Bio::Location::SplitLocationI') or | |
799 $value->isa('Bio::SeqFeatureI')); | |
800 $self->throw("Input coordinate system not set") | |
801 unless $self->{'_in'}; | |
802 $self->throw("Output coordinate system not set") | |
803 unless $self->{'_out'}; | |
804 $self->throw("Do not be silly. Input and output coordinate ". | |
805 "systems are the same!") | |
806 unless $self->{'_in'} != $self->{'_out'}; | |
807 | |
808 $self->_check_direction(); | |
809 | |
810 $value = $value->location if $value->isa('Bio::SeqFeatureI'); | |
811 print STDERR "=== Start location: ". $value->start. ",". | |
812 $value->end. " (". $value->strand. ")\n" if $self->verbose > 0; | |
813 | |
814 | |
815 # if nozero coordinate system is used in the input values | |
816 if ( defined $self->{'_nozero'} && | |
817 ( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) { | |
818 $value->start($value->start + 1) | |
819 if defined $value->start && $value->start < 1; | |
820 $value->end($value->end + 1) | |
821 if defined $value->end && $value->end < 1; | |
822 } | |
823 | |
824 my @steps = $self->_get_path(); | |
825 print "mapping ", $self->{'_in'}, "->", $self->{'_out'}, | |
826 " Mappers: ", join(", ", @steps), "\n" if $self->verbose > 0; | |
827 | |
828 foreach my $mapper (@steps) { | |
829 if ($mapper eq $TRANSLATION) { | |
830 if ($self->direction == 1) { | |
831 $value = $self->_translate($value); | |
832 print STDERR "+ $TRANSLATION cds -> propeptide (translate) \n" | |
833 if $self->verbose > 0; | |
834 } else { | |
835 $value = $self->_reverse_translate($value); | |
836 print STDERR "+ $TRANSLATION propeptide -> cds (reverse translate) \n" | |
837 if $self->verbose > 0; | |
838 } | |
839 } | |
840 # keep the start and end values, and go on to next iteration | |
841 # if this mapper is not set | |
842 elsif ( ! defined $self->{'_mappers'}->{$mapper} ) { | |
843 # update mapper name | |
844 $mapper =~ /\d+-(\d+)/; my ($counter) = $1; | |
845 $value->seq_id($COORDINATE_INTS{$counter}); | |
846 print STDERR "- $mapper\n" if $self->verbose > 0; | |
847 } else { | |
848 | |
849 # | |
850 # the DEFAULT : generic mapping | |
851 # | |
852 $value = $self->{'_mappers'}->{$mapper}->map($value); | |
853 $value->purge_gaps | |
854 if ($value && $value->isa('Bio::Location::SplitLocationI') && $value->can('gap')); | |
855 print STDERR "+ $mapper (", $self->direction, "): start ", | |
856 $value->start, " end ", $value->end, "\n" | |
857 if $value && $self->verbose > 0; | |
858 } | |
859 } | |
860 | |
861 # if nozero coordinate system is asked to be used in the output values | |
862 if ( defined $value && defined $self->{'_nozero'} && | |
863 ( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) { | |
864 | |
865 $value->start($value->start - 1) | |
866 if defined $value->start && $value->start < 1; | |
867 $value->end($value->end - 1) | |
868 if defined $value->end && $value->end < 1; | |
869 } | |
870 | |
871 # handle merging of adjacent split locations! | |
872 | |
873 if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) { | |
874 my $prevloc; | |
875 my $merging = 0; | |
876 my $newvalue; | |
877 my @matches; | |
878 foreach my $loc ( $value->each_Location(1) ) { | |
879 unless ($prevloc) { | |
880 $prevloc = $loc; | |
881 push @matches, $prevloc; | |
882 next; | |
883 } | |
884 if ($prevloc->end == ($loc->start - 1) && $prevloc->seq_id eq $loc->seq_id) { | |
885 $prevloc->end($loc->end); | |
886 $merging = 1; | |
887 } else { | |
888 push @matches, $loc; | |
889 $prevloc = $loc; | |
890 } | |
891 } | |
892 if ($merging) { | |
893 if (@matches > 1 ) { | |
894 $newvalue = Bio::Coordinate::Result->new; | |
895 map {$newvalue->add_sub_Location} @matches; | |
896 } else { | |
897 $newvalue = Bio::Coordinate::Result::Match->new | |
898 (-seq_id => $matches[0]->seq_id, | |
899 -start => $matches[0]->start, | |
900 -end => $matches[0]->end, | |
901 -strand=> $matches[0]->strand ); | |
902 } | |
903 $value = $newvalue; | |
904 } | |
905 } | |
906 elsif (ref $value eq "Bio::Coordinate::Result" && $value->each_match == 1 ){ | |
907 $value = $value->match; | |
908 } | |
909 | |
910 | |
911 return $value; | |
912 } | |
913 | |
914 =head2 direction | |
915 | |
916 Title : direction | |
917 Usage : $obj->direction('peptide'); | |
918 Function: Read-only method for the direction of mapping deduced from | |
919 predefined input and output coordinate names. | |
920 Example : | |
921 Returns : 1 or -1, mapping direction | |
922 Args : new value (optional) | |
923 | |
924 =cut | |
925 | |
926 sub direction { | |
927 my ($self) = @_; | |
928 return $self->{'_direction'}; | |
929 } | |
930 | |
931 | |
932 =head2 swap | |
933 | |
934 Title : swap | |
935 Usage : $obj->swap; | |
936 Function: Swap the direction of transformation | |
937 (input <-> output) | |
938 Example : | |
939 Returns : 1 | |
940 Args : | |
941 | |
942 =cut | |
943 | |
944 sub swap { | |
945 my ($self,$value) = @_; | |
946 | |
947 ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'}); | |
948 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}}; | |
949 | |
950 # record the changed direction; | |
951 $self->{_direction} *= -1; | |
952 | |
953 return 1; | |
954 } | |
955 | |
956 | |
957 =head2 to_string | |
958 | |
959 Title : to_string | |
960 Usage : $newpos = $obj->to_string(5); | |
961 Function: Dump the internal mapper values into a human readable format | |
962 Example : | |
963 Returns : string | |
964 Args : | |
965 | |
966 =cut | |
967 | |
968 sub to_string { | |
969 my ($self) = shift; | |
970 | |
971 print "-" x 40, "\n"; | |
972 | |
973 # chr-gene | |
974 my $mapper_str = 'chr-gene'; | |
975 my $mapper = $self->_mapper_string2code($mapper_str); | |
976 | |
977 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
978 if (defined $self->cds) { | |
979 my $end = $self->cds->end -1 if defined $self->cds->end; | |
980 printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $self->cds->start-1 , $end || ''; | |
981 printf "%16s%s: %s\n", ' ', 'gene strand', $self->cds->strand || 0; | |
982 } | |
983 | |
984 # gene-intron | |
985 $mapper_str = 'gene-intron'; | |
986 $mapper = $self->_mapper_string2code($mapper_str); | |
987 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
988 | |
989 my $i = 1; | |
990 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) { | |
991 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ; | |
992 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ; | |
993 $i++; | |
994 } | |
995 | |
996 # intron-negative_intron | |
997 $mapper_str = 'intron-negative_intron'; | |
998 $mapper = $self->_mapper_string2code($mapper_str); | |
999 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
1000 | |
1001 $i = 1; | |
1002 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) { | |
1003 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ; | |
1004 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ; | |
1005 $i++; | |
1006 } | |
1007 | |
1008 | |
1009 # gene-exon | |
1010 $mapper_str = 'gene-exon'; | |
1011 $mapper = $self->_mapper_string2code($mapper_str); | |
1012 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
1013 | |
1014 $i = 1; | |
1015 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) { | |
1016 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ; | |
1017 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ; | |
1018 $i++; | |
1019 } | |
1020 | |
1021 | |
1022 # gene-cds | |
1023 $mapper_str = 'gene-cds'; | |
1024 $mapper = $self->_mapper_string2code($mapper_str); | |
1025 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
1026 | |
1027 $i = 1; | |
1028 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) { | |
1029 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ; | |
1030 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ; | |
1031 $i++; | |
1032 } | |
1033 | |
1034 # cds-propeptide | |
1035 $mapper_str = 'cds-propeptide'; | |
1036 $mapper = $self->_mapper_string2code($mapper_str); | |
1037 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
1038 printf "%9s%-12s\n", "", '"translate"'; | |
1039 | |
1040 | |
1041 # propeptide-peptide | |
1042 $mapper_str = 'propeptide-peptide'; | |
1043 $mapper = $self->_mapper_string2code($mapper_str); | |
1044 printf "\n %-12s (%s)\n", $mapper_str, $mapper ; | |
1045 printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset; | |
1046 | |
1047 | |
1048 | |
1049 print "\nin : ", $self->in, "\n"; | |
1050 print "out: ", $self->out, "\n"; | |
1051 my $dir; | |
1052 $self->direction ? ($dir='forward') : ($dir='reverse'); | |
1053 printf "direction: %-8s(%s)\n", $dir, $self->direction; | |
1054 print "\n", "-" x 40, "\n"; | |
1055 | |
1056 1; | |
1057 } | |
1058 | |
1059 sub _mapper_code2string { | |
1060 my ($self, $code) = @_; | |
1061 my ($a, $b) = $code =~ /(\d+)-(\d+)/; | |
1062 return $COORDINATE_INTS{$a}. '-'. $COORDINATE_INTS{$b}; | |
1063 | |
1064 } | |
1065 | |
1066 sub _mapper_string2code { | |
1067 my ($self, $string) =@_; | |
1068 my ($a, $b) = $string =~ /([^-]+)-(.*)/; | |
1069 return $COORDINATE_SYSTEMS{$a}. '-'. $COORDINATE_SYSTEMS{$b}; | |
1070 } | |
1071 | |
1072 | |
1073 =head2 _create_pair | |
1074 | |
1075 Title : _create_pair | |
1076 Usage : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1); | |
1077 Function: Internal helper method to create a mapper between | |
1078 two coordinate systems | |
1079 Returns : a Bio::Coordinate::Pair object | |
1080 Args : string, input coordinate system name, | |
1081 string, output coordinate system name, | |
1082 boolean, strict mapping | |
1083 positive integer, offset | |
1084 positive integer, length | |
1085 1 || -1 , strand | |
1086 | |
1087 =cut | |
1088 | |
1089 sub _create_pair { | |
1090 my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_; | |
1091 $strict ||= 0; | |
1092 $strand ||= 1; | |
1093 $length ||= 20; | |
1094 | |
1095 my $match1 = Bio::Location::Simple->new | |
1096 (-seq_id => $in, | |
1097 -start => $offset+1, | |
1098 -end => $offset+$length, -strand=>1 ); | |
1099 | |
1100 my $match2 = Bio::Location::Simple->new | |
1101 (-seq_id => $out, | |
1102 -start => 1, | |
1103 -end => $length, -strand=>$strand ); | |
1104 | |
1105 my $pair = Bio::Coordinate::ExtrapolatingPair->new | |
1106 (-in => $match1, | |
1107 -out => $match2, | |
1108 -strict => $strict | |
1109 ); | |
1110 | |
1111 return $pair; | |
1112 | |
1113 } | |
1114 | |
1115 | |
1116 =head2 _translate | |
1117 | |
1118 Title : _translate | |
1119 Usage : $newpos = $obj->_translate($loc); | |
1120 Function: Translate the location from the CDS coordinate system | |
1121 to a new value in the propeptide coordinate system. | |
1122 Example : | |
1123 Returns : new location | |
1124 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI | |
1125 | |
1126 =cut | |
1127 | |
1128 sub _translate { | |
1129 my ($self,$value) = @_; | |
1130 | |
1131 $self->throw("Need to pass me a Bio::Location::Simple or ". | |
1132 "Bio::Location::SplitLocationI, not [". ref($value). "]") | |
1133 unless defined $value && | |
1134 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI')); | |
1135 | |
1136 my $seqid = 'propeptide'; | |
1137 | |
1138 if ($value->isa("Bio::Location::SplitLocationI")) { | |
1139 my $split = new Bio::Location::Split(-seq_id=>$seqid); | |
1140 foreach my $loc ( $value->each_Location(1) ) { | |
1141 | |
1142 my $match = new Bio::Location::Simple(-start => int($loc->start / 3 )+1, | |
1143 -end => int($loc->end / 3 )+1, | |
1144 -seq_id => $seqid, | |
1145 -strand => 1 | |
1146 ); | |
1147 $split->add_sub_Location($match); | |
1148 } | |
1149 return $split; | |
1150 | |
1151 } else { | |
1152 return new Bio::Location::Simple(-start => int($value->start / 3 )+1, | |
1153 -end => int($value->end / 3 )+1, | |
1154 -seq_id => $seqid, | |
1155 -strand => 1 | |
1156 ); | |
1157 } | |
1158 } | |
1159 | |
1160 sub _frame { | |
1161 my ($self,$value) = @_; | |
1162 | |
1163 $self->throw("Need to pass me a Bio::Location::Simple or ". | |
1164 "Bio::Location::SplitLocationI, not [". ref($value). "]") | |
1165 unless defined $value && | |
1166 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI')); | |
1167 | |
1168 my $seqid = 'propeptide'; | |
1169 | |
1170 if ($value->isa("Bio::Location::SplitLocationI")) { | |
1171 my $split = new Bio::Location::Split(-seq_id=>$seqid); | |
1172 foreach my $loc ( $value->each_Location(1) ) { | |
1173 | |
1174 my $match = new Bio::Location::Simple(-start => ($value->start-1) % 3 +1, | |
1175 -end => ($value->end-1) % 3 +1, | |
1176 -seq_id => 'frame', | |
1177 -strand => 1 | |
1178 ); | |
1179 $split->add_sub_Location($match); | |
1180 } | |
1181 return $split; | |
1182 } else { | |
1183 return new Bio::Location::Simple(-start => ($value->start-1) % 3 +1, | |
1184 -end => ($value->end-1) % 3 +1, | |
1185 -seq_id => 'frame', | |
1186 -strand => 1 | |
1187 ); | |
1188 } | |
1189 } | |
1190 | |
1191 | |
1192 =head2 _reverse_translate | |
1193 | |
1194 Title : _reverse_translate | |
1195 Usage : $newpos = $obj->_reverse_translate(5); | |
1196 Function: Reverse translate the location from the propeptide | |
1197 coordinate system to a new value in the CSD. | |
1198 Note that a single peptide location expands to cover | |
1199 the codon triplet | |
1200 Example : | |
1201 Returns : new location in the CDS coordinate system | |
1202 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI | |
1203 | |
1204 =cut | |
1205 | |
1206 sub _reverse_translate { | |
1207 my ($self,$value) = @_; | |
1208 | |
1209 | |
1210 $self->throw("Need to pass me a Bio::Location::Simple or ". | |
1211 "Bio::Location::SplitLocationI, not [". ref($value). "]") | |
1212 unless defined $value && | |
1213 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI')); | |
1214 | |
1215 my $seqid = 'cds'; | |
1216 | |
1217 if ($value->isa("Bio::Location::SplitLocationI")) { | |
1218 my $split = new Bio::Location::Split(-seq_id=>$seqid); | |
1219 foreach my $loc ( $value->each_Location(1) ) { | |
1220 | |
1221 my $match = new Bio::Location::Simple(-start => $value->start * 3 - 2, | |
1222 -end => $value->end * 3, | |
1223 -seq_id => $seqid, | |
1224 -strand => 1 | |
1225 ); | |
1226 $split->add_sub_Location($match); | |
1227 } | |
1228 return $split; | |
1229 | |
1230 } else { | |
1231 return new Bio::Location::Simple(-start => $value->start * 3 - 2, | |
1232 -end => $value->end * 3, | |
1233 -seq_id => $seqid, | |
1234 -strand => 1 | |
1235 ); | |
1236 } | |
1237 } | |
1238 | |
1239 | |
1240 =head2 _check_direction | |
1241 | |
1242 Title : _check_direction | |
1243 Usage : $obj->_check_direction(); | |
1244 Function: Check and swap when needed the direction the location | |
1245 mapping Pairs based on input and output values | |
1246 Example : | |
1247 Returns : new location | |
1248 Args : a Bio::Location::Simple | |
1249 | |
1250 =cut | |
1251 | |
1252 sub _check_direction { | |
1253 my ($self) = @_; | |
1254 | |
1255 my $new_direction = 1; | |
1256 $new_direction = -1 if $self->{'_in'} > $self->{'_out'}; | |
1257 | |
1258 unless ($new_direction == $self->{_direction} ) { | |
1259 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}}; | |
1260 # record the changed direction; | |
1261 $self->{_direction} *= -1; | |
1262 } | |
1263 1; | |
1264 } | |
1265 | |
1266 | |
1267 =head2 _get_path | |
1268 | |
1269 Title : _get_path | |
1270 Usage : $obj->_get_path('peptide'); | |
1271 Function: internal method for finding that shortest path between | |
1272 input and output coordinate systems. | |
1273 Calculations and caching are handled by the graph class. | |
1274 See L<Bio::Coordinate::Graph>. | |
1275 Example : | |
1276 Returns : array of the mappers | |
1277 Args : none | |
1278 | |
1279 =cut | |
1280 | |
1281 sub _get_path { | |
1282 my ($self) = @_; | |
1283 | |
1284 my $start = $self->{'_in'} || 0; | |
1285 my $end = $self->{'_out'} || 0; | |
1286 | |
1287 # note the order | |
1288 # always go from smaller to bigger: it makes caching more efficient | |
1289 my $reverse; | |
1290 if ($start > $end) { | |
1291 ($start, $end) = ($end, $start ); | |
1292 $reverse++; | |
1293 } | |
1294 | |
1295 my @mappers; | |
1296 if (exists $self->{'_previous_path'} and | |
1297 $self->{'_previous_path'} eq "$start$end" ) { | |
1298 # use cache | |
1299 @mappers = @{$self->{'_mapper_path'}}; | |
1300 } else { | |
1301 my $mapper; | |
1302 my $prev_node = ''; | |
1303 @mappers = | |
1304 map { $mapper = "$prev_node-$_"; $prev_node = $_; $mapper; } | |
1305 $self->{'_graph'}->shortest_path($start, $end); | |
1306 shift @mappers; | |
1307 | |
1308 $self->{'_previous_path'} = "$start$end"; | |
1309 $self->{'_mapper_path'} = \@mappers; | |
1310 } | |
1311 | |
1312 $reverse ? return reverse @mappers : return @mappers; | |
1313 } | |
1314 | |
1315 | |
1316 1; | |
1317 |