comparison variant_effect_predictor/Bio/Coordinate/GeneMapper.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: GeneMapper.pm,v 1.13.2.2 2003/03/13 11:56:30 heikki Exp $
2 #
3 # bioperl module for Bio::Coordinate::GeneMapper
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems
16
17 =head1 SYNOPSIS
18
19 use Bio::Coordinate::GeneMapper;
20
21 # get a Bio::RangeI representing the start, end and strand of the CDS
22 # in chromosomal (or entry) coordinates
23 my $cds;
24
25 # get a Bio::Location::Split or an array of Bio::LocationI objects
26 # holding the start, end and strand of all the exons in chromosomal
27 # (or entry) coordinates
28 my $exons;
29
30 # create a gene mapper and set it to map from chromosomal to cds coordinates
31 my $gene = Bio::Coordinate::GeneMapper->new(-in=>'chr',
32 -out=>'cds',
33 -cds=>$cds,
34 -exons=>$exons
35 );
36
37 # get a a Bio::Location or sequence feature in input (chr) coordinates
38 my $loc;
39
40 # map the location into output coordinates and get a new location object
41 $newloc = $gene->map($loc);
42
43
44 =head1 DESCRIPTION
45
46 Bio::Coordinate::GeneMapper is a module for simplifying the mappings
47 of coodinate locations between various gene related locations in human
48 genetics. It also adds a special human genetics twist to coordinate
49 systems by making it possible to disable the use of zero
50 (0). Locations before position one start from -1. See method
51 L<nozero>.
52
53 It understands by name the following coordinate systems and mapping
54 between them:
55
56 peptide (peptide length)
57 ^
58 | -peptide_offset
59 |
60 frame propeptide (propeptide length)
61 ^ ^
62 \ |
63 translate \ |
64 \ |
65 cds (transcript start and end)
66 ^
67 negative_intron | \
68 ^ | \ transcribe
69 \ | \
70 intron exon \
71 ^ ^ ^ /
72 splice \ \ / | /
73 \ \ / | /
74 \ inex | /
75 \ ^ | /
76 \ \ |/
77 ----- gene (gene_length)
78 ^
79 | - gene_offset
80 |
81 chr (or entry)
82
83
84 This structure is kept in the global variable $DAG which is a
85 representation of a Directed Acyclic Graph. The path calculations
86 traversing this graph are done in a helper class. See
87 L<Bio::Coordinate::Graph>.
88
89 Of these, two operations are special cases, translate and splice.
90 Translating and reverse translating are implemented as internal
91 methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs
92 additional information that is provided by method L<exons> which takes
93 in an array of Bio::LocationI objects.
94
95 Most of the coordinate system names should be selfexplanatory to
96 anyone familiar with genes. Negative intron coordinate system is
97 starts counting backwards from -1 as the last nucleotide in the
98 intron. This used when only exon and a few flanking intron nucleotides
99 are known.
100
101
102 This class models coordinates within one transcript of a gene, so to
103 tackle multiple transcripts you need several instances of the
104 class. It is therefore valid to argue that the name of the class
105 should be TranscriptMapper. GeneMapper is a catchier name, so it
106 stuck.
107
108
109 =head1 FEEDBACK
110
111 =head2 Mailing Lists
112
113 User feedback is an integral part of the evolution of this and other
114 Bioperl modules. Send your comments and suggestions preferably to the
115 Bioperl mailing lists Your participation is much appreciated.
116
117 bioperl-l@bioperl.org - General discussion
118 http://bio.perl.org/MailList.html - About the mailing lists
119
120 =head2 Reporting Bugs
121
122 Report bugs to the Bioperl bug tracking system to help us keep track
123 the bugs and their resolution. Bug reports can be submitted via email
124 or the web:
125
126 bioperl-bugs@bio.perl.org
127 http://bugzilla.bioperl.org/
128
129 =head1 AUTHOR - Heikki Lehvaslaiho
130
131 Email: heikki@ebi.ac.uk
132 Address:
133
134 EMBL Outstation, European Bioinformatics Institute
135 Wellcome Trust Genome Campus, Hinxton
136 Cambs. CB10 1SD, United Kingdom
137
138 =head1 APPENDIX
139
140 The rest of the documentation details each of the object
141 methods. Internal methods are usually preceded with a _
142
143 =cut
144
145
146 # Let the code begin...
147
148 package Bio::Coordinate::GeneMapper;
149 use vars qw(@ISA %COORDINATE_SYSTEMS %COORDINATE_INTS $TRANSLATION $DAG
150 $NOZERO_VALUES $NOZERO_KEYS);
151 use strict;
152
153 # Object preamble - inherits from Bio::Root::Root
154
155 use Bio::Root::Root;
156 use Bio::Coordinate::Result;
157 use Bio::Location::Simple;
158 use Bio::Coordinate::Graph;
159 use Bio::Coordinate::Collection;
160 use Bio::Coordinate::Pair;
161 use Bio::Coordinate::ExtrapolatingPair;
162
163 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
164
165 # first set internal values for all translation tables
166
167 %COORDINATE_SYSTEMS = (
168 peptide => 10,
169 propeptide => 9,
170 frame => 8,
171 cds => 7,
172 negative_intron => 6,
173 intron => 5,
174 exon => 4,
175 inex => 3,
176 gene => 2,
177 chr => 1
178 );
179
180 %COORDINATE_INTS = (
181 10 => 'peptide',
182 9 => 'propeptide',
183 8 => 'frame',
184 7 => 'cds',
185 6 => 'negative_intron',
186 5 => 'intron',
187 4 => 'exon',
188 3 => 'inex',
189 2 => 'gene',
190 1 => 'chr'
191 );
192
193 $TRANSLATION = $COORDINATE_SYSTEMS{'cds'}. "-".
194 $COORDINATE_SYSTEMS{'propeptide'};
195
196 $DAG = {
197 10 => [],
198 9 => [10],
199 8 => [],
200 7 => [8, 9],
201 6 => [],
202 5 => [6],
203 4 => [7],
204 3 => [4, 5],
205 2 => [3, 4, 5, 7],
206 1 => [2]
207 };
208
209 $NOZERO_VALUES = {0 => 0, 'in' => 1, 'out' => 2, 'in&out' => 3 };
210 $NOZERO_KEYS = { 0 => 0, 1 => 'in', 2 => 'out', 3 => 'in&out' };
211
212
213 sub new {
214 my($class,@args) = @_;
215 my $self = $class->SUPER::new(@args);
216
217 # prime the graph
218 my $graph = new Bio::Coordinate::Graph;
219 $graph->hash_of_arrays($DAG);
220 $self->graph($graph);
221
222 my($in, $out, $peptide_offset, $exons,
223 $cds, $nozero, $strict) =
224 $self->_rearrange([qw(IN
225 OUT
226 PEPTIDE_OFFSET
227 EXONS
228 CDS
229 NOZERO
230 STRICT
231 )],
232 @args);
233
234 # direction of mapping when going chr to protein
235 $self->{_direction} = 1;
236
237 $in && $self->in($in);
238 $out && $self->out($out);
239 $cds && $self->cds($cds);
240 $exons && ref($exons) =~ /ARRAY/i && $self->exons(@$exons);
241 $peptide_offset && $self->peptide_offset($peptide_offset);
242 $nozero && $self->nozero($nozero);
243 $strict && $self->strict($strict);
244
245 return $self; # success - we hope!
246 }
247
248 =head2 in
249
250 Title : in
251 Usage : $obj->in('peptide');
252 Function: Set and read the input coordinate system.
253 Example :
254 Returns : value of input system
255 Args : new value (optional)
256
257 =cut
258
259 sub in {
260 my ($self,$value) = @_;
261 if( defined $value) {
262 $self->throw("Not a valid input coordinate system name [$value]\n".
263 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
264 unless defined $COORDINATE_SYSTEMS{$value};
265
266 $self->{'_in'} = $COORDINATE_SYSTEMS{$value};
267 }
268 return $COORDINATE_INTS{ $self->{'_in'} };
269 }
270
271
272 =head2 out
273
274 Title : out
275 Usage : $obj->out('peptide');
276 Function: Set and read the output coordinate system.
277 Example :
278 Returns : value of output system
279 Args : new value (optional)
280
281 =cut
282
283 sub out {
284 my ($self,$value) = @_;
285 if( defined $value) {
286 $self->throw("Not a valid input coordinate system name [$value]\n".
287 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
288 unless defined $COORDINATE_SYSTEMS{$value};
289
290 $self->{'_out'} = $COORDINATE_SYSTEMS{$value};
291 }
292 return $COORDINATE_INTS{ $self->{'_out'} };
293 }
294
295 =head2 strict
296
297 Title : strict
298 Usage : $obj->strict('peptide');
299 Function: Set and read weather strict boundaried of coordinate
300 systems are enforced.
301 When strict is on, the end of the coordinate range must be defined.
302 Example :
303 Returns : boolean
304 Args : boolean (optional)
305
306 =cut
307
308 sub strict {
309 my ($self,$value) = @_;
310 if( defined $value) {
311 $value ? ( $self->{'_strict'} = 1 ) : ( $self->{'_strict'} = 0 );
312 ## update in each mapper !!
313 }
314 return $self->{'_strict'} || 0 ;
315 }
316
317
318 =head2 nozero
319
320 Title : nozero
321 Usage : $obj->nozero(1);
322 Function: Flag to disable the use of zero in the input,
323 output or both coordinate systems. Use of coordinate
324 systems without zero is a peculiarity common in
325 human genetics community.
326 Example :
327 Returns : 0 (default), or 'in', 'out', 'in&out'
328 Args : 0 (default), or 'in', 'out', 'in&out'
329
330 =cut
331
332 sub nozero {
333 my ($self,$value) = @_;
334
335 if (defined $value) {
336 $self->throw("Not a valid value for nozero [$value]\n".
337 "Valid values are ". join(", ", keys %{$NOZERO_VALUES} ))
338 unless defined $NOZERO_VALUES->{$value};
339 $self->{'_nozero'} = $NOZERO_VALUES->{$value};
340 }
341
342 my $res = $self->{'_nozero'} || 0;
343 return $NOZERO_KEYS->{$res};
344 }
345
346 =head2 graph
347
348 Title : graph
349 Usage : $obj->graph($new_graph);
350 Function: Set and read the graph object representing relationships
351 between coordinate systems
352 Example :
353 Returns : Bio::Coordinate::Graph object
354 Args : new Bio::Coordinate::Graph object (optional)
355
356 =cut
357
358 sub graph {
359 my ($self,$value) = @_;
360 if( defined $value) {
361 $self->throw("Not a valid graph [$value]\n")
362 unless $value->isa('Bio::Coordinate::Graph');
363 $self->{'_graph'} = $value;
364 }
365 return $self->{'_graph'};
366 }
367
368 =head2 peptide
369
370 Title : peptide
371 Usage : $obj->peptide_offset($peptide_coord);
372 Function: Read and write the offset of peptide from the start of propeptide
373 and peptide length
374 Returns : a Bio::Location::Simple object
375 Args : a Bio::LocationI object
376
377 =cut
378
379 sub peptide {
380 my ($self, $value) = @_;
381 if( defined $value) {
382 $self->throw("I need a Bio::LocationI, not [". $value. "]")
383 unless $value->isa('Bio::LocationI');
384
385 $self->throw("Peptide start not defined")
386 unless defined $value->start;
387 $self->{'_peptide_offset'} = $value->start - 1;
388
389 $self->throw("Peptide end not defined")
390 unless defined $value->end;
391 $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'};
392
393
394 my $a = $self->_create_pair
395 ('propeptide', 'peptide', $self->strict,
396 $self->{'_peptide_offset'}, $self->{'_peptide_length'} );
397 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
398 $self->{'_mappers'}->{$mapper} = $a;
399 }
400 return Bio::Location::Simple->new
401 (-seq_id => 'propeptide',
402 -start => $self->{'_peptide_offset'} + 1 ,
403 -end => $self->{'_peptide_length'} + $self->{'_peptide_offset'},
404 -strand => 1
405 );
406 }
407
408 =head2 peptide_offset
409
410 Title : peptide_offset
411 Usage : $obj->peptide_offset(20);
412 Function: Set and read the offset of peptide from the start of propeptide
413 Returns : set value or 0
414 Args : new value (optional)
415
416 =cut
417
418 sub peptide_offset {
419 my ($self,$offset, $len) = @_;
420 if( defined $offset) {
421 $self->throw("I need an integer, not [$offset]")
422 unless $offset =~ /^[+-]?\d+$/;
423 $self->{'_peptide_offset'} = $offset;
424
425 if (defined $len) {
426 $self->throw("I need an integer, not [$len]")
427 unless $len =~ /^[+-]?\d+$/;
428 $self->{'_peptide_length'} = $len;
429 }
430
431 my $a = $self->_create_pair
432 ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} );
433 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
434 $self->{'_mappers'}->{$mapper} = $a;
435 }
436 return $self->{'_peptide_offset'} || 0;
437 }
438
439 =head2 peptide_length
440
441 Title : peptide_length
442 Usage : $obj->peptide_length(20);
443 Function: Set and read the offset of peptide from the start of propeptide
444 Returns : set value or 0
445 Args : new value (optional)
446
447 =cut
448
449
450 sub peptide_length {
451 my ($self, $len) = @_;
452 if( defined $len) {
453 $self->throw("I need an integer, not [$len]")
454 if defined $len && $len !~ /^[+-]?\d+$/;
455 $self->{'_peptide_length'} = $len;
456 }
457 return $self->{'_peptide_length'};
458 }
459
460
461 =head2 exons
462
463 Title : exons
464 Usage : $obj->exons(@exons);
465 Function: Set and read the offset of CDS from the start of transcipt
466 You do not have to sort the exons before calling this method as
467 they will be sorted automatically.
468 If you have not defined the CDS, is will be set to span all
469 exons here.
470 Returns : array of Bio::LocationI exons in genome coordinates or 0
471 Args : array of Bio::LocationI exons in genome (or entry) coordinates
472
473 =cut
474
475 sub exons {
476 my ($self,@value) = @_;
477 my $cds_mapper = $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'};
478 my $inex_mapper =
479 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'};
480 my $exon_mapper =
481 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'};
482 my $intron_mapper =
483 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'};
484 my $negative_intron_mapper =
485 $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'};
486 my $exon_cds_mapper = $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'};
487
488 if(@value) {
489 if (ref($value[0]) &&
490 $value[0]->isa('Bio::SeqFeatureI') and
491 $value[0]->location->isa('Bio::Location::SplitLocationI')) {
492 @value = $value[0]->location->each_Location;
493 } else {
494 $self->throw("I need an array , not [@value]")
495 unless ref \@value eq 'ARRAY';
496 $self->throw("I need a reference to an array of Bio::LocationIs, not to [".
497 $value[0]. "]")
498 unless ref $value[0] and $value[0]->isa('Bio::LocationI');
499 }
500
501 #
502 # sort the input array
503 #
504 # and if the used has not defined CDS assume it is the complete exonic range
505 if (defined $value[0]->strand && $value[0]->strand == - 1) { #reverse strand
506 @value = map { $_->[0] }
507 sort { $b->[1] <=> $a->[1] }
508 map { [ $_, $_->start] }
509 @value;
510
511 unless ($self->cds) {
512 $self->cds(new Bio::Location::Simple(-start => $value[-1]->start,
513 -end => $value[0]->end,
514 -strand=> $value[0]->strand,
515 -seq_id=> $value[0]->seq_id,
516 )
517 );
518 }
519 } else { #undef or forward strand
520 @value = map { $_->[0] }
521 sort { $a->[1] <=> $b->[1] }
522 map { [ $_, $_->start] }
523 @value;
524 unless ($self->cds) {
525 $self->cds(new Bio::Location::Simple(-start => $value[0]->start,
526 -end => $value[-1]->end,
527 -strand=> $value[0]->strand,
528 -seq_id=> $value[0]->seq_id,
529 )
530 );
531 }
532
533 }
534
535 $self->{'_chr_exons'} = \@value;
536
537 # transform exons from chromosome to gene coordinates
538 # but only if gene coordinate system has been set
539 my @exons ;
540 #my $gene_mapper = $self->$COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
541 my $gene_mapper = "1-2";
542 if (defined $self->{'_mappers'}->{$gene_mapper} ) {
543
544 my $tmp_in = $self->{'_in'};
545 my $tmp_out = $self->{'_out'};
546 my $tmp_verb = $self->verbose;
547 $self->verbose(0);
548
549 $self->in('chr');
550 $self->out('gene');
551
552 @exons = map {$self->map($_)} @value;
553
554 $self->{'_in'} = ($tmp_in);
555 $self->{'_out'} = ($tmp_out);
556 $self->verbose($tmp_verb);
557 } else {
558 @exons = @value;
559 }
560
561 my $cds_map = Bio::Coordinate::Collection->new;
562 my $inex_map = Bio::Coordinate::Collection->new;
563 my $exon_map = Bio::Coordinate::Collection->new;
564 my $exon_cds_map = Bio::Coordinate::Collection->new;
565 my $intron_map = Bio::Coordinate::Collection->new;
566 my $negative_intron_map = Bio::Coordinate::Collection->new;
567
568 my $tr_end = 0;
569 my $coffset;
570 my $exon_counter;
571 my $prev_exon_end;
572
573 for my $exon ( @exons ) {
574
575 $exon_counter++;
576
577 #
578 # gene -> cds
579 #
580
581 my $match1 = Bio::Location::Simple->new
582 (-seq_id =>'gene' ,
583 -start => $exon->start,
584 -end => $exon->end, -strand=>1 );
585 my $match2 = Bio::Location::Simple->new
586 (-seq_id => 'cds',
587 -start => $tr_end + 1,
588 -end => $tr_end + $exon->end - $exon->start +1,
589 -strand=>$exon->strand );
590
591 $cds_map->add_mapper(Bio::Coordinate::Pair->new
592 (-in => $match1,
593 -out => $match2,
594 )
595 );
596
597 if ($exon->start <= 1 and $exon->end >= 1) {
598 $coffset = $tr_end - $exon->start + 1;
599 }
600 $tr_end = $tr_end + $exon->end - $exon->start + 1;
601
602 #
603 # gene -> intron
604 #
605
606 if (defined $prev_exon_end) {
607 my $match3 = Bio::Location::Simple->new
608 (-seq_id =>'gene',
609 -start => $prev_exon_end + 1,
610 -end => $exon->start -1, -strand=>$exon->strand );
611
612 my $match4 = Bio::Location::Simple->new
613 (-seq_id => 'intron'. ($exon_counter -1),
614 -start => 1,
615 -end => $exon->start - 1 - $prev_exon_end,
616 -strand=>$exon->strand );
617
618 # negative intron coordinates
619 my $match5 = Bio::Location::Simple->new
620 (-seq_id => 'intron'. ($exon_counter -1),
621 -start => -1 * ($exon->start - 2 - $prev_exon_end) -1,
622 -end => -1,
623 -strand=>$exon->strand );
624
625 $inex_map->add_mapper(Bio::Coordinate::Pair->new
626 (-in => $match3,
627 -out => $match4
628 )
629 );
630 $intron_map->add_mapper(Bio::Coordinate::Pair->new
631 (-in => $self->_clone_loc($match3),
632 -out => $self->_clone_loc($match4)
633 )
634 );
635 $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new
636 (-in => $self->_clone_loc($match4),
637 -out => $match5
638 ));
639
640 }
641
642 # store the value
643 $prev_exon_end = $exon->end;
644
645 #
646 # gene -> exon
647 #
648 my $match6 = Bio::Location::Simple->new
649 (-seq_id => 'exon'. $exon_counter,
650 -start => 1,
651 -end => $exon->end - $exon->start +1,
652 -strand=> $exon->strand );
653
654 my $pair2 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match1),
655 -out => $match6
656 );
657 my $pair3 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match6),
658 -out => $self->_clone_loc($match2)
659 );
660 $inex_map->add_mapper(Bio::Coordinate::Pair->new
661 (-in => $self->_clone_loc($match1),
662 -out => $match6
663 )
664 );
665 $exon_map->add_mapper(Bio::Coordinate::Pair->new
666 (-in => $self->_clone_loc($match1),
667 -out => $self->_clone_loc($match6)
668 )
669 );
670 $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new
671 (-in => $self->_clone_loc($match6),
672 -out => $self->_clone_loc($match2)
673 )
674 );
675
676 }
677
678 # move coordinate start if exons have negative values
679 if ($coffset) {
680 foreach my $m ($cds_map->each_mapper) {
681 $m->out->start($m->out->start - $coffset);
682 $m->out->end($m->out->end - $coffset);
683 }
684
685 }
686
687 $self->{'_mappers'}->{$cds_mapper} = $cds_map;
688 $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map;
689 $self->{'_mappers'}->{$inex_mapper} = $inex_map;
690 $self->{'_mappers'}->{$exon_mapper} = $exon_map;
691 $self->{'_mappers'}->{$intron_mapper} = $intron_map;
692 $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map;
693 }
694 return @{$self->{'_chr_exons'}} || 0;
695 }
696
697 =head2 _clone_loc
698
699 Title : _clone_loc
700 Usage : $copy_of_loc = $obj->_clone_loc($loc);
701 Function: Make a deep copy of a simple location
702 Returns : a Bio::Location::Simple object
703 Args : a Bio::Location::Simple object to be cloned
704
705 =cut
706
707
708 sub _clone_loc { # clone a simple location
709 my ($self,$loc) = @_;
710
711 $self->throw("I need a Bio::Location::Simple , not [". ref $loc. "]")
712 unless $loc->isa('Bio::Location::Simple');
713
714 return Bio::Location::Simple->new
715 (-seq_id => $loc->seq_id,
716 -start => $loc->start,
717 -end => $loc->end,
718 -strand=> $loc->strand,
719 -location_type => $loc->location_type
720 );
721 }
722
723
724 =head2 cds
725
726 Title : cds
727 Usage : $obj->cds(20);
728 Function: Set and read the offset of CDS from the start of transcipt
729
730 Simple input can be an integer which gives the start of the
731 coding region in genomic coordinate. If you want to provide
732 the end of the coding region or indicate the use of the
733 opposite strand, you have to pass a Bio::RangeI
734 (e.g. Bio::Location::Simple or Bio::SegFeature::Generic)
735 object to this method.
736
737 Returns : set value or 0
738 Args : new value (optional)
739
740 =cut
741
742 sub cds {
743 my ($self,$value) = @_;
744 if( defined $value) {
745 if ($value =~ /^[+-]?\d+$/ ) {
746 my $loc = Bio::Location::Simple->new(-start=>$value);
747 $self->{'_cds'} = $loc;
748 }
749 elsif (ref $value && $value->isa('Bio::RangeI') ) {
750 $self->{'_cds'} = $value;
751 } else {
752 $self->throw("I need an integer or Bio::RangeI, not [$value]")
753 }
754 # strand !!
755 my $len;
756
757 $len = $self->{'_cds'}->end - $self->{'_cds'}->start +1
758 if defined $self->{'_cds'}->end;
759
760 my $a = $self->_create_pair
761 ('chr', 'gene', 0,
762 $self->{'_cds'}->start-1,
763 $len,
764 $self->{'_cds'}->strand);
765 my $mapper = $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
766 $self->{'_mappers'}->{$mapper} = $a;
767
768 # recalculate exon-based mappers
769 if ( defined $self->{'_chr_exons'} ) {
770 $self->exons(@{$self->{'_chr_exons'}});
771 }
772
773 }
774 return $self->{'_cds'} || 0;
775 }
776
777
778 =head2 map
779
780 Title : map
781 Usage : $newpos = $obj->map(5);
782 Function: Map the location from the input coordinate system
783 to a new value in the output coordinate system.
784 Example :
785 Returns : new value in the output coordiante system
786 Args : a Bio::Location::Simple
787
788 =cut
789
790 sub map {
791 my ($self,$value) = @_;
792 my ($res);
793
794 $self->throw("Need to pass me a Bio::Location::Simple or ".
795 "Bio::Location::Simple or Bio::SeqFeatureI, not [".
796 ref($value). "]")
797 unless ref($value) && ($value->isa('Bio::Location::Simple') or
798 $value->isa('Bio::Location::SplitLocationI') or
799 $value->isa('Bio::SeqFeatureI'));
800 $self->throw("Input coordinate system not set")
801 unless $self->{'_in'};
802 $self->throw("Output coordinate system not set")
803 unless $self->{'_out'};
804 $self->throw("Do not be silly. Input and output coordinate ".
805 "systems are the same!")
806 unless $self->{'_in'} != $self->{'_out'};
807
808 $self->_check_direction();
809
810 $value = $value->location if $value->isa('Bio::SeqFeatureI');
811 print STDERR "=== Start location: ". $value->start. ",".
812 $value->end. " (". $value->strand. ")\n" if $self->verbose > 0;
813
814
815 # if nozero coordinate system is used in the input values
816 if ( defined $self->{'_nozero'} &&
817 ( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) {
818 $value->start($value->start + 1)
819 if defined $value->start && $value->start < 1;
820 $value->end($value->end + 1)
821 if defined $value->end && $value->end < 1;
822 }
823
824 my @steps = $self->_get_path();
825 print "mapping ", $self->{'_in'}, "->", $self->{'_out'},
826 " Mappers: ", join(", ", @steps), "\n" if $self->verbose > 0;
827
828 foreach my $mapper (@steps) {
829 if ($mapper eq $TRANSLATION) {
830 if ($self->direction == 1) {
831 $value = $self->_translate($value);
832 print STDERR "+ $TRANSLATION cds -> propeptide (translate) \n"
833 if $self->verbose > 0;
834 } else {
835 $value = $self->_reverse_translate($value);
836 print STDERR "+ $TRANSLATION propeptide -> cds (reverse translate) \n"
837 if $self->verbose > 0;
838 }
839 }
840 # keep the start and end values, and go on to next iteration
841 # if this mapper is not set
842 elsif ( ! defined $self->{'_mappers'}->{$mapper} ) {
843 # update mapper name
844 $mapper =~ /\d+-(\d+)/; my ($counter) = $1;
845 $value->seq_id($COORDINATE_INTS{$counter});
846 print STDERR "- $mapper\n" if $self->verbose > 0;
847 } else {
848
849 #
850 # the DEFAULT : generic mapping
851 #
852 $value = $self->{'_mappers'}->{$mapper}->map($value);
853 $value->purge_gaps
854 if ($value && $value->isa('Bio::Location::SplitLocationI') && $value->can('gap'));
855 print STDERR "+ $mapper (", $self->direction, "): start ",
856 $value->start, " end ", $value->end, "\n"
857 if $value && $self->verbose > 0;
858 }
859 }
860
861 # if nozero coordinate system is asked to be used in the output values
862 if ( defined $value && defined $self->{'_nozero'} &&
863 ( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) {
864
865 $value->start($value->start - 1)
866 if defined $value->start && $value->start < 1;
867 $value->end($value->end - 1)
868 if defined $value->end && $value->end < 1;
869 }
870
871 # handle merging of adjacent split locations!
872
873 if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) {
874 my $prevloc;
875 my $merging = 0;
876 my $newvalue;
877 my @matches;
878 foreach my $loc ( $value->each_Location(1) ) {
879 unless ($prevloc) {
880 $prevloc = $loc;
881 push @matches, $prevloc;
882 next;
883 }
884 if ($prevloc->end == ($loc->start - 1) && $prevloc->seq_id eq $loc->seq_id) {
885 $prevloc->end($loc->end);
886 $merging = 1;
887 } else {
888 push @matches, $loc;
889 $prevloc = $loc;
890 }
891 }
892 if ($merging) {
893 if (@matches > 1 ) {
894 $newvalue = Bio::Coordinate::Result->new;
895 map {$newvalue->add_sub_Location} @matches;
896 } else {
897 $newvalue = Bio::Coordinate::Result::Match->new
898 (-seq_id => $matches[0]->seq_id,
899 -start => $matches[0]->start,
900 -end => $matches[0]->end,
901 -strand=> $matches[0]->strand );
902 }
903 $value = $newvalue;
904 }
905 }
906 elsif (ref $value eq "Bio::Coordinate::Result" && $value->each_match == 1 ){
907 $value = $value->match;
908 }
909
910
911 return $value;
912 }
913
914 =head2 direction
915
916 Title : direction
917 Usage : $obj->direction('peptide');
918 Function: Read-only method for the direction of mapping deduced from
919 predefined input and output coordinate names.
920 Example :
921 Returns : 1 or -1, mapping direction
922 Args : new value (optional)
923
924 =cut
925
926 sub direction {
927 my ($self) = @_;
928 return $self->{'_direction'};
929 }
930
931
932 =head2 swap
933
934 Title : swap
935 Usage : $obj->swap;
936 Function: Swap the direction of transformation
937 (input <-> output)
938 Example :
939 Returns : 1
940 Args :
941
942 =cut
943
944 sub swap {
945 my ($self,$value) = @_;
946
947 ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'});
948 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
949
950 # record the changed direction;
951 $self->{_direction} *= -1;
952
953 return 1;
954 }
955
956
957 =head2 to_string
958
959 Title : to_string
960 Usage : $newpos = $obj->to_string(5);
961 Function: Dump the internal mapper values into a human readable format
962 Example :
963 Returns : string
964 Args :
965
966 =cut
967
968 sub to_string {
969 my ($self) = shift;
970
971 print "-" x 40, "\n";
972
973 # chr-gene
974 my $mapper_str = 'chr-gene';
975 my $mapper = $self->_mapper_string2code($mapper_str);
976
977 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
978 if (defined $self->cds) {
979 my $end = $self->cds->end -1 if defined $self->cds->end;
980 printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $self->cds->start-1 , $end || '';
981 printf "%16s%s: %s\n", ' ', 'gene strand', $self->cds->strand || 0;
982 }
983
984 # gene-intron
985 $mapper_str = 'gene-intron';
986 $mapper = $self->_mapper_string2code($mapper_str);
987 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
988
989 my $i = 1;
990 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
991 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
992 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
993 $i++;
994 }
995
996 # intron-negative_intron
997 $mapper_str = 'intron-negative_intron';
998 $mapper = $self->_mapper_string2code($mapper_str);
999 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
1000
1001 $i = 1;
1002 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
1003 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
1004 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
1005 $i++;
1006 }
1007
1008
1009 # gene-exon
1010 $mapper_str = 'gene-exon';
1011 $mapper = $self->_mapper_string2code($mapper_str);
1012 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
1013
1014 $i = 1;
1015 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
1016 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
1017 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
1018 $i++;
1019 }
1020
1021
1022 # gene-cds
1023 $mapper_str = 'gene-cds';
1024 $mapper = $self->_mapper_string2code($mapper_str);
1025 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
1026
1027 $i = 1;
1028 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
1029 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
1030 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
1031 $i++;
1032 }
1033
1034 # cds-propeptide
1035 $mapper_str = 'cds-propeptide';
1036 $mapper = $self->_mapper_string2code($mapper_str);
1037 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
1038 printf "%9s%-12s\n", "", '"translate"';
1039
1040
1041 # propeptide-peptide
1042 $mapper_str = 'propeptide-peptide';
1043 $mapper = $self->_mapper_string2code($mapper_str);
1044 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
1045 printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset;
1046
1047
1048
1049 print "\nin : ", $self->in, "\n";
1050 print "out: ", $self->out, "\n";
1051 my $dir;
1052 $self->direction ? ($dir='forward') : ($dir='reverse');
1053 printf "direction: %-8s(%s)\n", $dir, $self->direction;
1054 print "\n", "-" x 40, "\n";
1055
1056 1;
1057 }
1058
1059 sub _mapper_code2string {
1060 my ($self, $code) = @_;
1061 my ($a, $b) = $code =~ /(\d+)-(\d+)/;
1062 return $COORDINATE_INTS{$a}. '-'. $COORDINATE_INTS{$b};
1063
1064 }
1065
1066 sub _mapper_string2code {
1067 my ($self, $string) =@_;
1068 my ($a, $b) = $string =~ /([^-]+)-(.*)/;
1069 return $COORDINATE_SYSTEMS{$a}. '-'. $COORDINATE_SYSTEMS{$b};
1070 }
1071
1072
1073 =head2 _create_pair
1074
1075 Title : _create_pair
1076 Usage : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1);
1077 Function: Internal helper method to create a mapper between
1078 two coordinate systems
1079 Returns : a Bio::Coordinate::Pair object
1080 Args : string, input coordinate system name,
1081 string, output coordinate system name,
1082 boolean, strict mapping
1083 positive integer, offset
1084 positive integer, length
1085 1 || -1 , strand
1086
1087 =cut
1088
1089 sub _create_pair {
1090 my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_;
1091 $strict ||= 0;
1092 $strand ||= 1;
1093 $length ||= 20;
1094
1095 my $match1 = Bio::Location::Simple->new
1096 (-seq_id => $in,
1097 -start => $offset+1,
1098 -end => $offset+$length, -strand=>1 );
1099
1100 my $match2 = Bio::Location::Simple->new
1101 (-seq_id => $out,
1102 -start => 1,
1103 -end => $length, -strand=>$strand );
1104
1105 my $pair = Bio::Coordinate::ExtrapolatingPair->new
1106 (-in => $match1,
1107 -out => $match2,
1108 -strict => $strict
1109 );
1110
1111 return $pair;
1112
1113 }
1114
1115
1116 =head2 _translate
1117
1118 Title : _translate
1119 Usage : $newpos = $obj->_translate($loc);
1120 Function: Translate the location from the CDS coordinate system
1121 to a new value in the propeptide coordinate system.
1122 Example :
1123 Returns : new location
1124 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
1125
1126 =cut
1127
1128 sub _translate {
1129 my ($self,$value) = @_;
1130
1131 $self->throw("Need to pass me a Bio::Location::Simple or ".
1132 "Bio::Location::SplitLocationI, not [". ref($value). "]")
1133 unless defined $value &&
1134 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
1135
1136 my $seqid = 'propeptide';
1137
1138 if ($value->isa("Bio::Location::SplitLocationI")) {
1139 my $split = new Bio::Location::Split(-seq_id=>$seqid);
1140 foreach my $loc ( $value->each_Location(1) ) {
1141
1142 my $match = new Bio::Location::Simple(-start => int($loc->start / 3 )+1,
1143 -end => int($loc->end / 3 )+1,
1144 -seq_id => $seqid,
1145 -strand => 1
1146 );
1147 $split->add_sub_Location($match);
1148 }
1149 return $split;
1150
1151 } else {
1152 return new Bio::Location::Simple(-start => int($value->start / 3 )+1,
1153 -end => int($value->end / 3 )+1,
1154 -seq_id => $seqid,
1155 -strand => 1
1156 );
1157 }
1158 }
1159
1160 sub _frame {
1161 my ($self,$value) = @_;
1162
1163 $self->throw("Need to pass me a Bio::Location::Simple or ".
1164 "Bio::Location::SplitLocationI, not [". ref($value). "]")
1165 unless defined $value &&
1166 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
1167
1168 my $seqid = 'propeptide';
1169
1170 if ($value->isa("Bio::Location::SplitLocationI")) {
1171 my $split = new Bio::Location::Split(-seq_id=>$seqid);
1172 foreach my $loc ( $value->each_Location(1) ) {
1173
1174 my $match = new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
1175 -end => ($value->end-1) % 3 +1,
1176 -seq_id => 'frame',
1177 -strand => 1
1178 );
1179 $split->add_sub_Location($match);
1180 }
1181 return $split;
1182 } else {
1183 return new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
1184 -end => ($value->end-1) % 3 +1,
1185 -seq_id => 'frame',
1186 -strand => 1
1187 );
1188 }
1189 }
1190
1191
1192 =head2 _reverse_translate
1193
1194 Title : _reverse_translate
1195 Usage : $newpos = $obj->_reverse_translate(5);
1196 Function: Reverse translate the location from the propeptide
1197 coordinate system to a new value in the CSD.
1198 Note that a single peptide location expands to cover
1199 the codon triplet
1200 Example :
1201 Returns : new location in the CDS coordinate system
1202 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
1203
1204 =cut
1205
1206 sub _reverse_translate {
1207 my ($self,$value) = @_;
1208
1209
1210 $self->throw("Need to pass me a Bio::Location::Simple or ".
1211 "Bio::Location::SplitLocationI, not [". ref($value). "]")
1212 unless defined $value &&
1213 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
1214
1215 my $seqid = 'cds';
1216
1217 if ($value->isa("Bio::Location::SplitLocationI")) {
1218 my $split = new Bio::Location::Split(-seq_id=>$seqid);
1219 foreach my $loc ( $value->each_Location(1) ) {
1220
1221 my $match = new Bio::Location::Simple(-start => $value->start * 3 - 2,
1222 -end => $value->end * 3,
1223 -seq_id => $seqid,
1224 -strand => 1
1225 );
1226 $split->add_sub_Location($match);
1227 }
1228 return $split;
1229
1230 } else {
1231 return new Bio::Location::Simple(-start => $value->start * 3 - 2,
1232 -end => $value->end * 3,
1233 -seq_id => $seqid,
1234 -strand => 1
1235 );
1236 }
1237 }
1238
1239
1240 =head2 _check_direction
1241
1242 Title : _check_direction
1243 Usage : $obj->_check_direction();
1244 Function: Check and swap when needed the direction the location
1245 mapping Pairs based on input and output values
1246 Example :
1247 Returns : new location
1248 Args : a Bio::Location::Simple
1249
1250 =cut
1251
1252 sub _check_direction {
1253 my ($self) = @_;
1254
1255 my $new_direction = 1;
1256 $new_direction = -1 if $self->{'_in'} > $self->{'_out'};
1257
1258 unless ($new_direction == $self->{_direction} ) {
1259 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
1260 # record the changed direction;
1261 $self->{_direction} *= -1;
1262 }
1263 1;
1264 }
1265
1266
1267 =head2 _get_path
1268
1269 Title : _get_path
1270 Usage : $obj->_get_path('peptide');
1271 Function: internal method for finding that shortest path between
1272 input and output coordinate systems.
1273 Calculations and caching are handled by the graph class.
1274 See L<Bio::Coordinate::Graph>.
1275 Example :
1276 Returns : array of the mappers
1277 Args : none
1278
1279 =cut
1280
1281 sub _get_path {
1282 my ($self) = @_;
1283
1284 my $start = $self->{'_in'} || 0;
1285 my $end = $self->{'_out'} || 0;
1286
1287 # note the order
1288 # always go from smaller to bigger: it makes caching more efficient
1289 my $reverse;
1290 if ($start > $end) {
1291 ($start, $end) = ($end, $start );
1292 $reverse++;
1293 }
1294
1295 my @mappers;
1296 if (exists $self->{'_previous_path'} and
1297 $self->{'_previous_path'} eq "$start$end" ) {
1298 # use cache
1299 @mappers = @{$self->{'_mapper_path'}};
1300 } else {
1301 my $mapper;
1302 my $prev_node = '';
1303 @mappers =
1304 map { $mapper = "$prev_node-$_"; $prev_node = $_; $mapper; }
1305 $self->{'_graph'}->shortest_path($start, $end);
1306 shift @mappers;
1307
1308 $self->{'_previous_path'} = "$start$end";
1309 $self->{'_mapper_path'} = \@mappers;
1310 }
1311
1312 $reverse ? return reverse @mappers : return @mappers;
1313 }
1314
1315
1316 1;
1317