diff variant_effect_predictor/Bio/Coordinate/GeneMapper.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Coordinate/GeneMapper.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1317 @@
+# $Id: GeneMapper.pm,v 1.13.2.2 2003/03/13 11:56:30 heikki Exp $
+#
+# bioperl module for Bio::Coordinate::GeneMapper
+#
+# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems
+
+=head1 SYNOPSIS
+
+  use Bio::Coordinate::GeneMapper;
+
+  # get a Bio::RangeI representing the start, end and strand of the CDS
+  # in chromosomal (or entry) coordinates
+  my $cds;
+
+  # get a Bio::Location::Split or an array of Bio::LocationI objects
+  # holding the start, end and strand of all the exons in chromosomal
+  # (or entry) coordinates
+  my $exons;
+
+  # create a gene mapper and set it to map from chromosomal to cds coordinates
+  my $gene = Bio::Coordinate::GeneMapper->new(-in=>'chr',
+                                              -out=>'cds',
+                                              -cds=>$cds,
+                                              -exons=>$exons
+                                             );
+
+  # get a a Bio::Location or sequence feature in input (chr) coordinates
+  my $loc;
+
+  # map the location into output coordinates and get a new location object
+  $newloc = $gene->map($loc);
+
+
+=head1 DESCRIPTION
+
+Bio::Coordinate::GeneMapper is a module for simplifying the mappings
+of coodinate locations between various gene related locations in human
+genetics. It also adds a special human genetics twist to coordinate
+systems by making it possible to disable the use of zero
+(0). Locations before position one start from -1. See method
+L<nozero>.
+
+It understands by name the following coordinate systems and mapping
+between them:
+
+                          peptide (peptide length)
+                             ^
+                             | -peptide_offset
+                             |
+                    frame  propeptide (propeptide length)
+                        ^    ^
+                         \   |
+             translate    \  |
+                           \ |
+                            cds  (transcript start and end)
+                             ^
+      negative_intron        | \
+              ^              |  \  transcribe
+               \             |   \
+              intron        exon  \
+               ^   ^         ^     /
+      splice    \   \      / |    /
+                 \   \    /  |   /
+                  \   inex   |  /
+                   \    ^    | /
+                    \    \   |/
+                     ----- gene (gene_length)
+                             ^
+                             | - gene_offset
+                             |
+                            chr (or entry)
+
+
+This structure is kept in the global variable $DAG which is a
+representation of a Directed Acyclic Graph. The path calculations
+traversing this graph are done in a helper class. See
+L<Bio::Coordinate::Graph>.
+
+Of these, two operations are special cases, translate and splice.
+Translating and reverse translating are implemented as internal
+methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs
+additional information that is provided by method L<exons> which takes
+in an array of Bio::LocationI objects.
+
+Most of the coordinate system names should be selfexplanatory to
+anyone familiar with genes. Negative intron coordinate system is
+starts counting backwards from -1 as the last nucleotide in the
+intron. This used when only exon and a few flanking intron nucleotides
+are known.
+
+
+This class models coordinates within one transcript of a gene, so to
+tackle multiple transcripts you need several instances of the
+class. It is therefore valid to argue that the name of the class
+should be TranscriptMapper. GeneMapper is a catchier name, so it
+stuck.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                 - General discussion
+  http://bio.perl.org/MailList.html     - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki@ebi.ac.uk
+Address:
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::GeneMapper;
+use vars qw(@ISA %COORDINATE_SYSTEMS  %COORDINATE_INTS $TRANSLATION $DAG
+            $NOZERO_VALUES $NOZERO_KEYS);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::Coordinate::Result;
+use Bio::Location::Simple;
+use Bio::Coordinate::Graph;
+use Bio::Coordinate::Collection;
+use Bio::Coordinate::Pair;
+use Bio::Coordinate::ExtrapolatingPair;
+
+@ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
+
+# first set internal values for all translation tables
+
+%COORDINATE_SYSTEMS = (
+		       peptide          => 10,
+		       propeptide       => 9,
+		       frame            => 8,
+		       cds              => 7,
+		       negative_intron  => 6,
+		       intron           => 5,
+		       exon             => 4,
+		       inex             => 3,
+		       gene             => 2,
+		       chr              => 1
+		      );
+
+%COORDINATE_INTS = (
+		    10 => 'peptide',
+		    9 => 'propeptide',
+		    8 => 'frame',
+		    7 => 'cds',
+		    6 => 'negative_intron',
+		    5 => 'intron',
+		    4 => 'exon',
+		    3 => 'inex',
+		    2 => 'gene',
+		    1 => 'chr'
+		   );
+
+$TRANSLATION = $COORDINATE_SYSTEMS{'cds'}. "-".
+    $COORDINATE_SYSTEMS{'propeptide'};
+
+$DAG = {
+	10 => [],
+	9  => [10],
+	8  => [],
+	7  => [8, 9],
+	6  => [],
+	5  => [6],
+	4  => [7],
+	3  => [4, 5],
+	2  => [3, 4, 5, 7],
+	1  => [2]
+       };
+
+$NOZERO_VALUES = {0 => 0, 'in' => 1, 'out' => 2, 'in&out' => 3 };
+$NOZERO_KEYS = { 0 => 0, 1 => 'in', 2 => 'out', 3 => 'in&out' };
+
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # prime the graph
+    my $graph = new Bio::Coordinate::Graph;
+    $graph->hash_of_arrays($DAG);
+    $self->graph($graph);
+
+    my($in, $out, $peptide_offset, $exons,
+       $cds, $nozero, $strict) =
+	$self->_rearrange([qw(IN
+                              OUT
+                              PEPTIDE_OFFSET
+                              EXONS
+                              CDS
+                              NOZERO
+                              STRICT
+			     )],
+			 @args);
+
+    # direction of mapping when going chr to protein
+    $self->{_direction} = 1;
+
+    $in  && $self->in($in);
+    $out  && $self->out($out);
+    $cds && $self->cds($cds);
+    $exons  && ref($exons) =~ /ARRAY/i && $self->exons(@$exons);
+    $peptide_offset && $self->peptide_offset($peptide_offset);
+    $nozero && $self->nozero($nozero);
+    $strict && $self->strict($strict);
+
+    return $self; # success - we hope!
+}
+
+=head2 in
+
+ Title   : in
+ Usage   : $obj->in('peptide');
+ Function: Set and read the input coordinate system.
+ Example :
+ Returns : value of input system
+ Args    : new value (optional)
+
+=cut
+
+sub in {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $self->throw("Not a valid input coordinate system name [$value]\n".
+		    "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
+	   unless defined $COORDINATE_SYSTEMS{$value};
+
+       $self->{'_in'} = $COORDINATE_SYSTEMS{$value};
+   }
+   return $COORDINATE_INTS{ $self->{'_in'} };
+}
+
+
+=head2 out
+
+ Title   : out
+ Usage   : $obj->out('peptide');
+ Function: Set and read the output coordinate system.
+ Example :
+ Returns : value of output system
+ Args    : new value (optional)
+
+=cut
+
+sub out {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $self->throw("Not a valid input coordinate system name [$value]\n".
+		    "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
+	   unless defined $COORDINATE_SYSTEMS{$value};
+
+       $self->{'_out'} = $COORDINATE_SYSTEMS{$value};
+   }
+   return $COORDINATE_INTS{ $self->{'_out'} };
+}
+
+=head2 strict
+
+ Title   : strict
+ Usage   : $obj->strict('peptide');
+ Function: Set and read weather strict boundaried of coordinate
+           systems are enforced.
+           When strict is on, the end of the coordinate range must be defined.
+ Example :
+ Returns : boolean
+ Args    : boolean (optional)
+
+=cut
+
+sub strict {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $value ? ( $self->{'_strict'} = 1 ) : ( $self->{'_strict'} = 0 );
+       ## update in each mapper !!
+   }
+   return $self->{'_strict'} || 0 ;
+}
+
+
+=head2 nozero
+
+ Title   : nozero
+ Usage   : $obj->nozero(1);
+ Function: Flag to disable the use of zero in the input,
+           output or both coordinate systems. Use of coordinate
+           systems without zero is a peculiarity  common in
+           human genetics community.
+ Example :
+ Returns : 0 (default), or 'in', 'out', 'in&out'
+ Args    : 0 (default), or 'in', 'out', 'in&out'
+
+=cut
+
+sub nozero {
+   my ($self,$value) = @_;
+
+   if (defined $value) {
+       $self->throw("Not a valid value for nozero [$value]\n".
+		    "Valid values are ". join(", ", keys %{$NOZERO_VALUES} ))
+	   unless defined $NOZERO_VALUES->{$value};
+       $self->{'_nozero'} = $NOZERO_VALUES->{$value};
+   }
+
+   my $res = $self->{'_nozero'} || 0;
+   return $NOZERO_KEYS->{$res};
+}
+
+=head2 graph
+
+ Title   : graph
+ Usage   : $obj->graph($new_graph);
+ Function: Set and read the graph object representing relationships
+           between coordinate systems
+ Example :
+ Returns : Bio::Coordinate::Graph object
+ Args    : new Bio::Coordinate::Graph object (optional)
+
+=cut
+
+sub graph {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $self->throw("Not a valid graph [$value]\n")
+	   unless $value->isa('Bio::Coordinate::Graph');
+       $self->{'_graph'} = $value;
+   }
+   return $self->{'_graph'};
+}
+
+=head2 peptide
+
+ Title   : peptide
+ Usage   : $obj->peptide_offset($peptide_coord);
+ Function: Read and write the offset of peptide from the start of propeptide
+           and peptide length
+ Returns : a Bio::Location::Simple object
+ Args    : a Bio::LocationI object
+
+=cut
+
+sub peptide {
+   my ($self, $value) = @_;
+   if( defined $value) {
+       $self->throw("I need a Bio::LocationI, not  [". $value. "]")
+	   unless $value->isa('Bio::LocationI');
+
+       $self->throw("Peptide start not defined")
+	   unless defined $value->start;
+       $self->{'_peptide_offset'} = $value->start - 1;
+
+       $self->throw("Peptide end not defined")
+	   unless defined $value->end;
+       $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'};
+
+
+       my $a = $self->_create_pair
+	   ('propeptide', 'peptide', $self->strict,
+	    $self->{'_peptide_offset'}, $self->{'_peptide_length'} );
+       my $mapper =  $COORDINATE_SYSTEMS{'propeptide'}. "-".  $COORDINATE_SYSTEMS{'peptide'};
+       $self->{'_mappers'}->{$mapper} = $a;
+   }
+   return  Bio::Location::Simple->new
+       (-seq_id => 'propeptide',
+	-start => $self->{'_peptide_offset'} + 1 ,
+	-end => $self->{'_peptide_length'} + $self->{'_peptide_offset'},
+	-strand => 1
+       );
+}
+
+=head2 peptide_offset
+
+ Title   : peptide_offset
+ Usage   : $obj->peptide_offset(20);
+ Function: Set and read the offset of peptide from the start of propeptide
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+sub peptide_offset {
+   my ($self,$offset, $len) = @_;
+   if( defined $offset) {
+       $self->throw("I need an integer, not [$offset]")
+	   unless $offset =~ /^[+-]?\d+$/;
+       $self->{'_peptide_offset'} = $offset;
+
+       if (defined $len) {
+	   $self->throw("I need an integer, not [$len]")
+	       unless $len =~ /^[+-]?\d+$/;
+	   $self->{'_peptide_length'} = $len;
+       }
+
+       my $a = $self->_create_pair
+	   ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} );
+       my $mapper =  $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
+       $self->{'_mappers'}->{$mapper} = $a;
+   }
+   return $self->{'_peptide_offset'} || 0;
+}
+
+=head2 peptide_length
+
+ Title   : peptide_length
+ Usage   : $obj->peptide_length(20);
+ Function: Set and read the offset of peptide from the start of propeptide
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+
+sub peptide_length {
+   my ($self, $len) = @_;
+   if( defined $len) {
+       $self->throw("I need an integer, not [$len]")
+	   if defined $len && $len !~ /^[+-]?\d+$/;
+       $self->{'_peptide_length'} = $len;
+   }
+   return $self->{'_peptide_length'};
+}
+
+
+=head2 exons
+
+ Title   : exons
+ Usage   : $obj->exons(@exons);
+ Function: Set and read the offset of CDS from the start of transcipt
+           You do not have to sort the exons before calling this method as
+           they will be sorted automatically.
+           If you have not defined the CDS, is will be set to span all
+           exons here.
+ Returns : array of Bio::LocationI exons in genome coordinates or 0
+ Args    : array of Bio::LocationI exons in genome (or entry) coordinates
+
+=cut
+
+sub exons {
+   my ($self,@value) = @_;
+   my $cds_mapper =  $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'};
+   my $inex_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'};
+   my $exon_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'};
+   my $intron_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'};
+   my $negative_intron_mapper =
+       $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'};
+   my $exon_cds_mapper =  $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'};
+
+   if(@value) {
+       if (ref($value[0]) &&
+	   $value[0]->isa('Bio::SeqFeatureI') and
+	   $value[0]->location->isa('Bio::Location::SplitLocationI')) {
+	   @value = $value[0]->location->each_Location;
+       } else {
+	   $self->throw("I need an array , not [@value]")
+	       unless ref \@value eq 'ARRAY';
+	   $self->throw("I need a reference to an array of Bio::LocationIs, not to [".
+			$value[0]. "]")
+	       unless ref $value[0] and $value[0]->isa('Bio::LocationI');
+       }
+
+       #
+       # sort the input array
+       #
+       # and if the used has not defined CDS assume it is the complete exonic range
+       if (defined $value[0]->strand && $value[0]->strand == - 1) {  #reverse strand
+	   @value = map { $_->[0] }
+	               sort { $b->[1] <=> $a->[1] }
+                       map { [ $_, $_->start] }
+                       @value;
+
+           unless ($self->cds) {
+               $self->cds(new Bio::Location::Simple(-start => $value[-1]->start,
+                                                    -end   => $value[0]->end,
+                                                    -strand=> $value[0]->strand,
+                                                    -seq_id=> $value[0]->seq_id,
+                                                   )
+                         );
+           }
+       } else {               #undef or forward strand
+	   @value = map { $_->[0] }
+	               sort { $a->[1] <=> $b->[1] }
+                       map { [ $_, $_->start] }
+                       @value;
+           unless ($self->cds) {
+               $self->cds(new Bio::Location::Simple(-start => $value[0]->start,
+                                                    -end   => $value[-1]->end,
+                                                    -strand=> $value[0]->strand,
+                                                    -seq_id=> $value[0]->seq_id,
+                                                   )
+                         );
+           }
+
+       }
+
+       $self->{'_chr_exons'} = \@value;
+
+       # transform exons from chromosome to gene coordinates
+       # but only if gene coordinate system has been set
+       my @exons ;
+       #my $gene_mapper = $self->$COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
+       my $gene_mapper = "1-2";
+       if (defined $self->{'_mappers'}->{$gene_mapper} ) {
+
+	   my $tmp_in = $self->{'_in'};
+	   my $tmp_out = $self->{'_out'};
+	   my $tmp_verb = $self->verbose;
+	   $self->verbose(0);
+
+	   $self->in('chr');
+	   $self->out('gene');
+
+	   @exons = map {$self->map($_)} @value;
+
+	   $self->{'_in'} = ($tmp_in);
+	   $self->{'_out'} = ($tmp_out);
+	   $self->verbose($tmp_verb);
+       } else {
+	   @exons = @value;
+       }
+
+       my $cds_map = Bio::Coordinate::Collection->new;
+       my $inex_map = Bio::Coordinate::Collection->new;
+       my $exon_map = Bio::Coordinate::Collection->new;
+       my $exon_cds_map = Bio::Coordinate::Collection->new;
+       my $intron_map = Bio::Coordinate::Collection->new;
+       my $negative_intron_map = Bio::Coordinate::Collection->new;
+
+       my $tr_end = 0;
+       my $coffset;
+       my $exon_counter;
+       my $prev_exon_end;
+
+       for my $exon ( @exons ) {
+
+	   $exon_counter++;
+
+	   #
+	   # gene -> cds
+	   #
+
+	   my $match1 = Bio::Location::Simple->new
+	       (-seq_id =>'gene' ,
+		-start => $exon->start,
+		-end => $exon->end, -strand=>1 );
+	   my $match2 = Bio::Location::Simple->new
+	       (-seq_id => 'cds',
+		-start => $tr_end + 1,
+		-end => $tr_end + $exon->end - $exon->start +1,
+		-strand=>$exon->strand );
+
+	   $cds_map->add_mapper(Bio::Coordinate::Pair->new
+                                (-in => $match1,
+                                 -out => $match2,
+                                )
+                               );
+
+	   if ($exon->start <= 1 and $exon->end >= 1) {
+	       $coffset = $tr_end - $exon->start + 1;
+	   }
+	   $tr_end = $tr_end  + $exon->end - $exon->start + 1;
+
+	   #
+	   # gene -> intron
+	   #
+
+	   if (defined $prev_exon_end) {
+	       my $match3 = Bio::Location::Simple->new
+		   (-seq_id =>'gene',
+		    -start => $prev_exon_end + 1,
+		    -end => $exon->start -1, -strand=>$exon->strand );
+
+	       my $match4 = Bio::Location::Simple->new
+		   (-seq_id => 'intron'. ($exon_counter -1),
+		    -start => 1,
+		    -end => $exon->start - 1 - $prev_exon_end,
+		    -strand=>$exon->strand );
+
+	       # negative intron coordinates
+	       my $match5 = Bio::Location::Simple->new
+		   (-seq_id => 'intron'. ($exon_counter -1),
+		    -start => -1 * ($exon->start - 2 - $prev_exon_end) -1,
+		    -end => -1,
+		    -strand=>$exon->strand );
+
+	       $inex_map->add_mapper(Bio::Coordinate::Pair->new
+                                     (-in => $match3,
+                                      -out => $match4
+                                     )
+                                    );
+	       $intron_map->add_mapper(Bio::Coordinate::Pair->new
+                                       (-in => $self->_clone_loc($match3),
+                                        -out => $self->_clone_loc($match4)
+                                       )
+                                      );
+	       $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new
+                                                (-in => $self->_clone_loc($match4),
+                                                 -out => $match5
+                                                ));
+
+	   }
+
+	   # store the value
+	   $prev_exon_end = $exon->end;
+
+	   #
+	   # gene -> exon
+	   #
+	   my $match6 = Bio::Location::Simple->new
+	       (-seq_id => 'exon'. $exon_counter,
+		-start => 1,
+		-end => $exon->end - $exon->start +1,
+		-strand=> $exon->strand );
+
+	   my $pair2 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match1),
+						  -out => $match6
+						 );
+	   my $pair3 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match6),
+						  -out => $self->_clone_loc($match2)
+						 );
+	   $inex_map->add_mapper(Bio::Coordinate::Pair->new
+                                 (-in => $self->_clone_loc($match1),
+                                  -out => $match6
+                                 )
+                                );
+	   $exon_map->add_mapper(Bio::Coordinate::Pair->new
+                                 (-in => $self->_clone_loc($match1),
+                                  -out => $self->_clone_loc($match6)
+                                 )
+                                );
+           $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new
+                                     (-in => $self->_clone_loc($match6),
+                                      -out => $self->_clone_loc($match2)
+                                     )
+                                    );
+
+       }
+
+       # move coordinate start if exons have negative values
+       if ($coffset) {
+	   foreach my $m ($cds_map->each_mapper) {
+	       $m->out->start($m->out->start - $coffset);
+	       $m->out->end($m->out->end - $coffset);
+	   }
+
+       }
+
+       $self->{'_mappers'}->{$cds_mapper} = $cds_map;
+       $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map;
+       $self->{'_mappers'}->{$inex_mapper} = $inex_map;
+       $self->{'_mappers'}->{$exon_mapper} = $exon_map;
+       $self->{'_mappers'}->{$intron_mapper} = $intron_map;
+       $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map;
+   }
+   return  @{$self->{'_chr_exons'}}  || 0;
+}
+
+=head2 _clone_loc
+
+ Title   : _clone_loc
+ Usage   : $copy_of_loc = $obj->_clone_loc($loc);
+ Function: Make a deep copy of a simple location
+ Returns : a Bio::Location::Simple object
+ Args    : a Bio::Location::Simple object to be cloned
+
+=cut
+
+
+sub _clone_loc { # clone a simple location
+   my ($self,$loc) = @_;
+
+   $self->throw("I need a Bio::Location::Simple , not [". ref $loc. "]")
+       unless $loc->isa('Bio::Location::Simple');
+
+   return  Bio::Location::Simple->new
+       (-seq_id => $loc->seq_id,
+        -start => $loc->start,
+        -end => $loc->end,
+        -strand=> $loc->strand,
+        -location_type => $loc->location_type
+       );
+}
+
+
+=head2 cds
+
+ Title   : cds
+ Usage   : $obj->cds(20);
+ Function: Set and read the offset of CDS from the start of transcipt
+
+           Simple input can be an integer which gives the start of the
+           coding region in genomic coordinate. If you want to provide
+           the end of the coding region or indicate the use of the
+           opposite strand, you have to pass a Bio::RangeI
+           (e.g. Bio::Location::Simple or Bio::SegFeature::Generic)
+           object to this method.
+
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+sub cds {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       if ($value =~ /^[+-]?\d+$/ ) {
+	   my $loc = Bio::Location::Simple->new(-start=>$value);
+	   $self->{'_cds'} = $loc;
+       }
+       elsif (ref $value &&  $value->isa('Bio::RangeI') ) {
+	   $self->{'_cds'} = $value;
+       } else {
+	   $self->throw("I need an integer or Bio::RangeI, not [$value]")
+       }
+       # strand !!
+       my $len;
+
+       $len = $self->{'_cds'}->end - $self->{'_cds'}->start +1
+	   if defined $self->{'_cds'}->end;
+
+       my $a = $self->_create_pair
+	   ('chr', 'gene', 0,
+	    $self->{'_cds'}->start-1,
+	    $len,
+	    $self->{'_cds'}->strand);
+       my $mapper =  $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
+       $self->{'_mappers'}->{$mapper} = $a;
+
+       # recalculate exon-based mappers
+       if ( defined $self->{'_chr_exons'} ) {
+	   $self->exons(@{$self->{'_chr_exons'}});
+       }
+
+   }
+   return $self->{'_cds'} || 0;
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map(5);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+ Example :
+ Returns : new value in the output coordiante system
+ Args    : a Bio::Location::Simple
+
+=cut
+
+sub map {
+   my ($self,$value) = @_;
+   my ($res);
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::Simple or Bio::SeqFeatureI, not [".
+		ref($value). "]")
+       unless ref($value) && ($value->isa('Bio::Location::Simple') or
+                              $value->isa('Bio::Location::SplitLocationI') or
+			      $value->isa('Bio::SeqFeatureI'));
+   $self->throw("Input coordinate system not set")
+       unless $self->{'_in'};
+   $self->throw("Output coordinate system not set")
+       unless $self->{'_out'};
+   $self->throw("Do not be silly. Input and output coordinate ".
+		"systems are the same!")
+       unless $self->{'_in'} != $self->{'_out'};
+
+   $self->_check_direction();
+
+   $value = $value->location if $value->isa('Bio::SeqFeatureI');
+   print STDERR "=== Start location: ". $value->start. ",".
+       $value->end. " (". $value->strand. ")\n" if $self->verbose > 0;
+
+
+   # if nozero coordinate system is used in the input values
+   if ( defined $self->{'_nozero'} &&
+	( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) {
+       $value->start($value->start + 1)
+	   if defined $value->start && $value->start < 1;
+       $value->end($value->end + 1)
+	   if defined $value->end && $value->end < 1;
+   }
+
+   my @steps = $self->_get_path();
+   print "mapping ", $self->{'_in'}, "->", $self->{'_out'},
+       "  Mappers: ", join(", ", @steps), "\n"  if $self->verbose > 0;
+
+   foreach my $mapper (@steps) {
+       if ($mapper eq $TRANSLATION) {
+	   if ($self->direction == 1) {
+	       $value = $self->_translate($value);
+	       print STDERR "+   $TRANSLATION cds -> propeptide (translate) \n"
+		   if $self->verbose > 0;
+	   } else {
+	       $value = $self->_reverse_translate($value);
+	       print STDERR "+   $TRANSLATION propeptide -> cds (reverse translate) \n"
+		   if $self->verbose > 0;
+	   }
+       }
+       # keep the start and end values, and go on to next iteration
+       #  if this mapper is not set
+       elsif ( ! defined $self->{'_mappers'}->{$mapper} ) {
+	   # update mapper name
+	   $mapper =~ /\d+-(\d+)/;   my ($counter) = $1;
+	   $value->seq_id($COORDINATE_INTS{$counter});
+	   print STDERR "-   $mapper\n" if $self->verbose > 0;
+       } else {
+
+           #
+	   # the DEFAULT : generic mapping
+           #
+	   $value = $self->{'_mappers'}->{$mapper}->map($value);
+           $value->purge_gaps
+              if ($value && $value->isa('Bio::Location::SplitLocationI') && $value->can('gap'));
+	   print STDERR "+  $mapper (", $self->direction, "):  start ",
+               $value->start, " end ", $value->end, "\n"
+	       if $value && $self->verbose > 0;
+       }
+   }
+
+   # if nozero coordinate system is asked to be used in the output values
+   if ( defined $value && defined $self->{'_nozero'} &&
+	( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) {
+
+       $value->start($value->start - 1)
+	   if defined $value->start && $value->start < 1;
+       $value->end($value->end - 1)
+	   if defined $value->end && $value->end < 1;
+   }
+
+   # handle merging of adjacent split locations!
+
+   if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) {
+       my $prevloc;
+       my $merging = 0;
+       my $newvalue;
+       my @matches;
+       foreach my $loc ( $value->each_Location(1) ) {
+           unless ($prevloc) {
+               $prevloc = $loc;
+               push @matches, $prevloc;
+               next;
+           }
+           if ($prevloc->end == ($loc->start - 1) && $prevloc->seq_id eq $loc->seq_id) {
+               $prevloc->end($loc->end);
+               $merging = 1;
+           } else {
+               push @matches, $loc;
+               $prevloc = $loc;
+           }
+       }
+       if ($merging) {
+           if (@matches > 1 ) {
+               $newvalue = Bio::Coordinate::Result->new;
+               map {$newvalue->add_sub_Location} @matches;
+           } else {
+               $newvalue = Bio::Coordinate::Result::Match->new
+                   (-seq_id => $matches[0]->seq_id,
+                    -start => $matches[0]->start,
+                    -end => $matches[0]->end,
+                    -strand=> $matches[0]->strand );
+           }
+           $value = $newvalue;
+       }
+   } 
+   elsif (ref $value eq "Bio::Coordinate::Result" && $value->each_match == 1 ){
+       $value = $value->match;
+   }
+
+
+   return $value;
+}
+
+=head2 direction
+
+ Title   : direction
+ Usage   : $obj->direction('peptide');
+ Function: Read-only method for the direction of mapping deduced from
+           predefined input and output coordinate names.
+ Example :
+ Returns : 1 or -1, mapping direction
+ Args    : new value (optional)
+
+=cut
+
+sub direction {
+   my ($self) = @_;
+   return $self->{'_direction'};
+}
+
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of transformation
+           (input <-> output)
+ Example :
+ Returns : 1
+ Args    :
+
+=cut
+
+sub swap {
+   my ($self,$value) = @_;
+
+   ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'});
+   map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
+
+   # record the changed direction;
+   $self->{_direction} *= -1;
+
+   return 1;
+}
+
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : $newpos = $obj->to_string(5);
+ Function: Dump the internal mapper values into a human readable format
+ Example :
+ Returns : string
+ Args    :
+
+=cut
+
+sub to_string {
+   my ($self) = shift;
+
+   print "-" x 40, "\n";
+
+   # chr-gene
+   my $mapper_str = 'chr-gene';
+   my $mapper = $self->_mapper_string2code($mapper_str);
+
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+   if (defined $self->cds) {
+       my $end = $self->cds->end -1 if defined $self->cds->end;
+       printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $self->cds->start-1 , $end || '';
+       printf "%16s%s: %s\n", ' ', 'gene strand', $self->cds->strand || 0;
+   }
+
+   # gene-intron
+   $mapper_str = 'gene-intron';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+
+   my $i = 1;
+   foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
+       printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
+       printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
+       $i++;
+   }
+
+   # intron-negative_intron
+   $mapper_str = 'intron-negative_intron';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+
+   $i = 1;
+   foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
+       printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
+       printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
+       $i++;
+   }
+
+
+   # gene-exon
+   $mapper_str = 'gene-exon';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+
+   $i = 1;
+   foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
+       printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
+       printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
+       $i++;
+   }
+
+
+   # gene-cds
+   $mapper_str = 'gene-cds';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+
+   $i = 1;
+   foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
+       printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
+       printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
+       $i++;
+   }
+
+   # cds-propeptide
+   $mapper_str = 'cds-propeptide';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+   printf "%9s%-12s\n", "", '"translate"';
+
+
+   # propeptide-peptide
+   $mapper_str = 'propeptide-peptide';
+   $mapper = $self->_mapper_string2code($mapper_str);
+   printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+   printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset;
+
+
+
+   print "\nin : ", $self->in, "\n";
+   print "out: ", $self->out, "\n";
+   my $dir;
+   $self->direction ? ($dir='forward') : ($dir='reverse');
+   printf "direction: %-8s(%s)\n",  $dir, $self->direction;
+   print "\n", "-" x 40, "\n";
+
+   1;
+}
+
+sub _mapper_code2string {
+    my ($self, $code) = @_;
+    my ($a, $b) = $code =~ /(\d+)-(\d+)/;
+    return $COORDINATE_INTS{$a}. '-'.  $COORDINATE_INTS{$b};
+
+}
+
+sub _mapper_string2code {
+    my ($self, $string) =@_;
+    my ($a, $b) = $string =~ /([^-]+)-(.*)/;
+    return $COORDINATE_SYSTEMS{$a}. '-'.  $COORDINATE_SYSTEMS{$b};
+}
+
+
+=head2 _create_pair
+
+ Title   : _create_pair
+ Usage   : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1);
+ Function: Internal helper method to create a mapper between
+           two coordinate systems
+ Returns : a Bio::Coordinate::Pair object
+ Args    : string, input coordinate system name,
+           string, output coordinate system name,
+           boolean, strict mapping
+           positive integer, offset
+           positive integer, length
+           1 || -1 , strand
+
+=cut
+
+sub _create_pair {
+   my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_;
+   $strict ||= 0;
+   $strand ||= 1;
+   $length ||= 20;
+
+   my $match1 = Bio::Location::Simple->new
+       (-seq_id => $in,
+	-start => $offset+1,
+	-end => $offset+$length, -strand=>1 );
+
+   my $match2 = Bio::Location::Simple->new
+       (-seq_id => $out,
+	-start => 1,
+	-end => $length, -strand=>$strand );
+
+   my $pair = Bio::Coordinate::ExtrapolatingPair->new
+       (-in => $match1,
+        -out => $match2,
+        -strict => $strict
+       );
+
+   return $pair;
+
+}
+
+
+=head2 _translate
+
+ Title   : _translate
+ Usage   : $newpos = $obj->_translate($loc);
+ Function: Translate the location from the CDS coordinate system
+           to a new value in the propeptide coordinate system.
+ Example :
+ Returns : new location
+ Args    : a Bio::Location::Simple or Bio::Location::SplitLocationI
+
+=cut
+
+sub _translate {
+   my ($self,$value) = @_;
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless defined $value &&
+           ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
+
+   my $seqid = 'propeptide';
+
+   if ($value->isa("Bio::Location::SplitLocationI")) {
+       my $split = new Bio::Location::Split(-seq_id=>$seqid);
+       foreach my $loc ( $value->each_Location(1) ) {
+
+           my $match = new Bio::Location::Simple(-start => int($loc->start / 3 )+1,
+                                                 -end => int($loc->end / 3 )+1,
+                                                 -seq_id => $seqid,
+                                                 -strand => 1
+                                                );
+           $split->add_sub_Location($match);
+       }
+       return $split;
+
+   } else {
+       return new Bio::Location::Simple(-start => int($value->start / 3 )+1,
+                                        -end => int($value->end / 3 )+1,
+                                        -seq_id => $seqid,
+                                        -strand => 1
+                                       );
+   }
+}
+
+sub _frame {
+   my ($self,$value) = @_;
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless defined $value &&
+           ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
+
+   my $seqid = 'propeptide';
+
+   if ($value->isa("Bio::Location::SplitLocationI")) {
+       my $split = new Bio::Location::Split(-seq_id=>$seqid);
+       foreach my $loc ( $value->each_Location(1) ) {
+
+           my $match = new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
+                                                 -end => ($value->end-1) % 3 +1,
+                                                 -seq_id => 'frame',
+                                                 -strand => 1
+                                       );
+           $split->add_sub_Location($match);
+       }
+       return $split;
+   } else {
+       return new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
+                                        -end => ($value->end-1) % 3 +1,
+                                        -seq_id => 'frame',
+                                        -strand => 1
+                                       );
+   }
+}
+
+
+=head2 _reverse_translate
+
+ Title   : _reverse_translate
+ Usage   : $newpos = $obj->_reverse_translate(5);
+ Function: Reverse translate the location from the propeptide
+           coordinate system to a new value in the CSD.
+           Note that a single peptide location expands to cover
+           the codon triplet
+ Example :
+ Returns : new location in the CDS coordinate system
+ Args    : a Bio::Location::Simple or Bio::Location::SplitLocationI
+
+=cut
+
+sub _reverse_translate {
+   my ($self,$value) = @_;
+
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless defined $value &&
+           ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
+
+   my $seqid = 'cds';
+
+   if ($value->isa("Bio::Location::SplitLocationI")) {
+       my $split = new Bio::Location::Split(-seq_id=>$seqid);
+       foreach my $loc ( $value->each_Location(1) ) {
+
+           my $match = new Bio::Location::Simple(-start => $value->start * 3 - 2,
+                                               -end => $value->end * 3,
+                                               -seq_id => $seqid,
+                                               -strand => 1
+                                              );
+           $split->add_sub_Location($match);
+       }
+       return $split;
+
+   } else {
+       return new Bio::Location::Simple(-start => $value->start * 3 - 2,
+                                        -end => $value->end * 3,
+                                        -seq_id => $seqid,
+                                        -strand => 1
+                                       );
+   }
+}
+
+
+=head2 _check_direction
+
+ Title   : _check_direction
+ Usage   : $obj->_check_direction();
+ Function: Check and swap when needed the direction the location
+           mapping Pairs based on input and output values
+ Example :
+ Returns : new location
+ Args    : a Bio::Location::Simple
+
+=cut
+
+sub _check_direction {
+   my ($self) = @_;
+
+   my $new_direction = 1;
+   $new_direction = -1 if $self->{'_in'} > $self->{'_out'};
+
+   unless ($new_direction == $self->{_direction} ) {
+       map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
+       # record the changed direction;
+       $self->{_direction} *= -1;
+   }
+   1;
+}
+
+
+=head2 _get_path
+
+ Title   : _get_path
+ Usage   : $obj->_get_path('peptide');
+ Function: internal method for finding that shortest path between
+           input and output coordinate systems.
+           Calculations and caching are handled by the graph class.
+           See L<Bio::Coordinate::Graph>.
+ Example :
+ Returns : array of the mappers
+ Args    : none
+
+=cut
+
+sub _get_path {
+   my ($self) = @_;
+
+   my $start = $self->{'_in'} || 0;
+   my $end = $self->{'_out'} || 0;
+
+   # note the order
+   # always go from smaller to bigger: it  makes caching more efficient
+   my $reverse;
+   if ($start > $end) {
+       ($start, $end) = ($end, $start );
+       $reverse++;
+   }
+
+   my @mappers;
+   if (exists $self->{'_previous_path'} and
+       $self->{'_previous_path'} eq "$start$end" ) {
+       # use cache
+       @mappers = @{$self->{'_mapper_path'}};
+   } else {
+       my $mapper;
+       my $prev_node = '';
+       @mappers =
+	   map { $mapper = "$prev_node-$_"; $prev_node = $_; $mapper; }
+	       $self->{'_graph'}->shortest_path($start, $end);
+       shift @mappers;
+
+       $self->{'_previous_path'} = "$start$end";
+       $self->{'_mapper_path'} = \@mappers;
+   }
+
+   $reverse ? return reverse @mappers : return @mappers;
+}
+
+
+1;
+