diff variant_effect_predictor/Bio/EnsEMBL/SeqFeature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/SeqFeature.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1255 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature.
+
+=head1 DESCRIPTION
+
+Do not use this module if you can avoid it. It has been replaced by
+Bio::EnsEMBL::Feature.  This module has a long history of usage but has
+become very bloated, and quite unweildy.  It was decided to replace
+it completely with a smaller, light-weight feature class rather than
+attempting to refactor this class, and maintain strict backwards
+compatibility.
+
+Part of the complexity of this class was in its extensive
+inheritance. As an example the following is a simplified inheritance
+heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature:
+
+  Bio::EnsEMBL::DnaAlignFeature
+  Bio::EnsEMBL::BaseAlignFeature
+  Bio::EnsEMBL::FeaturePair
+  Bio::EnsEMBL::SeqFeature
+  Bio::EnsEMBL::SeqFeatureI
+  Bio::SeqFeatureI
+  Bio::RangeI
+  Bio::Root::RootI
+
+The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much
+easier to understand and maintain.
+
+=head1 METHODS
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::EnsEMBL::SeqFeature;
+
+use vars qw(@ISA);
+use strict;
+
+
+use Bio::EnsEMBL::SeqFeatureI;
+use Bio::EnsEMBL::Analysis;
+use Bio::EnsEMBL::Root;
+
+@ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI);
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+
+sub new {
+  my($caller,@args) = @_;
+
+  my $self = {};
+
+  if(ref $caller) {
+    bless $self, ref $caller;
+  } else {
+    bless $self, $caller;
+  }
+
+  $self->{'_gsf_tag_hash'} = {};
+  $self->{'_gsf_sub_array'} = [];
+  $self->{'_parse_h'} = {};
+  $self->{'_is_splittable'} = 0;
+
+  my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag,
+      $primary_tag, $percent_id, $p_value, $phase, $end_phase) =
+
+      &rearrange([qw(START
+                            END
+                            STRAND
+                            FRAME
+                            SCORE
+                            ANALYSIS
+                            SEQNAME
+                            SOURCE_TAG
+                            PRIMARY_TAG
+                            PERCENT_ID
+                            P_VALUE
+                            PHASE
+                            END_PHASE
+                            )],@args);
+
+  #  $gff_string && $self->_from_gff_string($gff_string);
+
+  if ( defined $analysis  && $analysis ne "")   { $self->analysis($analysis)};
+  if ( defined ($start) && $start ne "" )       { $self->start($start)};
+  if ( defined ($end )  && $end   ne "" )       { $self->end($end)}
+  if ( defined $strand  && $strand ne "")       { $self->strand($strand)}
+  if ( defined $frame  && $frame ne "")         { $self->frame($frame)}
+  if ( defined $score  && $score ne "")         { $self->score($score)}
+  if ( defined $seqname && $seqname ne "")      { $self->seqname($seqname)};
+  if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)};
+  if ( defined $p_value && $p_value ne "")      { $self->p_value($p_value)};
+  if ( defined $phase && $phase ne "")          { $self->phase($phase)};
+  if ( defined $end_phase && $end_phase ne "")  { $self->end_phase($end_phase)};
+
+  return $self; # success - we hope!
+
+}
+
+
+
+
+
+
+=head2 start
+
+ Title   : start
+ Usage   : $start = $feat->start
+           $feat->start(20)
+ Function: Get/set on the start coordinate of the feature
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub start{
+    my ($self,$value) = @_;
+
+    if (defined($value)) {
+        if ($value !~ /^\-?\d+/ ) {
+        $self->throw("$value is not a valid start");
+    }
+	
+    $self->{'_gsf_start'} = $value
+   }
+
+    return $self->{'_gsf_start'};
+
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $end = $feat->end
+           $feat->end($end)
+ Function: get/set on the end coordinate of the feature
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub end{
+    my ($self,$value) = @_;
+
+    if (defined($value)) {
+        if( $value !~ /^\-?\d+/ ) {
+            $self->throw("[$value] is not a valid end");
+        }
+	
+        $self->{'_gsf_end'} = $value;
+    }
+
+   return $self->{'_gsf_end'};
+}
+
+=head2 length
+
+ Title   : length
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub length{
+   my ($self,@args) = @_;
+
+   return $self->end - $self->start +1;
+}
+
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $feat->strand()
+           $feat->strand($strand)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : none
+
+
+=cut
+
+sub strand {
+    my ($self,$value) = @_;
+
+    if (defined($value)) {
+        if( $value eq '+' ) { $value = 1; }
+        if( $value eq '-' ) { $value = -1; }
+        if( $value eq '.' ) { $value = 0; }
+
+        if( $value != -1 && $value != 1 && $value != 0 ) {
+            $self->throw("$value is not a valid strand info");
+        }
+        $self->{'_gsf_strand'} = $value;
+    }
+
+    return $self->{'_gsf_strand'};
+}
+
+
+=head2 move
+
+  Arg [1]    : int $start
+  Arg [2]    : int $end
+  Arg [3]    : (optional) int $strand 
+  Example    : $feature->move(100, 200, -1);
+  Description: Moves a feature to a different location.  This is faster
+               then calling 3 seperate accesors in a large loop.
+  Returntype : none
+  Exceptions : none
+  Caller     : BaseFeatureAdaptor
+
+=cut
+
+sub move {
+  my ($self, $start, $end, $strand)  = @_;
+
+  $self->{'_gsf_start'} = $start;
+  $self->{'_gsf_end'} = $end;
+  if(defined $strand) {
+    $self->{'_gsf_strand'} = $strand;
+  }
+}
+
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $feat->score()
+           $feat->score($score)
+ Function: get/set on score information
+ Returns : float
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub score {
+    my ($self,$value) = @_;
+
+    if(defined ($value) ) {
+      if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
+          $self->throw("'$value' is not a valid score");
+      }
+      $self->{'_gsf_score'} = $value;
+  }
+
+  return $self->{'_gsf_score'};
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $feat->frame()
+           $feat->frame($frame)
+ Function: get/set on frame information
+ Returns : 0,1,2
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub frame {
+    my ($self,$value) = @_;
+
+    if (defined($value)) {
+        if( $value != 1 && $value != 2 && $value != 3 ) {
+            $self->throw("'$value' is not a valid frame");
+       }
+        $self->{'_gsf_frame'} = $value;
+    }
+
+    return $self->{'_gsf_frame'};
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+           $feat->primary_tag('exon')
+ Function: get/set on the primary tag for a feature,
+           eg 'exon'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub primary_tag{
+  my ($self,$arg) = @_;
+  
+  if (defined($arg)) {
+    # throw warnings about setting primary tag
+    my ($p,$f,$l) = caller;
+    $self->warn("$f:$l setting primary_tag now deprecated." .
+		"Primary tag is delegated to analysis object");
+  }
+
+  unless($self->analysis) {
+    return '';
+  }
+  
+  return $self->analysis->gff_feature();
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+           $feat->source_tag('genscan');
+ Function: Returns the source tag for a feature,
+           eg, 'genscan'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub source_tag{
+    my ($self,$arg) = @_;
+
+    if (defined($arg)) {
+        # throw warnings about setting primary tag
+        my ($p,$f,$l) = caller;
+        $self->warn("$f:$l setting source_tag now deprecated. " .
+		    "Source tag is delegated to analysis object");
+    }
+
+    unless($self->analysis) {
+      return "";
+    }
+
+   return $self->analysis->gff_source();
+}
+
+
+=head2 analysis
+
+ Title   : analysis
+ Usage   : $sf->analysis();
+ Function: Store details of the program/database
+           and versions used to create this feature.
+
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub analysis {
+   my ($self,$value) = @_;
+
+   if (defined($value)) {
+     unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) {
+       $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object "
+		    . "but a $value object");
+     }
+
+     $self->{_analysis} = $value;
+   } else {
+     #if _analysis is not defined, create a new analysis object
+     unless(defined $self->{_analysis}) {
+       $self->{_analysis} = new Bio::EnsEMBL::Analysis();
+     }
+   }
+
+   return $self->{_analysis};
+}
+
+=head2 validate
+
+ Title   : validate
+ Usage   : $sf->validate;
+ Function: Checks whether all the data is present in the
+           object.
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub validate {
+    my ($self) = @_;
+
+    $self->vthrow("Seqname not defined in feature")     unless defined($self->seqname);
+    $self->vthrow("start not defined in feature")       unless defined($self->start);
+    $self->vthrow("end not defined in feature")         unless defined($self->end);
+    $self->vthrow("strand not defined in feature")      unless defined($self->strand);
+    $self->vthrow("score not defined in feature")       unless defined($self->score);
+    $self->vthrow("analysis not defined in feature")    unless defined($self->analysis);
+
+    if ($self->end < $self->start) {
+      $self->vthrow("End coordinate < start coordinate");
+    }
+
+}
+
+
+
+sub vthrow {
+    my ($self,$message) = @_;
+
+    print(STDERR "Error validating feature [$message]\n");
+    print(STDERR "   Seqname     : [" . $self->{_seqname} . "]\n");
+    print(STDERR "   Start       : [" . $self->{_gsf_start} . "]\n");
+    print(STDERR "   End         : [" . $self->{_gsf_end} . "]\n");
+    print(STDERR "   Strand      : [" .
+        ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n");
+
+    print(STDERR "   Score       : [" . $self->{_gsf_score} . "]\n");
+
+    print(STDERR "   Analysis    : [" . $self->{_analysis}->dbID . "]\n");
+
+    $self->throw("Invalid feature - see dump on STDERR");
+}
+
+
+=head2 validate_prot_feature
+
+ Title   : validate_prot_feature
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Shouldn't this go as "validate" into Pro_SeqFeature?
+sub validate_prot_feature{
+    my ($self,$num) = @_;
+    $self->throw("Seqname not defined in feature")     unless defined($self->seqname);
+    $self->throw("start not defined in feature")       unless defined($self->start);
+    $self->throw("end not defined in feature")         unless defined($self->end);
+    if ($num == 1) {
+        $self->throw("score not defined in feature")       unless defined($self->score);
+        $self->throw("percent_id not defined in feature") unless defined($self->percent_id);
+        $self->throw("evalue not defined in feature") unless defined($self->p_value);
+    }
+    $self->throw("analysis not defined in feature")    unless defined($self->analysis);
+}
+
+
+
+# These methods are specified in the SeqFeatureI interface but we don't want
+# people to store data in them.  These are just here in order to keep
+# existing code working
+
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $value = $self->has_tag('some_tag')
+ Function: Returns the value of the tag (undef if
+           none)
+ Returns :
+ Args    :
+
+
+=cut
+
+sub has_tag{
+   my ($self,$tag) = (shift, shift);
+
+   return exists $self->{'_gsf_tag_hash'}->{$tag};
+}
+
+=head2 add_tag_value
+
+ Title   : add_tag_value
+ Usage   : $self->add_tag_value('note',"this is a note");
+ Returns : nothing
+ Args    : tag (string) and value (any scalar)
+
+
+=cut
+
+sub add_tag_value{
+   my ($self,$tag,$value) = @_;
+
+   if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) {
+       $self->{'_gsf_tag_hash'}->{$tag} = [];
+   }
+
+   push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
+}
+
+=head2 each_tag_value
+
+ Title   : each_tag_value
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub each_tag_value {
+   my ($self,$tag) = @_;
+   if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
+       $self->throw("asking for tag value that does not exist $tag");
+   }
+
+   return @{$self->{'_gsf_tag_hash'}->{$tag}};
+}
+
+
+=head2 all_tags
+
+ Title   : all_tags
+ Usage   : @tags = $feat->all_tags()
+ Function: gives all tags for this feature
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+sub all_tags{
+   my ($self,@args) = @_;
+
+   return keys %{$self->{'_gsf_tag_hash'}};
+}
+
+
+
+=head2 seqname
+
+  Arg [1]    : string $seqname
+  Example    : $seqname = $self->seqname();
+  Description: Obtains the seqname of this features sequence.  This is set
+               automatically when a sequence with a name is attached, or may
+               be set manually.
+  Returntype : string
+  Exceptions : none
+  Caller     : general, attach_seq
+
+=cut
+
+sub seqname{
+   my ($self,$seqname) = @_;
+
+   my $seq = $self->contig();
+
+   if(defined $seqname) {
+     $self->{_seqname} = $seqname;
+   } else {
+     if($seq && ref $seq && $seq->can('name')) {
+       $self->{_seqname} = $seq->name();
+     } 
+   }
+
+   return $self->{_seqname};
+}
+
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::PrimarySeqI object to this feature. This
+           Bio::PrimarySeqI object is for the *entire* sequence: ie
+           from 1 to 10000
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub attach_seq{
+   my ($self, $seq) = @_;
+
+   $self->contig($seq);
+}
+
+=head2 seq
+
+ Example : $tseq = $sf->seq()
+ Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature
+ Returns : a Bio::PrimarySeq object (I reckon)
+
+=cut
+
+sub seq{
+   my ($self,$arg) = @_;
+
+   if( defined $arg ) {
+       $self->throw("Calling SeqFeature::Generic->seq with an argument. " .
+		    "You probably want attach_seq");
+   }
+
+   if( ! exists $self->{'_gsf_seq'} ) {
+       return undef;
+   }
+
+   # assumming our seq object is sensible, it should not have to yank
+   # the entire sequence out here.
+
+   my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end());
+
+
+   if( $self->strand == -1 ) {
+
+       # ok. this does not work well (?)
+       #print STDERR "Before revcom", $seq->str, "\n";
+       $seq = $seq->revcom;
+       #print STDERR "After  revcom", $seq->str, "\n";
+   }
+
+   return $seq;
+}
+
+=head2 entire_seq
+
+ Title   : entire_seq
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub entire_seq{
+   my ($self) = @_;
+
+   return $self->contig;
+}
+
+
+=head2 sub_SeqFeature
+
+ Title   : sub_SeqFeature
+ Usage   : @feats = $feat->sub_SeqFeature();
+ Function: Returns an array of sub Sequence Features
+ Returns : An array
+ Args    : none
+
+
+=cut
+
+sub sub_SeqFeature {
+  my ($self) = @_;
+
+  if ( $self->{'_gsf_sub_array'} ) {
+    return @{ $self->{'_gsf_sub_array'} };
+  } else {
+    return ();
+  }
+}
+
+=head2 add_sub_SeqFeature
+
+ Title   : add_sub_SeqFeature
+ Usage   : $feat->add_sub_SeqFeature($subfeat);
+           $feat->add_sub_SeqFeature($subfeat,'EXPAND')
+ Function: adds a SeqFeature into the subSeqFeature array.
+           with no 'EXPAND' qualifer, subfeat will be tested
+           as to whether it lies inside the parent, and throw
+           an exception if not.
+
+           If EXPAND is used, the parents start/end/strand will
+           be adjusted so that it grows to accommodate the new
+           subFeature
+ Returns : nothing
+ Args    : An object which has the SeqFeatureI interface
+
+
+=cut
+
+sub add_sub_SeqFeature{
+   my ($self,$feat,$expand) = @_;
+
+   if( !$feat->isa('Bio::SeqFeatureI') ) {
+       $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
+   }
+
+   if( $expand eq 'EXPAND' ) {
+       # if this doesn't have start/end set - forget it!
+       if( !defined $self->start && !defined $self->end ) {
+           $self->start($feat->start());
+           $self->end($feat->end());
+           $self->strand($feat->strand);
+       } else {
+           my ($start,$end);
+           if( $feat->start < $self->start ) {
+               $start = $feat->start;
+           }
+
+           if( $feat->end > $self->end ) {
+               $end = $feat->end;
+           }
+
+           $self->start($start);
+           $self->end($end);
+
+       }
+   } else {
+     if( !defined($feat->start()) || !defined($feat->end()) ||
+         !defined($self->start())  || !defined($self->end())) {
+       $self->throw( "This SeqFeature and the sub_SeqFeature must define".
+                     " start and end.");
+     }
+     if($feat->start() > $feat->end() || $self->start() > $self->end()) {
+       $self->throw("This SeqFeature and the sub_SeqFeature must have " .
+                    "start that is less than or equal to end.");
+     }
+     if($feat->start() < $self->start() || $feat->end() > $self->end() ) {
+       $self->throw("$feat is not contained within parent feature, " .
+                    "and expansion is not valid");
+     }
+   }
+
+   push(@{$self->{'_gsf_sub_array'}},$feat);
+
+}
+
+=head2 flush_sub_SeqFeature
+
+ Title   : flush_sub_SeqFeature
+ Usage   : $sf->flush_sub_SeqFeature
+ Function: Removes all sub SeqFeature
+           (if you want to remove only a subset, take
+            an array of them all, flush them, and add
+            back only the guys you want)
+ Example :
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub flush_sub_SeqFeature {
+   my ($self) = @_;
+
+   $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
+}
+
+
+sub id {
+    my ($self,$value) = @_;
+
+    if (defined($value)) {
+        $self->{_id} = $value;
+    }
+
+    return $self->{_id};
+
+}
+
+=head2 percent_id
+
+ Title   : percent_id
+ Usage   : $pid = $feat->percent_id()
+           $feat->percent_id($pid)
+ Function: get/set on percentage identity information
+ Returns : float
+ Args    : none if get, the new value if set
+
+=cut
+
+sub percent_id {
+    my ($self,$value) = @_;
+
+    if (defined($value))
+    {
+            $self->{_percent_id} = $value;
+    }
+
+    return $self->{_percent_id};
+}
+
+=head2 p_value
+
+ Title   : p_value
+ Usage   : $p_val = $feat->p_value()
+           $feat->p_value($p_val)
+ Function: get/set on p value information
+ Returns : float
+ Args    : none if get, the new value if set
+
+=cut
+
+sub p_value {
+    my ($self,$value) = @_;
+
+    if (defined($value))
+    {
+            $self->{_p_value} = $value;
+    }
+
+    return $self->{_p_value};
+}
+
+=head2 phase
+
+ Title   : phase
+ Usage   : $phase = $feat->phase()
+           $feat->phase($phase)
+ Function: get/set on start phase of predicted exon feature
+ Returns : [0,1,2]
+ Args    : none if get, 0,1 or 2 if set.
+
+=cut
+
+sub phase {
+    my ($self, $value) = @_;
+
+    if (defined($value) )
+    {
+        $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
+            $self->{_phase} = $value;
+    }
+
+    return $self->{_phase};
+}
+
+=head2 end_phase
+
+ Title   : end_phase
+ Usage   : $end_phase = $feat->end_phase()
+           $feat->end_phase($end_phase)
+ Function: returns end_phase based on phase and length of feature
+ Returns : [0,1,2]
+ Args    : none if get, 0,1 or 2 if set.
+
+=cut
+
+sub end_phase {
+   my ($self, $value) = @_;
+
+    if (defined($value))
+    {
+            $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
+            $self->{_end_phase} = $value;
+    }
+
+    return $self->{_end_phase};
+}
+
+
+sub gffstring {
+   my ($self) = @_;
+
+   my $str;
+
+   my $strand = "+";
+   
+   if ((defined $self->strand)&&($self->strand == -1)) {
+     $strand = "-";
+   }
+   
+   $str .= (defined $self->seqname)     ?   $self->seqname."\t"      :  "\t";
+   $str .= (defined $self->source_tag)  ?   $self->source_tag."\t"   :  "\t";
+   $str .= (defined $self->primary_tag) ?   $self->primary_tag."\t"  :  "\t";
+   $str .= (defined $self->start)       ?   $self->start."\t"        :  "\t";
+   $str .= (defined $self->end)         ?   $self->end."\t"          :  "\t";
+   $str .= (defined $self->score)       ?   $self->score."\t"        :  "\t";
+   $str .= (defined $self->strand)      ?   $strand."\t"             :  ".\t";
+   $str .= (defined $self->phase)       ?   $self->phase."\t"        :  ".\t";
+   eval{
+     $str .= (defined $self->end_phase)   ?   $self->end_phase."\t"        :  ".\t";
+   };
+
+   return $str;
+}
+
+
+=head2 external_db
+
+ Title   : external_db
+ Usage   : $pid = $feat->external_db()
+           $feat->external_db($dbid)
+ Function: get/set for an external db accession number (e.g.: Interpro)
+ Returns :
+ Args    : none if get, the new value if set
+
+=cut
+
+sub external_db {
+    my ($self,$value) = @_;
+
+    if (defined($value))
+    {
+            $self->{'_external_db'} = $value;
+    }
+
+    return $self->{'_external_db'};
+}
+
+
+
+=head2 contig
+
+  Arg [1]    : Bio::PrimarySeqI $seq
+  Example    : $seq = $self->contig;
+  Description: Accessor to attach/retrieve a sequence to/from a feature
+  Returntype : Bio::PrimarySeqI
+  Exceptions : none
+  Caller     : general
+
+=cut
+
+sub contig {
+  my ($self, $arg) = @_;
+
+  if ($arg) {
+    unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) {
+      $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
+    }
+    
+    $self->{'_gsf_seq'} = $arg;
+    
+    # attach to sub features if they want it
+    
+    foreach my $sf ($self->sub_SeqFeature) {
+      if ($sf->can("attach_seq")) {
+	$sf->attach_seq($arg);
+      }
+    }
+  }
+  #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'});
+#  my ($p, $f, $l) = caller;
+#  print STDERR "Caller = ".$f." ".$l."\n";
+  return $self->{'_gsf_seq'};
+}
+
+
+
+sub is_splittable {
+   my ($self, $arg) = @_;
+
+   if (defined $arg) {
+       $self->{'_is_splittable'} = $arg;
+   }
+   return $self->{'_is_splittable'};
+}
+
+
+sub transform {
+  my ($self, $slice) = @_;
+
+  unless (defined $slice) {
+
+    if ((defined $self->contig) &&
+      ($self->contig->isa("Bio::EnsEMBL::RawContig"))) {
+
+      # we are already in rawcontig coords, nothing needs to be done
+      return $self;
+
+    }
+    else {
+      # transform to raw_contig coords from Slice coords
+      return $self->_transform_to_RawContig();
+    }
+  }
+
+  if (defined $self->contig) {
+
+    if ($self->contig->isa("Bio::EnsEMBL::RawContig"))  {
+
+      # transform to slice coords from raw contig coords
+      return $self->_transform_to_Slice($slice);
+    }
+    elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) {
+
+      # transform to slice coords from other slice coords
+      return $self->_transform_between_Slices($slice);
+    }
+    else {
+
+      # Unknown contig type
+      $self->throw("Cannot transform unknown contig type @{[$self->contig]}");
+    }
+  }
+  else {
+
+    #Can't convert to slice coords without a contig to work with
+    return $self->throw("Object's contig is not defined - cannot transform");
+  }
+
+}
+
+
+sub _transform_to_Slice {
+  my ($self, $slice) = @_;
+
+  $self->throw("can't transform coordinates of $self without a contig defined")
+    unless $self->contig;
+
+  unless($self->contig->adaptor) {
+    $self->throw("cannot transform coordinates of $self without adaptor " .
+		 "attached to contig");
+  }
+
+  my $dbh = $self->contig->adaptor->db;
+
+  my $mapper = 
+    $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
+  my $rca = $dbh->get_RawContigAdaptor;
+
+  my @mapped = $mapper->map_coordinates_to_assembly(
+    $self->contig->dbID,
+    $self->start,
+    $self->end,
+    $self->strand
+  );
+
+  unless (@mapped) {
+    $self->throw("couldn't map $self to Slice");
+  }
+
+  unless (@mapped == 1) {
+    $self->throw("$self should only map to one chromosome - " .
+		 "something bad has happened ...");
+  }
+
+  if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
+    $self->warn("feature lies on gap\n");
+    return;
+  }
+
+  if( ! defined $slice->chr_name() ) {
+    my $slice_adaptor = $slice->adaptor();
+    %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )};
+  }
+   
+  # mapped coords are on chromosome - need to convert to slice
+  if($slice->strand == 1) {
+    $self->start  ($mapped[0]->start - $slice->chr_start + 1);
+    $self->end    ($mapped[0]->end   - $slice->chr_start + 1);
+    $self->strand ($mapped[0]->strand);
+  } else {
+    $self->start  ($slice->chr_end - $mapped[0]->end   + 1);
+    $self->end    ($slice->chr_end - $mapped[0]->start + 1);
+    $self->strand ($mapped[0]->strand * -1);
+  }
+
+  $self->seqname($mapped[0]->id);
+
+  #set the contig to the slice
+  $self->contig($slice);
+
+  return $self;
+}
+
+
+sub _transform_between_Slices {
+  my ($self, $to_slice) = @_;
+
+  my $from_slice = $self->contig;
+
+  $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
+   unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") );
+
+  if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
+    $self->warn("Can't transform between chromosomes: $c1 and $c2");
+    return;
+  }
+
+  my($start, $end, $strand);
+
+  #first convert to assembly coords
+  if($from_slice->strand == 1) {
+    $start  = $from_slice->chr_start + $self->start - 1;
+    $end    = $from_slice->chr_start + $self->end   - 1;
+    $strand = $self->strand;
+  } else {
+    $start  = $from_slice->chr_end - $self->end   + 1;
+    $end    = $from_slice->chr_end - $self->start + 1;
+    $strand = $self->strand;
+  }
+
+  #now convert to the other slice's coords 
+  if($to_slice->strand == 1) {
+    $self->start ($start - $to_slice->chr_start + 1); 
+    $self->end   ($end   - $to_slice->chr_start + 1); 
+    $self->strand($strand);
+  } else {
+    $self->start ($to_slice->chr_end - $end   + 1);
+    $self->end   ($to_slice->chr_end - $start + 1);
+    $self->strand($strand * -1);
+  }
+
+  $self->contig($to_slice);
+
+  return $self;
+}
+
+
+sub _transform_to_RawContig {
+  my($self) = @_;
+
+  #print STDERR "transforming ".$self." to raw contig coords\n";
+  $self->throw("can't transform coordinates of $self without a contig defined")
+   unless $self->contig;
+
+  my $slice = $self->contig;
+
+  unless($slice->adaptor) {
+    $self->throw("can't transform coordinates of $self without an adaptor " .
+		 "attached to the feature's slice");
+  }
+
+  my $dbh = $slice->adaptor->db;
+
+  my $mapper = 
+    $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
+  my $rca = $dbh->get_RawContigAdaptor;
+
+  #first convert the features coordinates to assembly coordinates
+  my($start, $end, $strand);
+  if($slice->strand == 1) {
+    $start  = $slice->chr_start + $self->start - 1;
+    $end    = $slice->chr_start + $self->end   - 1;
+    $strand = $self->strand;
+  } else {
+    $start = $slice->chr_end - $self->end   + 1;
+    $end   = $slice->chr_end - $self->start + 1;
+    $strand = $self->strand * -1;
+  }
+
+  #convert the assembly coordinates to RawContig coordinates
+  my @mapped = $mapper->map_coordinates_to_rawcontig(
+    $slice->chr_name,
+    $start,
+    $end,
+    $strand
+  );
+
+  unless (@mapped) {
+    $self->throw("couldn't map $self");
+    return $self;
+  }
+
+  if (@mapped == 1) {
+
+    if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
+      $self->warn("feature lies on gap\n");
+      return;
+    }
+
+    my $rc = $rca->fetch_by_dbID($mapped[0]->id);
+
+    $self->start     ($mapped[0]->start);
+    $self->end       ($mapped[0]->end);
+    $self->strand    ($mapped[0]->strand);
+    $self->seqname   ($mapped[0]->id);
+    #print STDERR "setting contig to be ".$mapped[0]->id."\n";
+    $self->contig($rca->fetch_by_dbID($mapped[0]->id));
+
+    return $self;
+  }
+  else {
+
+    # more than one object returned from mapper
+    # possibly more than one RawContig in region
+
+    my (@gaps, @coords);
+
+    foreach my $m (@mapped) {
+
+	if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) {
+	    push @gaps, $m;
+	}
+	elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
+	    push @coords, $m;
+	}
+    }
+
+    # case where only one RawContig maps
+    if (@coords == 1) {
+
+        $self->start  ($coords[0]->start);
+        $self->end    ($coords[0]->end);
+        $self->strand ($coords[0]->strand);
+        $self->seqname($coords[0]->id);
+	#print STDERR "2 setting contig to be ".$coords[0]->id."\n";
+        $self->contig ($rca->fetch_by_dbID($coords[0]->id));
+
+	$self->warn("Feature [$self] truncated as lies partially on a gap");
+        return $self;
+    }
+
+    unless ($self->is_splittable) {
+        $self->warn("Feature spans >1 raw contig - can't split\n");
+        return;
+    }
+
+    my @out;
+    my $obj = ref $self;
+
+    SPLIT: foreach my $map (@mapped) {
+
+      if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) {
+	$self->warn("piece of evidence lies on gap\n");
+	next SPLIT;
+      }
+
+      my $feat = $obj->new;
+
+      $feat->start  ($map->start);
+      $feat->end    ($map->end);
+      $feat->strand ($map->strand);
+      #print STDERR "3 setting contig to be ".$mapped[0]->id."\n";
+      $feat->contig ($rca->fetch_by_dbID($map->id));
+      $feat->adaptor($self->adaptor) if $self->adaptor();
+      $feat->display_label($self->display_label) if($self->can('display_label'));
+      $feat->analysis($self->analysis);
+      push @out, $feat;
+    }
+    
+    return @out;
+  }
+}
+
+
+1;