diff variant_effect_predictor/Bio/LiveSeq/Transcript.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/LiveSeq/Transcript.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,858 @@
+# $Id: Transcript.pm,v 1.17 2002/09/25 08:58:23 heikki Exp $
+#
+# bioperl module for Bio::LiveSeq::Transcript
+#
+# Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Transcript - Transcript class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This stores informations about coding sequences (CDS).
+The implementation is that a Transcript object accesses a collection of
+Exon objects, inferring from them the nucleotide structure and sequence.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana@ebi.ac.uk, jinsana@gmx.net
+
+Address: 
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Transcript;
+$VERSION=5.2;
+
+# Version history:
+# Tue Mar 21 14:38:02 GMT 2000 v 1.0 begun
+# Tue Mar 21 17:45:31 GMT 2000 v 1.1 new() created
+# Wed Mar 22 19:40:13 GMT 2000 v 1.4 all_Exons() seq(), length(), all_labels()
+# Thu Mar 23 19:08:36 GMT 2000 v 1.5 follows() replaces is_downstream()
+# Thu Mar 23 20:59:02 GMT 2000 v 2.0 valid _inside_position label position
+# Fri Mar 24 18:33:18 GMT 2000 v 2.2 rewritten position(), now should work with diverse coordinate_starts
+# Sat Mar 25 04:08:18 GMT 2000 v 2.21 added firstlabel to position and label so that Translation can exploit it
+# Sat Mar 25 06:39:27 GMT 2000 v 2.3 started override of subseq, works just internally
+# Mon Mar 27 19:05:15 BST 2000 v 2.4 subseq finished, it works with coord_start
+# Fri Mar 31 18:48:07 BST 2000 v 2.5 started downstream_seq()
+# Mon Apr  3 17:37:34 BST 2000 v 2.52 upstream_seq added
+# Fri Apr  7 03:29:43 BST 2000 v 2.6 up/downstream now can use Gene information
+# Sat Apr  8 12:59:58 BST 2000 v 3.0 all_Exons now skips no more valid exons
+# Sat Apr  8 13:32:08 BST 2000 v 3.1 get_Translation added
+# Wed Apr 12 12:37:08 BST 2000 v 3.2 all_Exons updates Transcript's start/end
+# Wed Apr 12 12:41:22 BST 2000 v 3.3 each Exon has "transcript" attribute added
+# Wed Apr 12 16:35:56 BST 2000 v 3.4 started coding _deletecheck
+# Wed Apr 12 23:40:19 BST 2000 v 3.5 start and end redefined here, no more checks after deletion to refix start/end attributes. And no need of those. Eliminated hence from new()
+# Wed Apr 12 23:47:02 BST 2000 v 3.9 finished _deletecheck, debugging starts
+# Thu Apr 13 00:37:16 BST 2000 v 4.0 debugging done: seems working OK
+# Thu Apr 27 16:18:55 BST 2000 v 4.1 translation_table added
+# Tue May 16 17:57:40 BST 2000 v 4.11 corrected bug in docs of downstream_seq
+# Wed May 17 16:48:34 BST 2000 v 4.2 frame() added
+# Mon May 22 15:22:12 BST 2000 v 4.21 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!)
+# Thu Jun 22 20:02:39 BST 2000 v 4.3 valid() moved to SeqI, to be inherited as the general one
+# Thu Jun 22 20:27:57 BST 2000 v 4.4 optimized labelsubseq coded!
+# Thu Jun 22 21:17:51 BST 2000 v 4.44 in_which_Exon() added
+# Sat Jun 24 00:49:55 BST 2000 v 4.5 new subseq() that exploits the new fast labelsubseq
+# Thu Jun 29 16:31:19 BST 2000 v 5.0 downsream_seq and upstream_seq recoded so that if entry is RNA it will return sequences up to the entry limits -> it should be properly debugged, expecially the upstream_seq one
+# Wed Jul 12 04:01:53 BST 2000 v 5.1 croak -> carp+return(-1)
+# Wed Mar 28 15:16:21 BST 2001 v 5.2 carp -> warn,throw (coded methods in SeqI)
+
+use strict;
+# use Carp qw(carp cluck);
+use vars qw($VERSION @ISA);
+use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
+use Bio::LiveSeq::Exon 1.0; # uses Exon to create new exon in case of deletion
+@ISA=qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $transcript = Bio::LiveSeq::Transcript->new(-exons => \@obj_refs);
+
+  Function: generates a new Bio::LiveSeq::Transcript
+  Returns : reference to a new object of class Transcript
+  Errorcode -1
+  Args    : reference to an array of Exon object references
+
+=cut
+
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my ($obj,%transcript);
+
+  my @exons=@{$args{-exons}};
+
+  $obj = \%transcript;
+  $obj = bless $obj, $class;
+
+  unless (@exons) {
+    $obj->warn("$class not initialised because exons array empty");
+    return(-1);
+  }
+
+  # now useless, after start and end methods have been overridden here
+  my $firstexon = $exons[0];
+  #my $lastexon = $exons[-1];
+  #my $start = $firstexon->start;
+  #my $end = $lastexon->end;
+  my $strand = $firstexon->strand;
+  my $seq = $firstexon->{'seq'};
+  $obj->alphabet('rna');
+
+  unless (_checkexons(\@exons)) {
+    $obj->warn("$class not initialised because of problems in the exon structure");
+    return(-1);
+  }
+  $obj->{'strand'}=$strand;
+  $obj->{'exons'}=\@exons;
+  $obj->{'seq'}=$seq;
+
+  # set Transcript into each Exon
+  my $exon;
+  foreach $exon (@exons) {
+    $exon->{'transcript'}=$obj;
+  }
+  return $obj;
+}
+
+
+=head2 all_Exons
+ 
+ Title   : all_Exons
+ Usage   : $transcript_obj->all_Exons()
+ Function: returns references to all Exon objects the Transcript is composed of
+ Example : foreach $exon ($transcript->all_Exons()) { do_something }
+ Returns : array of object references
+ Args    : none
+
+=cut
+
+sub all_Exons {
+  my $self=shift;
+  my $exonsref=$self->{'exons'};
+  my @exons=@{$exonsref};
+  my @newexons;
+  my $exon;
+  foreach $exon (@exons) {
+    unless ($exon->obj_valid) {
+      $self->warn("$exon no more valid, start or end label lost, skipping....",1); # ignorable
+    } else {
+      push(@newexons,$exon);
+    }
+  }
+  if ($#exons != $#newexons) {
+    # update exons field
+    $self->{'exons'}=\@newexons;
+  }
+  return (@newexons);
+}
+
+=head2 downstream_seq
+ 
+ Title   : downstream_seq
+ Usage   : $transcript_obj->downstream_seq()
+         : $transcript_obj->downstream_seq(64)
+ Function: returns a string of nucleotides downstream of the end of the
+           CDS. If there is some information of the real mRNA, from features in
+           an attached Gene object, it will return up to those boundaries.
+           Otherwise it will return 1000 nucleotides.
+           If an argument is given it will override the default 1000 number
+           and return instead /that/ requested number of nucleotides.
+           But if a Gene object is attached, this argument will be ignored.
+ Returns : string
+ Args    : an optional integer number of nucleotides to be returned instead of
+           the default if no gene attached
+
+=cut
+
+sub downstream_seq {
+  my ($self,$howmany)=@_;
+  my $str;
+  if (defined ($howmany)) {
+    unless ($howmany > 0) {
+      $self->throw("No sense in asking less than 1 downstream nucleotides!");
+    }
+  } else {
+    unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve until the end
+      #$str=$DNAobj->labelsubseq($self->end,undef,undef,"unsecuremoderequested");
+      #return(substr($str,1)); # delete first nucleotide that is the last of Transcript
+      if ($self->gene) { # if there is Gene object attached fetch relevant info
+	$str=$self->{'seq'}->labelsubseq($self->end,undef,$self->gene->maxtranscript->end); # retrieve from end of this Transcript to end of the maxtranscript
+	$str=substr($str,1); # delete first nucleotide that is the last of Transcript
+	if (CORE::length($str) > 0) {
+	  return($str);
+	} else { # if there was no downstream through the gene's maxtranscript, go the usual way
+	  $howmany = 1000;
+	}
+      } else {
+	$howmany = 1000;
+      }
+    }
+  }
+  my @exons=$self->all_Exons;
+  my $strand=$self->strand();
+  my $lastexon=$exons[-1];
+  my $lastexonlength=$lastexon->length;
+  # $howmany nucs after end of last exon
+  #my $downstream_seq=$lastexon->subseq($lastexonlength+1,undef,$howmany);
+  my $downstream_seq;
+
+  if ($howmany) {
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,$howmany,undef,"unsecuremoderequested"),1);
+  } else {
+    if ($strand == 1) {
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->end,"unsecuremoderequested"),1);
+    } else {
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->start,"unsecuremoderequested"),1);
+    }
+  }
+  return $downstream_seq;
+}
+
+=head2 upstream_seq
+ 
+ Title   : upstream_seq
+ Usage   : $transcript_obj->upstream_seq()
+         : $transcript_obj->upstream_seq(64)
+ Function: just like downstream_seq but returns nucleotides before the ATG
+ Note    : the default, if no Gene information present and no nucleotides
+           number given, is to return up to 400 nucleotides.
+
+=cut
+
+sub upstream_seq {
+  my ($self,$howmany)=@_;
+  if (defined ($howmany)) {
+    unless ($howmany > 0) {
+      $self->throw("No sense in asking less than 1 upstream nucleotides!");
+    }
+  } else {
+    unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve from the start
+      if ($self->gene) { # if there is Gene object attached fetch relevant info
+	my $str=$self->{'seq'}->labelsubseq($self->gene->maxtranscript->start,undef,$self->start); # retrieve from start of maxtranscript to start of this Transcript
+	chop $str; # delete last nucleotide that is the A of starting ATG
+	if (length($str) > 0) {
+	  return($str);
+	} else { # if there was no upstream through the gene's maxtranscript, go the usual way
+	  $howmany = 400;
+	}
+      } else {
+	$howmany = 400;
+      }
+    }
+  }
+  my @exons=$self->all_Exons;
+  my $firstexon=$exons[0];
+  
+  my $upstream_seq;
+  my $strand=$self->strand();
+
+  if ($howmany) {# $howmany nucs before begin of first exon
+    my $labelbefore=$firstexon->label(-$howmany,$firstexon->start);
+    if ($labelbefore < 1) {
+      if ($strand == 1) {
+	$labelbefore=$self->{'seq'}->start;
+      } else {
+	$labelbefore=$self->{'seq'}->end;
+      }
+    }
+    $upstream_seq=$firstexon->labelsubseq($labelbefore,undef,$firstexon->start,"unsecuremoderequested");
+    chop $upstream_seq;
+  } else {
+    if ($strand == 1) {
+      $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->start,undef,$self->start,"unsecuremoderequested");
+      chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
+    } else {
+      $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->end,undef,$self->start,"unsecuremoderequested");
+      chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
+    }
+  }
+  return $upstream_seq;
+}
+
+# These get redefined here, overriding the SeqI one because they draw their
+# information from the Exons a Transcript is built of
+# optional argument: firstlabel. If not given, it checks coordinate_start
+#                                This is useful when called by Translation
+#                                also used by _delete
+sub label {
+  my ($self,$position,$firstlabel)=@_;
+  unless ($position) {  # if position = 0 complain ?
+    $self->warn("Position not given or position 0");
+    return (-1);
+  }
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my ($label,@labels,$length,$arraypos);
+  unless (defined ($firstlabel)) {
+    $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  }
+  my $coord_pos=$self->_inside_position($firstlabel);
+  $length=$self->length;
+  #if ($strand == 1) {
+    if ($position < 1) {
+      $position++; # to account for missing of 0 position
+    }
+    $arraypos=$position+$coord_pos-2;
+    #print "\n=-=-=-=-DEBUG: arraypos $arraypos, pos $position, coordpos: $coord_pos";
+    if ($arraypos < 0) {
+      $label=$self->{'seq'}->label($arraypos,$start,$strand); #?
+    } elsif ($arraypos >= $length) {
+      $label=$self->{'seq'}->label($arraypos-$length+2,$end,$strand); #?
+    } else { # inside the Transcript
+      @labels=$self->all_labels;
+      $label=$labels[$arraypos];
+    }
+  #}
+}
+
+# argument: label
+# returns: position of label according to coord_start
+# errorcode: 0 label not found
+# optional argument: firstlabel. If not given, it checks coordinate_start
+#                                This is useful when called by Translation
+sub position {
+  my ($self,$label,$firstlabel)=@_;
+  unless ($self->{'seq'}->valid($label)) {
+    $self->warn("label is not valid");
+    return (0);
+  }
+  unless (defined ($firstlabel)) {
+    $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  }
+  if ($label == $firstlabel) {
+    return (1);
+  }
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my ($position,$in_pos,$out_pos,$coord_pos);
+  my $length=$self->length;
+  $coord_pos=$self->_inside_position($firstlabel);
+  if ($self->valid($label)) { # if label is inside the Transcript
+    $in_pos=$self->_inside_position($label);
+    $position=$in_pos-$coord_pos+1;
+    if ($position <= 0) {
+      return ($position-1); # accounts for the missing of the 0 position
+    }
+  } else {
+    if ($self->follows($end,$label)) { # label after end of transcript
+      $out_pos=$self->{'seq'}->position($label,$end,$strand);
+      #print "\n+++++++++DEBUG label $label FOLLOWS end $end outpos $out_pos coordpos $coord_pos";
+      $position=$out_pos+$length-$coord_pos;
+    } elsif ($self->follows($label,$start)) { # label before begin of transcript
+      #print "\n+++++++++DEBUG label $label BEFORE start $start outpos $out_pos coordpos $coord_pos";
+      $out_pos=$self->{'seq'}->position($label,$start,$strand);
+      $position=$out_pos-$coord_pos+1;
+    } else { # label is in intron (not valid, not after, not before)!
+      $self->warn("Cannot give position of label pointing to intron according to CDS numbering!",1);
+      return (0);
+    }
+  }
+  return ($position);
+}
+
+sub seq {
+  my $self=shift;
+  my ($exon,$str);
+  my @exons=$self->all_Exons();
+  foreach $exon (@exons) {
+    $str .= $exon->seq();
+  }
+  return $str;
+}
+
+sub length {
+  my $self=shift;
+  my ($exon,$length);
+  my @exons=$self->all_Exons();
+  foreach $exon (@exons) {
+    $length += $exon->length();
+  }
+  return $length;
+}
+
+sub all_labels {
+  my $self=shift;
+  my ($exon,@labels);
+  my @exons=$self->all_Exons();
+  foreach $exon (@exons) {
+    push (@labels,$exon->all_labels());
+  }
+  return @labels;
+}
+
+# redefined here so that it will retrieve effective subseq without introns
+# otherwise it would have retrieved an underlying DNA (possibly with introns)
+# subsequence
+# Drawback: this is really bulky, label->position and then a call to
+# subseq that will do the opposite position-> label
+#
+# one day this can be rewritten as the main one so that the normal subseq
+# will rely on this one and hence avoid this double (useless and lengthy)
+# conversion between labels and positions
+sub old_labelsubseq {
+  my ($self,$start,$length,$end)=@_;
+  my ($pos1,$pos2);
+  if ($start) {
+    unless ($self->valid($start)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    $pos1=$self->position($start);
+  }
+  if ($end) {
+    if ($end == $start) {
+      $length=1;
+    } else {
+      unless ($self->valid($end)) {
+	$self->warn("End label not valid"); return (-1);
+      }
+      unless ($self->follows($start,$end) == 1) {
+	$self->warn("End label does not follow Start label!"); return (-1);
+      }
+      $pos2=$self->position($end);
+      undef $length;
+    }
+  }
+  return ($self->subseq($pos1,$pos2,$length));
+}
+
+# rewritten, eventually
+
+sub labelsubseq {
+  my ($self,$start,$length,$end,$unsecuremode)=@_;
+  unless (defined $unsecuremode && 
+	  $unsecuremode eq "unsecuremoderequested") 
+  { # to skip security checks (faster)
+    if ($start) {
+      unless ($self->valid($start)) {
+	$self->warn("Start label not valid"); return (-1);
+      }
+    } else {
+      $start=$self->start;
+    }
+    if ($end) {
+      if ($end == $start) {
+	$length=1;
+	undef $end;
+      } else {
+	undef $length; # end argument overrides length argument
+	unless ($self->valid($end)) {
+	  $self->warn("End label not valid"); return (-1);
+	}
+	unless ($self->follows($start,$end) == 1) {
+	  $self->warn("End label does not follow Start label!"); return (-1);
+	}
+      }
+    } else {
+      $end=$self->end;
+    }
+  }
+  my ($seq,$exon,$startexon,$endexon); my @exonlabels;
+  my @exons=$self->all_Exons;
+  EXONCHECK:
+  foreach $exon (@exons) {
+    if ((!(defined($startexon)))&&($exon->valid($start))) { # checks only if not yet found
+      $startexon=$exon;
+    }
+    if ($exon->valid($end)) {
+      $endexon=$exon;
+    }
+    if ((!(defined($seq)) && (defined($startexon)))) { # initializes only once
+      if ((defined($endexon)) && ($endexon eq $startexon)) { # then perfect, we are finished
+	if ($length) {
+	  $seq = $startexon->labelsubseq($start,$length,undef,"unsecuremoderequested");
+
+
+	  last EXONCHECK;
+	} else {
+	  $seq = $startexon->labelsubseq($start,undef,$end,"unsecuremoderequested");
+	}
+	last EXONCHECK;
+      } else { # get up to the end of the exon
+	$seq = $startexon->labelsubseq($start,undef,undef,"unsecuremoderequested");
+      }
+    }
+    if (($startexon)&&($exon ne $startexon)) {
+      if (defined($endexon)) { # we arrived to the last exon
+	$seq .= $endexon->labelsubseq(undef,undef,$end,"unsecuremoderequested"); # get from the start of the exon
+	last EXONCHECK;
+
+      } elsif (defined($startexon)) { # we are in a whole-exon-in-the-middle case
+	  $seq .= $exon->seq; # we add it completely to the seq
+      } # else, we still have to reach the start point, exon useless, we move on
+      if ($length) { # if length argument specified
+	if (($seq && (CORE::length($seq) >= $length))) {
+	  last EXONCHECK;
+	}
+      }
+    }
+  }
+  if ($length) {
+    return (substr($seq,0,$length));
+  } else {
+    return ($seq);
+  }
+}
+
+
+# argument: label
+# returns: the objref and progressive number of the Exon containing that label
+# errorcode: -1
+sub in_which_Exon {
+  my ($self,$label)=@_;
+  my ($count,$exon);
+  my @exons=$self->all_Exons;
+  foreach $exon (@exons) {
+    $count++; # 1st exon is numbered "1"
+    if ($exon->valid($label)) {
+      return ($exon,$count)
+    }
+  }
+  return (-1); # if nothing found
+}
+
+# recoded to exploit the new fast labelsubseq()
+# valid only inside Transcript
+sub subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+  my ($str,$startlabel,$endlabel);
+  if (defined ($pos1)) {
+    if ($pos1 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    if ((defined ($pos2))&&($pos1>$pos2)) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $startlabel=$self->label($pos1);
+    unless ($self->valid($startlabel)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    if ($startlabel < 1) {
+      $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
+    }
+  } else {
+    $startlabel=$self->start;
+  }
+  if (defined ($pos2)) {
+    if ($pos2 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    undef $length;
+    if ((defined ($pos1))&&($pos1>$pos2)) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $endlabel=$self->label($pos2);
+    unless ($self->valid($endlabel)) {
+      $self->warn("End label not valid"); return (-1);
+    }
+    if ($endlabel < 1) {
+      $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
+    }
+  } else {
+    unless (defined ($length)) {
+      $endlabel=$self->end;
+    }
+  }
+  return ($self->labelsubseq($startlabel,$length,$endlabel,"unsecuremoderequested"));
+}
+
+# works only inside the transcript, complains if asked outside
+sub old_subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+  my ($str,$startcount,$endcount,$seq,$seqlength);
+  if (defined ($length)) {
+    if ($length < 1) {
+      $self->warn("No sense asking for a subseq of length < 1");
+      return (-1);
+    }
+  }
+  my $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  my $coord_pos=$self->_inside_position($firstlabel); # TESTME old
+  $seq=$self->seq;
+  $seqlength=CORE::length($seq);
+  unless (defined ($pos1)) {
+    $startcount=1+$coord_pos-1; # i.e. coord_pos
+  } else {
+    if ($pos1 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    } elsif ($pos1 < 0) {
+      $pos1++;
+    }
+    if ((defined ($pos2))&&($pos1>$pos2)) {
+      $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
+      return (-1);
+    }
+    $startcount=$pos1+$coord_pos-1;
+  }
+  unless (defined ($pos2)) {
+  ;
+  } else {
+    if ($pos2 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    } elsif ($pos2 < 0) {
+      $pos2++;
+    }
+    if ((defined ($pos1))&&($pos1>$pos2)) {
+      $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
+      return (-1);
+    }
+    $endcount=$pos2+$coord_pos-1;
+    if ($endcount > $seqlength) {
+      #print "\n###DEBUG###: pos1 $pos1 pos2 $pos2 coordpos $coord_pos endcount $endcount seqln $seqlength\n";
+      $self->warn("Cannot access end position after the end of Transcript");
+      return (-1);
+    }
+    $length=$endcount-$startcount+1;
+  }
+  #print "\n###DEBUG pos1 $pos1 pos2 $pos2 start $startcount end $endcount length $length coordpos $coord_pos\n";
+  my $offset=$startcount-1;
+  if ($offset < 0) {
+    $self->warn("Cannot access startposition before the beginning of Transcript, returning from start",1); # ignorable
+    return (substr($seq,0,$length));
+  } elsif ($offset >= $seqlength) {
+    $self->warn("Cannot access startposition after the end of Transcript");
+    return (-1);
+  } else {
+    $str=substr($seq,$offset,$length);
+    if (CORE::length($str) < $length) {
+      $self->warn("Attention, cannot return the length requested ".
+		  "for subseq",1) if $self->verbose > 0; # ignorable
+    }
+    return $str;
+  }
+}
+
+# redefined so that it doesn't require other methods (after deletions) to
+# reset it.
+sub start {
+  my $self = shift;
+  my $exonsref=$self->{'exons'};
+  my @exons=@{$exonsref};
+  return ($exons[0]->start);
+}
+
+sub end {
+  my $self = shift;
+  my $exonsref=$self->{'exons'};
+  my @exons=@{$exonsref};
+  return ($exons[-1]->end);
+}
+
+
+# internal methods begin here
+
+# returns: position of label in transcript's all_labels
+#          with STARTlabel == 1
+# errorcode 0 -> label not found
+# argument: label
+sub _inside_position {
+  my ($self,$label)=@_;
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my ($position,$checkme);
+  my @labels=$self->all_labels;
+  foreach $checkme (@labels) {
+    $position++;
+    if ($label == $checkme) {
+      return ($position);
+    }
+  }
+  return (0);
+}
+
+# returns 1 OK or 0 ERROR
+# arguments: reference to array of Exon object references
+sub _checkexons {
+  my ($exon,$thisstart);
+  my $self=$exon;
+  my $exonsref=$_[0];
+  my @exons=@{$exonsref};
+
+  my $firstexon = $exons[0];
+
+  unless (ref($firstexon) eq "Bio::LiveSeq::Exon") {
+    $self->warn("Object not of class Exon");
+    return (0);
+  }
+  my $strand = $firstexon->strand;
+
+  my $prevend = $firstexon->end;
+  shift @exons; # skip first one
+  foreach $exon (@exons) {
+    unless (ref($exon) eq "Bio::LiveSeq::Exon") { # object class check
+      $self->warn("Object not of class Exon");
+      return (0);
+    }
+    if ($exon->strand != $strand) { # strand consistency check
+      $self->warn("Exons' strands not consistent when trying to create Transcript");
+      return (0);
+    }
+    $thisstart = $exon->start;
+    unless ($exon->{'seq'}->follows($prevend,$thisstart,$strand)) {
+      $self->warn("Exons not in correct order when trying to create Transcript");
+      return (0);
+    }
+    $prevend = $exon->end;
+  }
+  return (1);
+}
+
+=head2 get_Translation
+
+  Title   : valid
+  Usage   : $translation = $obj->get_Translation()
+  Function: retrieves the reference to the object of class Translation (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Translation {
+  my $self=shift;
+  return ($self->{'translation'}); # this is set when Translation->new is called
+}
+
+# this checks so that deletion spanning multiple exons is
+# handled accordingly and correctly
+# arguments: begin and end label of a deletion
+# this is called BEFORE any deletion in the chain
+sub _deletecheck {
+  my ($self,$startlabel,$endlabel)=@_;
+  my $exonsref=$self->{'exons'};
+  my @exons=@{$exonsref};
+  my ($startexon,$endexon,$exon);
+  $startexon=$endexon=0;
+  foreach $exon (@exons) {
+    if (($startexon == 0)&&($exon->valid($startlabel))) {
+      $startexon=$exon; # exon containing start of deletion
+    }
+    if (($endexon == 0)&&($exon->valid($endlabel))) {
+      $endexon=$exon; # exon containing end of deletion
+    }
+    if (($startexon)&&($endexon)) {
+      last; # don't check further
+    }
+  }
+  my $nextend=$self->label(2,$endlabel); # retrieve the next label
+  my $prevstart=$self->label(-1,$startlabel); # retrieve the prev label
+
+  if ($startexon eq $endexon) { # intra-exon deletion
+    if (($startexon->start eq $startlabel) && ($startexon->end eq $endlabel)) {
+      # let's delete the entire exon
+      my @newexons;
+      foreach $exon (@exons) {
+	unless ($exon eq $startexon) {
+	  push(@newexons,$exon);
+	}
+      }
+      $self->{'exons'}=\@newexons;
+    } elsif ($startexon->start eq $startlabel) { # special cases
+      $startexon->{'start'}=$nextend; # set a new start of exon
+    } elsif ($startexon->end eq $endlabel) {
+      $startexon->{'end'}=$prevstart; # set a new end of exon
+    } else {
+      return; # no problem
+    }
+  } else { # two new exons to be created, inter-exons deletion
+    my @newexons;
+    my $exonobj;
+    my $dna=$self->{'seq'};
+    my $strand=$self->strand;
+    my $notmiddle=1; # flag for skipping exons in the middle of deletion
+    foreach $exon (@exons) {
+      if ($exon eq $startexon) {
+	$exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$exon->start,'-end'=>$prevstart,'-strand'=>$strand); # new partial exon
+	push(@newexons,$exonobj);
+	$notmiddle=0; # now we enter totally deleted exons
+      } elsif ($exon eq $endexon) {
+	$exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$nextend,'-end'=>$exon->end,'-strand'=>$strand); # new partial exon
+	push(@newexons,$exonobj);
+	$notmiddle=1; # exiting totally deleted exons
+      } else {
+	if ($notmiddle) { # if before or after exons with deletion
+	  push(@newexons,$exon); 
+	}# else skip them
+      }
+    }
+    $self->{'exons'}=\@newexons;
+  }
+}
+
+=head2 translation_table
+
+ Title   : translation_table
+ Usage   : $name = $obj->translation_table;
+         : $name = $obj->translation_table(11);
+ Function: Returns or sets the translation_table used for translating the
+           transcript.
+           If it has never been set, it will return undef.
+ Returns : an integer
+
+=cut
+
+sub translation_table {
+  my ($self,$value) = @_;
+  if (defined $value) {
+    $self->{'translation_table'} = $value;
+  }
+  unless (exists $self->{'translation_table'}) {
+    return (undef);
+  } else {
+    return $self->{'translation_table'};
+  }
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $transcript->frame($label);
+ Function: Returns the frame of a particular nucleotide.
+           Frame can be 0 1 or 2 and means the position in the codon triplet
+           of the particulat nucleotide. 0 is the first codon_position.
+           Codon_position (1 2 3) is simply frame+1.
+           If the label asked for is not inside the Transcript, -1 will be
+           returned.
+ Args    : a label
+ Returns : 0 1 or 2
+ Errorcode -1
+
+=cut
+
+# args: label
+# returns: frame of nucleotide (0 1 2)
+# errorcode: -1
+sub frame {
+  my ($self,$inputlabel)=@_;
+  my @labels=$self->all_labels;
+  my ($label,$frame,$count);
+  foreach $label (@labels) {
+    if ($inputlabel == $label) {
+      return ($count % 3);
+    }
+    $count++; # 0 1 2 3 4....
+  }
+  return (-1); # label not found amid Transcript labels
+}
+
+1;