diff variant_effect_predictor/Bio/LiveSeq/AARange.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/LiveSeq/AARange.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,423 @@
+# $Id: AARange.pm,v 1.10 2001/10/22 08:22:49 heikki Exp $
+#
+# bioperl module for Bio::LiveSeq::AARange
+#
+# Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::AARange - AARange abstract class for LiveSeq
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This is used as possible parent for aminoacid range object classes.
+Or it can be used straight away to define aminoacid ranges.  The idea
+is that the ranges defined are attached to a Translation object and
+they refer to its coordinate-system when they are first created (via
+the new() method).  When they are created they are anyway linked to
+the underlying DNA LiveSeq by way of the LiveSeq labels. This allows
+to preserve the ranges even if the numbering changes in the
+Translation due to deletions or insertions.
+
+The protein sequence associated with the AARange can be accessed via
+the usual seq() or subseq() methods.
+
+The start and end of the AARange in protein coordinate system can be
+fetched with aa_start() and aa_end() methods. Note: the behaviour of
+these methods would be influenced by the coordinate_start set in the
+corresponding Translation object. This can be desirable but can also
+lead to confusion if the coordinate_start had been changed and the
+original position of the AARange was to be retrieved.
+
+start() and end() methods of the AARange will point to the labels
+identifying the first nucleotide of the first and last triplet coding
+for the start and end of the AminoAcidRange.
+
+The underlying nucleotide sequence of the AARange can be retrieved
+with the labelsubseq() method. This would retrieve the whole DNA
+sequence, including possible introns. This is called "DNA_sequence".
+
+To fetch the nucleotide sequence of the Transcript, without introns,
+the labelsubseq() of the attached Transcript (the Transcript the
+Translation comes from) has to be accessed. This is called
+"cDNA_sequence".
+
+Here are the operations to retrieve these latter two kinds of
+sequences:
+
+   $startlabel=$AARange->start;
+   $endtripletlabel=$AARange->end;
+   $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand);
+
+   $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel));
+
+   $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel);
+
+To simplify, these operations have been included in two additional
+methods: dna_seq() and cdna_seq().
+
+These would return the whole sequence, as in the examples above.  But
+the above general scheme can be used by specifying different labels,
+to retrieve hypothetical subsequences of interest.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana@ebi.ac.uk, jinsana@gmx.net
+
+Address: 
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::AARange;
+$VERSION=1.8;
+
+# Version history:
+# Wed Apr 19 15:10:29 BST 2000 v 1.0 begun
+# Wed Apr 19 17:26:58 BST 2000 v 1.4 new, aa_start, aa_end, seq, length created
+# Wed Apr 19 19:57:42 BST 2000 v 1.5 subseq position label added
+# Thu Apr 20 16:13:58 BST 2000 v 1.7 added: documentation, dna_seq, cdna_seq
+# Wed Mar 28 16:58:02 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI)
+
+use strict;
+use vars qw($VERSION @ISA);
+use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
+@ISA=qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref,
+                                               -start => $beginaa,
+                                               -end => $endaa,
+                                               -name => "ABCD",
+                                               -description => "DCBA",
+                                               -translength => $length);
+
+  Function: generates a new AminoAcidRange LiveSeq object
+  Returns : reference to a new object of class AARange
+  Errorcode -1
+  Args    : two positions in AminoAcid coordinate numbering
+            an object reference specifying to which translation the aminoacid
+            ranges refer to
+            a name and a description (optional)
+            an optional "translength" argument: this can be given when
+            a lot of AARanges are to be created at the same time for the same
+            Translation object, calculating it with $translation->length
+            This would increase the speed, avoiding the new() function to
+            calculate everytime the same length again and again for every obj.
+
+=cut
+
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my ($obj,%range);
+
+  $obj = \%range;
+  $obj = bless $obj, $class;
+  my $self=$obj;
+
+  my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength});
+
+  unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) {
+    $self->warn("No -translation or wrong type given");
+    return (-1);
+  }
+  unless ($translength) { # if it's not given, fetch it
+    $translength=$translation->length;
+  }
+  my $seq=$translation->{'seq'};
+
+  if (($start < 1)&&($start > $translength)) {
+    $self->warn("$class not initialised because start aminoacid position not valid");
+    return (-1);
+  }
+  if (($end < 1)&&($end > $translength)) {
+    $self->warn("$class not initialised because end aminoacid position not valid");
+    return (-1);
+  }
+  if ($start > $end) {
+    $self->warn("$class not initialised because start position > end position!");
+    return (-1);
+  }
+
+  my ($starttripletlabel,$endtripletlabel);
+  if ($start == $end) { # trick to increase speed
+    $starttripletlabel=$endtripletlabel=$translation->label($start);
+  } else {
+    ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end));
+  }
+  unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) {
+    $self->warn("$class not initialised because of problems in retrieving start or end label!");
+    return (-1);
+  }
+
+  # unsure if needed:
+  #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end
+  #unless ($endlabel > 0) {
+    #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid";
+    #return (-1);
+  #}
+  $self->{'seq'}=$seq;
+  $self->{'start'}=$starttripletlabel;
+  $self->{'end'}=$endtripletlabel;
+  $self->{'strand'}=$translation->strand;
+  $self->{'translation'}=$translation;
+  $self->{'name'}=$name;
+  $self->{'description'}=$description;
+  $self->{'alphabet'}="protein";
+
+  return $obj;
+}
+
+sub coordinate_start {
+  my $self=shift;
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return (-1);
+}
+
+sub all_labels {
+  my $self=shift;
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return (-1);
+}
+
+sub valid {
+  my $self=shift;
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return (-1);
+}
+
+=head2 get_Transcript
+
+  Title   : valid
+  Usage   : $transcript = $obj->get_Transcript()
+  Function: retrieves the reference to the object of class Transcript (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Transcript {
+  my $self=shift;
+  return ($self->get_Translation->get_Transcript);
+}
+
+=head2 get_Translation
+
+  Title   : valid
+  Usage   : $translation = $obj->get_Translation()
+  Function: retrieves the reference to the object of class Translation (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Translation {
+  my $self=shift;
+  return ($self->{'translation'});
+}
+
+sub change {
+  my $self=shift;
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return (-1);
+}
+sub positionchange {
+  my $self=shift;
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return (-1);
+}
+sub labelchange {
+  my $self=shift;
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return (-1);
+}
+
+sub subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+  if (defined ($length)) {
+    if ($length < 1) {
+      $self->warn("No sense asking for a subseq of length < 1");
+      return (-1);
+    }
+  }
+  unless (defined ($pos1)) {
+    $pos1=1;
+  } elsif ($pos1 < 1) { # if position out of boundaries
+    $self->warn("Starting position for AARange cannot be < 1!"); return (-1);
+    if ((defined ($pos2))&&($pos1>$pos2)) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+  }
+  my $seq=$self->seq;
+  my $objlength=length($seq);
+  unless (defined ($length)) {
+    $length=$objlength-$pos1+1;
+  }
+  if (defined ($pos2)) {
+    if ($pos2 > $objlength) { # if position out of boundaries
+      $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1);
+    }
+    $length=$pos2-$pos1+1;
+    if ((defined ($pos1))&&($pos1>$pos2)) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+  }
+  my $str=substr($seq,$pos1-1,$length);
+  if (length($str) < $length) {
+    $self->warn("Attention, cannot return the length requested for subseq",1);
+  }
+  return $str;
+}
+
+sub seq {
+  my $self=shift;
+  my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end);
+  unless (($aa_start)&&($aa_end)) { # they must both exist
+    $self->warn("Not able to find start or end of the AminoAcid Range");
+    return (0);
+  }
+  my $translseq=$self->get_Translation->seq;
+  return substr($translseq,$aa_start-1,$aa_end-$aa_start+1);
+  # Note: it will return "undef" if the translation stops before the start
+  # of the aarange (because of upstream nonsense mutation creating STOP).
+  # For the same reason it would return uncomplete (up to the STOP) string
+  # if the stop happens in between aarange's start and stop
+}
+
+sub length {
+  my $self=shift;
+  my $seq=$self->seq;
+  my $length=length($seq);
+  return $length;
+}
+
+sub label {
+  my ($self,$position)=@_;
+  my $translation=$self->get_Translation;
+  my $origstart=$translation->coordinate_start; # preserve it
+  $translation->coordinate_start($self->start); # change it
+  my $label=$translation->label($position);
+  $translation->coordinate_start($origstart); # restore it
+  return ($label);
+}
+
+sub position {
+  my ($self,$label)=@_;
+  my $translation=$self->get_Translation;
+  my $origstart=$translation->coordinate_start; # preserve it
+  $translation->coordinate_start($self->start); # change it
+  my $position=$translation->position($label);
+  $translation->coordinate_start($origstart); # restore it
+  return ($position);
+}
+
+=head2 aa_start
+
+  Title   : aa_start
+  Usage   : $end = $aarange->aa_start()
+  Returns : integer (position, according to Translation coordinate system) of
+            the start of an AminoAcidRange object
+  Args    : none
+
+=cut
+
+sub aa_start {
+  my $self=shift;
+  my $aastart=$self->get_Translation->position($self->{'start'});
+}
+
+=head2 aa_end
+
+  Title   : aa_end
+  Usage   : $end = $aarange->aa_end()
+  Returns : integer (position, according to Translation coordinate system) of
+            the end of an AminoAcidRange object
+  Args    : none
+
+=cut
+
+sub aa_end {
+  my $self=shift;
+  my $aastart=$self->get_Translation->position($self->{'end'});
+}
+
+=head2 dna_seq
+
+  Title   : dna_seq
+  Usage   : $end = $aarange->dna_seq()
+  Returns : the sequence at DNA level of the entire AminoAcidRange
+            this would include introns (if present)
+  Args    : none
+
+=cut
+
+sub dna_seq {
+  my $self=shift;
+  my $startlabel=$self->start;
+  my $endtripletlabel=$self->end;
+  my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
+  return ($self->labelsubseq($startlabel,undef,$endlabel));
+}
+
+=head2 cdna_seq
+
+  Title   : cdna_seq
+  Usage   : $end = $aarange->cdna_seq()
+  Returns : the sequence at cDNA level of the entire AminoAcidRange
+            i.e. this is the part of the Transcript that codes for the
+            AminoAcidRange. It would be composed just of exonic DNA.
+  Args    : none
+
+=cut
+
+sub cdna_seq {
+  my $self=shift;
+  my $startlabel=$self->start;
+  my $endtripletlabel=$self->end;
+  my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
+  return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel));
+}
+
+# this checks if the attached Transcript has a Gene object attached
+sub gene {
+  my ($self,$value) = @_;
+  if (defined $value) {
+    $self->{'gene'} = $value;
+  }
+  unless (exists $self->{'gene'}) {
+    unless (exists $self->get_Transcript->{'gene'}) {
+      return (0);
+    } else {
+      return ($self->get_Transcript->{'gene'});
+    }
+  } else {
+    return $self->{'gene'};
+  }
+}
+
+1;