diff variant_effect_predictor/Bio/Variation/AAChange.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Variation/AAChange.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,477 @@
+# $Id: AAChange.pm,v 1.13 2002/10/22 07:38:49 lapp Exp $
+#
+# BioPerl module for Bio::Variation::AAChange
+#
+# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::AAChange - Sequence change class for polypeptides
+
+=head1 SYNOPSIS
+
+   $aamut = Bio::Variation::AAChange->new
+       ('-start'         => $start,
+ 	'-end'           => $end,
+ 	'-length'        => $len,
+ 	'-proof'         => $proof,
+ 	'-isMutation'    => 1,
+ 	'-mut_number'    => $mut_number
+ 	);
+
+   my $a1 = Bio::Variation::Allele->new;
+   $a1->seq($ori) if $ori;
+   $aamut->allele_ori($a1);
+   my $a2 = Bio::Variation::Allele->new;
+   $a2->seq($mut) if $mut;
+   $aachange->add_Allele($a2);
+   $aachange->allele_mut($a2);
+
+   print  "\n";
+
+   # add it to a SeqDiff container object
+   $seqdiff->add_Variant($rnachange);
+
+   # and create links to and from RNA level variant objects
+   $aamut->RNAChange($rnachange);
+   $rnachange->AAChange($rnachange);
+
+=head1 DESCRIPTION
+
+The instantiable class Bio::Variation::RNAChange describes basic
+sequence changes at polypeptide  level. It uses methods defined in
+superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
+for details.
+
+If the variation described by a AAChange object has a known
+Bio::Variation::RNAAChange object, create the link with method
+AAChange(). See L<Bio::Variation::AAChange> for more information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                         - General discussion
+  http://bio.perl.org/MailList.html             - About the mailing lists
+
+=head2 Reporting Bugs
+
+report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.  Bug reports can be submitted via
+ email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki@ebi.ac.uk
+
+Address:
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::AAChange;
+my $VERSION=1.0;
+use vars qw(@ISA $MATRIX);
+use strict;
+
+# Object preamble - inheritance
+use Bio::Variation::VariantI;
+
+@ISA = qw( Bio::Variation::VariantI );
+
+BEGIN {
+
+my $matrix = << "__MATRIX__";
+#  Matrix made by matblas from blosum62.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 62
+#  Entropy =   0.6979, Expected =  -0.5209
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4
+R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4
+N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4
+D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4
+C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
+Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4
+E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4
+G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4
+H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4
+I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4
+L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4
+K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4
+M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4
+F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4
+S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4
+Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4
+V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4
+B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4
+Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4
+X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4
+* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1
+__MATRIX__
+
+    my %blosum = ();
+    $matrix =~ /^ +(.+)$/m;
+    my @aas = split / +/, $1;
+    foreach my $aa (@aas) {
+	my $tmp = $aa;
+	$tmp = "\\$aa" if $aa eq '*';
+	$matrix =~ /^($tmp) +([-+]?\d.*)$/m;
+	my @scores = split / +/, $2 if defined $2;
+	my $count = 0;
+	foreach my $ak (@aas) {
+	    $blosum{$aa}->{$aas[$count]} = $scores[$count];
+	    $count++;
+	}
+    }
+    sub _matrix;
+    $MATRIX = \%blosum;
+}
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($start, $end, $length, $strand, $primary, $source,
+	$frame, $score, $gff_string,
+	$allele_ori,  $allele_mut,  $upstreamseq,  $dnstreamseq,
+	$label,  $status,  $proof,  $re_changes,  $region, $region_value,
+        $region_dist,
+	$numbering,  $mut_number,  $ismutation) =
+	    $self->_rearrange([qw(START
+				  END
+				  LENGTH
+				  STRAND
+				  PRIMARY
+				  SOURCE
+				  FRAME
+				  SCORE
+				  GFF_STRING
+				  ALLELE_ORI
+				  ALLELE_MUT
+				  UPSTREAMSEQ
+				  DNSTREAMSEQ
+				  LABEL
+				  STATUS
+				  PROOF
+				  RE_CHANGES
+				  REGION
+				  REGION_VALUE
+				  REGION_DIST
+				  NUMBERING
+				  MUT_NUMBER
+				  ISMUTATION
+				  )],@args);
+
+    $self->primary_tag("Variation");
+
+    $self->{ 'alleles' } = [];
+
+    $start && $self->start($start);
+    $end   && $self->end($end);
+    $length && $self->length($length);
+    $strand && $self->strand($strand);
+    $primary && $self->primary_tag($primary);
+    $source  && $self->source_tag($source);
+    $frame   && $self->frame($frame);
+    $score   && $self->score($score);
+    $gff_string && $self->_from_gff_string($gff_string);
+
+    $allele_ori && $self->allele_ori($allele_ori);
+    $allele_mut  && $self->allele_mut($allele_mut);
+    $upstreamseq  && $self->upstreamseq($upstreamseq);
+    $dnstreamseq  && $self->dnstreamseq($dnstreamseq);
+
+    $label  && $self->label($label);
+    $status  && $self->status($status);
+    $proof && $self->proof($proof);
+    $region  && $self->region($region);
+    $region_value  && $self->region_value($region_value);
+    $region_dist  && $self->region_dist($region_dist);
+    $numbering && $self->numbering($numbering);
+    $mut_number && $self->mut_number($mut_number);
+    $ismutation && $self->isMutation($ismutation);
+
+    return $self; # success - we hope!
+}
+
+=head2 RNAChange
+
+ Title   : RNAChange
+ Usage   : $mutobj = $self->RNAChange;
+         : $mutobj = $self->RNAChange($objref);
+ Function: Returns or sets the link-reference to a mutation/change object.
+           If there is no link, it will return undef
+ Returns : an obj_ref or undef
+
+=cut
+
+sub RNAChange {
+  my ($self,$value) = @_;
+  if (defined $value) {
+      if( ! $value->isa('Bio::Variation::RNAChange') ) {
+	  $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
+	  return (undef);
+      }
+      else {
+	  $self->{'RNAChange'} = $value;
+      }
+  }
+  unless (exists $self->{'RNAChange'}) {
+      return (undef);
+  } else {
+      return $self->{'RNAChange'};
+  }
+}
+
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function:
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable subclass of L<Bio::Variation::VariantI> needs
+            to implement this method. Valid values are listed in
+            'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub label {
+    my ($self) = @_;
+    my ($o, $m, $type);
+    $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+    $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+
+    if ($self->start == 1 ) {
+	if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
+	    $type = 'no translation';
+	}
+	elsif ($o and $m and $o eq $m ) {
+	    $type = 'silent';
+	}
+	# more ...
+    }
+    elsif ($o and substr($o, 0, 1) eq '*' ) {
+	if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
+	    $type = 'post-elongation';
+	}
+	elsif ($m and $o eq $m ) {
+	    $type = 'silent, conservative';
+	}
+    }
+    elsif ($o and $m and $o eq $m) {
+	$type = 'silent, conservative';
+    }
+    elsif ($m and $m eq '*') {
+	$type = 'truncation';
+    }
+    elsif ($o and $m and $o eq $m) {
+	$type = 'silent, conservative';
+    }
+    elsif (not $m or
+	   ($o and $m and  length($o) > length($m) and
+	    substr($m, -1, 1) ne '*')) {
+	$type = 'deletion';
+	if ($o and $m and $o !~ $m and $o !~ $m) {
+	    $type .= ', complex';
+	}
+    }
+    elsif (not $o or
+	   ($o and $m and length($o) < length($m) and
+	    substr($m, -1, 1) ne '*' ) ) {
+	$type = 'insertion';
+	if ($o and $m and $o !~ $m and $o !~ $m) {
+	    $type .= ', complex';
+	}
+    }
+    elsif  ($o and $m and $o ne $m and
+	    length $o == 1 and  length $m  == 1 ) {
+	$type = 'substitution';
+	my $value = $self->similarity_score;
+	if (defined $value) {
+	    my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
+	    $type .= ", ". $cons;
+	}
+    } else {
+	$type = 'out-of-frame translation, truncation';
+    }
+    $self->{'label'} = $type;
+    return $self->{'label'};
+}
+
+
+=head2 similarity_score
+
+ Title   : similarity_score
+ Usage   : $self->similarity_score
+ Function: Measure for evolutionary conservativeness
+           of single amino substitutions. Uses BLOSUM62.
+           Negative numbers are noncoservative changes.
+ Returns : integer, undef if not single amino acid change
+
+=cut
+
+sub similarity_score {
+    my ($self) = @_;
+    my ($o, $m, $type);
+    $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+    $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+    return undef unless $o and $m and length $o == 1 and length $m == 1;
+    return undef unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
+	$m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
+    return $MATRIX->{"\U$o"}->{"\U$m"};
+}
+
+=head2 trivname
+
+ Title   : trivname
+ Usage   : $self->trivname
+ Function: 
+
+           Given a Bio::Variation::AAChange object with linked
+           Bio::Variation::RNAChange and Bio::Variation::DNAMutation
+           objects, this subroutine creates a string corresponding to
+           the 'trivial name' of the mutation. Trivial name is
+           specified in Antonorakis & MDI Nomenclature Working Group:
+           Human Mutation 11:1-3, 1998.
+           http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html
+
+ Returns : string
+
+=cut
+
+
+sub trivname {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'trivname'} = $value;
+    } else {
+	my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
+	    ('', '', '', '', '');
+	my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+	#my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+
+	$aaori = substr ($o, 0, 1) if $o;
+	$aaori =~ tr/\*/X/;
+
+	my $sep;
+	if ($self->isMutation) {
+	    $sep = '>';
+	} else {
+	    $sep = '|';
+	}
+	my $trivname = $aaori. $self->start;
+	$trivname .= $sep if $sep eq '|';
+
+	my @alleles = $self->each_Allele;
+	foreach my $allele (@alleles) {
+	    my $m = $allele->seq if $allele->seq;
+
+	    $self->allele_mut($allele);
+	    #$trivname .=  $sep. uc $m if $m;
+
+	    $aamutterm = substr ($m, -1, 1) if $m;
+	    if ($self->RNAChange->label =~ /initiation codon/ and
+		( $o and $m and $o ne $m)) {
+		$aamut = 'X';
+	    }
+	    elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
+		$aamutsymbol = '';
+		$aamut = $aamutterm;
+	    }
+	    elsif ($self->RNAChange->label =~ /deletion/) {
+		$aamutsymbol = 'del';
+		if ($aamutterm eq '*') {
+		    $aatermnumber = $self->start + length($m) -1;
+		    $aamut = 'X'. $aatermnumber;
+		}
+		if ($self->RNAChange  && $self->RNAChange->label =~ /inframe/){
+		    $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
+		}
+	    }
+	    elsif ($self->RNAChange->label =~ /insertion/) {
+		$aamutsymbol = 'ins';
+		if (($aamutterm eq '*') && (length($m)-1 != 0)) {
+		    $aatermnumber = $self->start + length($m)-1;
+		    $aamut =  $aatermnumber. 'X';
+		}
+		if ($self->RNAChange->label =~ /inframe/){
+		    $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
+		}
+	    }
+	    elsif ($self->RNAChange->label =~ /complex/ ) {
+		my $diff = length($m) - length($o);
+		if ($diff >= 0 ) {
+		    $aamutsymbol = 'ins';
+		} else {
+		    $aamutsymbol = 'del' ;
+		}
+		if (($aamutterm eq '*') && (length($m)-1 != 0)) {
+		    $aatermnumber = $self->start + length($m)-1;
+		    $aamut =  $aatermnumber. 'X';
+		}
+		if ($self->RNAChange->label =~ /inframe/){
+
+		    if ($diff >= 0 ) {
+			$aamut = '+'. $diff ;
+		    } else {
+			$aamut =  $diff ;
+		    }
+		}
+	    }
+	    elsif ($self->label =~ /truncation/) {
+		$aamut = $m;
+	    } else {
+		$aamutsymbol = '';
+		$aamut = $aamutterm;
+	    }
+	    $aamut =~ tr/\*/X/;
+	    $trivname .= $aamutsymbol. $aamut. $sep;
+	}
+	chop $trivname;
+	$self->{'trivname'} = $trivname;
+    }
+    return $self->{'trivname'};
+}
+
+1;