diff variant_effect_predictor/Bio/Tools/OddCodes.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/OddCodes.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,443 @@
+#$Id: OddCodes.pm,v 1.10.2.1 2003/04/07 04:27:42 heikki Exp $
+#-----------------------------------------------------------------------------
+# PACKAGE    : OddCodes.pm
+# PURPOSE    : To write amino acid sequences in alternative alphabets
+# AUTHOR     : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl)
+# SOURCE     : 
+# CREATED    : 8th July 2000
+# MODIFIED   : 
+# DISCLAIMER : I am employed in the pharmaceutical industry but my 
+#            : employers do not endorse or sponsor this module
+#            : in any way whatsoever.  The above email address is
+#            : given purely for the purpose of easy communication
+#            : with the author, and does not imply any connection
+#	     : between my employers and anything written below.
+# LICENCE    : You may distribute this module under the same terms 
+#	     : as the rest of BioPerl.
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Tools::OddCodes - Object holding alternative alphabet coding for 
+one protein sequence
+
+=head1 SYNOPSIS
+
+Take a sequence object from eg, an inputstream, and creates an object
+for the purposes of rewriting that sequence in another alphabet.
+These are abbreviated amino acid sequence alphabets, designed to
+simplify the statistical aspects of analysing protein sequences, by
+reducing the combinatorial explosion of the 20-letter alphabet.  These
+abbreviated alphabets range in size from 2 to 8.
+
+Creating the OddCodes object, eg:
+
+	my $inputstream = Bio::SeqIO->new( '-file' => "seqfile", 
+                                           '-format' => 'Fasta');
+	my $seqobj = $inputstream->next_seq();
+	my $oddcode_obj = Bio::Tools::Oddcodes->new(-seq => $seqobj);
+
+or:
+
+	my $seqobj = Bio::PrimarySeq->new
+              (-seq=>'[cut and paste a sequence here]',
+               -alphabet => 'protein',
+               -id => 'test');
+	my $oddcode_obj  =  Bio::Tools::OddCodes->new(-seq => $seqobj);
+
+do the alternative coding, returning the answer as a reference to a string
+
+	my $output = $oddcode_obj->structural();
+	my $output = $oddcode_obj->chemical();
+	my $output = $oddcode_obj->functional();
+	my $output = $oddcode_obj->charge();
+	my $output = $oddcode_obj->hydrophobic();
+	my $output = $oddcode_obj->Dayhoff();
+	my $output = $oddcode_obj->Sneath();
+	my $output = $oddcode_obj->Stanfel();
+
+
+display sequence in new form, eg:
+
+	my $new_coding = $$output;
+	print "\n$new_coding";
+
+=head1 DESCRIPTION
+
+Bio::Tools::Oddcodes is a welterweight object for rewriting a protein
+sequence in an alternative alphabet.  8 of these are provided, ranging
+from the the 2-letter hydrophobic alphabet, to the 8-letter chemical
+alphabet.  These are useful for the statistical analysis of protein
+sequences since they can partially avoid the combinatorial explosion
+produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers
+etc.)
+
+The objects will print out a warning if the input sequence is not a
+protein. If you know what you are doing, you can silence the warning
+by setting verbose() to a negetive value.
+
+See Synopsis above for object creation code.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                - General discussion
+  http://www.bioperl.org/MailList.html - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bioperl.org
+  http://www.bugzilla.bioperl.org/
+
+=head1 AUTHOR
+
+Derek Gatherer
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+#'
+
+package Bio::Tools::OddCodes;
+use vars qw(@ISA);
+use strict;
+
+use Bio::Root::Root;
+
+@ISA = qw(Bio::Root::Root);
+
+
+sub new 
+{
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    my ($seqobj) = $self->_rearrange([qw(SEQ)],@args);
+    if((! defined($seqobj)) && @args && ref($args[0])) {
+	# parameter not passed as named parameter?
+	$seqobj = $args[0];
+    }
+    unless  ($seqobj->isa("Bio::PrimarySeqI")) 
+    {
+	die("die in _init, OddCodes works only on PrimarySeqI
+objects\n");
+    }
+
+    $self->{'_seqref'} = $seqobj;
+
+    return $self;
+}
+
+=head2 structural
+
+ Title   : structural
+ Usage   : $output = $oddcode_obj->structural(); 
+ Function: turns amino acid sequence into 3-letter structural alphabet
+	 : A (ambivalent), E (external), I (internal)
+ Example : a sequence ACDEFGH will become AAEEIAE
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub structural()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);	# see _pullseq() below
+
+# now the real business
+	
+	$seqstring =~ tr/[ACGPSTWY]/1/;
+	$seqstring =~ tr/[RNDQEHK]/2/;
+	$seqstring =~ tr/[ILMFV]/3/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/E/;
+	$seqstring =~ tr/3/I/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 functional
+
+ Title   : functional
+ Usage   : $output = $oddcode_obj->functional(); 
+ Function: turns amino acid sequence into 4-letter functional alphabet
+	 : A (acidic), C (basic), H (hydrophobic), P (polar)
+ Example : a sequence ACDEFGH will become HPAAHHC 
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub functional()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[DE]/1/;
+	$seqstring =~ tr/[HKR]/2/;
+	$seqstring =~ tr/[AFILMPVW]/3/;
+	$seqstring =~ tr/[CGNQSTY]/4/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/C/;
+	$seqstring =~ tr/3/H/;
+	$seqstring =~ tr/4/P/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 hydrophobic
+
+ Title   : hydrophobic
+ Usage   : $output = $oddcode_obj->hydrophobic(); 
+ Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
+	 : O (hydrophobic), I (hydrophilic)
+ Example : a sequence ACDEFGH will become OIIIOII
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub hydrophobic()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[AFILMPVW]/1/;
+	$seqstring =~ tr/[CDEGHKNQRSTY]/2/;
+	$seqstring =~ tr/1/I/;
+	$seqstring =~ tr/2/O/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 Dayhoff
+
+ Title   : Dayhoff
+ Usage   : $output = $oddcode_obj->Dayhoff(); 
+ Function: turns amino acid sequence into 6-letter Dayhoff alphabet
+ Example : a sequence ACDEFGH will become CADDGCE 
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Dayhoff()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[C]/1/;
+	$seqstring =~ tr/[AGPST]/2/;
+	$seqstring =~ tr/[DENQ]/3/;
+	$seqstring =~ tr/[HKR]/4/;
+	$seqstring =~ tr/[ILMV]/5/;
+	$seqstring =~ tr/[FWY]/6/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/C/;
+	$seqstring =~ tr/3/D/;
+	$seqstring =~ tr/4/E/;
+	$seqstring =~ tr/5/F/;
+	$seqstring =~ tr/6/G/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 Sneath
+
+ Title   : Sneath
+ Usage   : $output = $oddcode_obj->Sneath(); 
+ Function: turns amino acid sequence into 7-letter Sneath alphabet
+ Example : a sequence ACDEFGH will become CEFFHCF
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Sneath()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[ILV]/1/;
+	$seqstring =~ tr/[AGP]/2/;
+	$seqstring =~ tr/[MNQ]/3/;
+	$seqstring =~ tr/[CST]/4/;
+	$seqstring =~ tr/[DE]/5/;
+	$seqstring =~ tr/[KR]/6/;
+	$seqstring =~ tr/[FHWY]/7/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/C/;
+	$seqstring =~ tr/3/D/;
+	$seqstring =~ tr/4/E/;
+	$seqstring =~ tr/5/F/;
+	$seqstring =~ tr/6/G/;
+	$seqstring =~ tr/7/H/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 Stanfel
+
+ Title   : Stanfel
+ Usage   : $output = $oddcode_obj->Stanfel(); 
+ Function: turns amino acid sequence into 4-letter Stanfel alphabet
+ Example : a sequence ACDEFGH will become AACCDAE
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Stanfel()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[ACGILMPSTV]/1/;
+	$seqstring =~ tr/[DENQ]/2/;
+	$seqstring =~ tr/[FWY]/3/;
+	$seqstring =~ tr/[HKR]/4/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/C/;
+	$seqstring =~ tr/3/D/;
+	$seqstring =~ tr/4/E/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 chemical()
+
+ Title   : chemical
+ Usage   : $output = $oddcode_obj->chemical(); 
+ Function: turns amino acid sequence into 8-letter chemical alphabet
+	 : A (acidic), L (aliphatic), M (amide), R (aromatic)
+	 : C (basic), H (hydroxyl), I (imino), S (sulphur)
+ Example : a sequence ACDEFGH will become LSAARAC 
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub chemical()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[DE]/1/;
+	$seqstring =~ tr/[AGILV]/2/;
+	$seqstring =~ tr/[NQ]/3/;
+	$seqstring =~ tr/[FWY]/4/;
+	$seqstring =~ tr/[RHK]/5/;
+	$seqstring =~ tr/[ST]/6/;
+	$seqstring =~ tr/P/7/;
+	$seqstring =~ tr/[CM]/8/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/L/;
+	$seqstring =~ tr/3/M/;
+	$seqstring =~ tr/4/R/;
+	$seqstring =~ tr/5/C/;
+	$seqstring =~ tr/6/H/;
+	$seqstring =~ tr/7/I/;
+	$seqstring =~ tr/8/S/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+=head2 charge
+
+ Title   : charge
+ Usage   : $output = $oddcode_obj->charge(); 
+ Function: turns amino acid sequence into 3-letter charge alphabet
+ Example : a sequence ACDEFGH will become NNAANNC 
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub charge()
+{
+	my $self = $_[0];
+	my $seqstring = &_pullseq($self);
+
+# now the real business
+	
+	$seqstring =~ tr/[DE]/1/;
+	$seqstring =~ tr/[HKR]/2/;
+	$seqstring =~ tr/[ACFGILMNPQSTVWY]/3/;
+	$seqstring =~ tr/1/A/;
+	$seqstring =~ tr/2/C/;
+	$seqstring =~ tr/3/N/;
+
+	return \$seqstring;
+
+# and that's that one
+}
+
+# _pullseq is called within each of the subroutines
+# it just checks a few things and returns the sequence
+
+sub _pullseq
+{
+	my $self = $_[0];
+
+	my $seqobj =  $self->{'_seqref'};
+	
+	unless  ($seqobj->isa("Bio::PrimarySeqI")) 
+	{
+		die("die, OddCodes works only on PrimarySeqI objects\n");
+    	}
+        $self->warn("\tAll OddCode alphabets need a protein sequence,\n".
+                    "\tbut BioPerl thinks this is not: [". $seqobj->id. "]")
+            unless $seqobj->alphabet eq 'protein' or $self->verbose < 0;;
+
+	my $seqstring = uc $seqobj->seq();
+
+	if(length($seqstring)<1)
+	{
+		die("$seqstring: die, sequence has zero length\n");
+	}
+	return $seqstring;
+}
+
+1;