diff variant_effect_predictor/Bio/AlignIO/msf.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/AlignIO/msf.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,242 @@
+# $Id: msf.pm,v 1.16 2002/11/26 16:34:39 jason Exp $
+#
+# BioPerl module for Bio::AlignIO::msf
+
+#	based on the Bio::SeqIO::msf module
+#       by Ewan Birney <birney@sanger.ac.uk>
+#       and Lincoln Stein  <lstein@cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::msf - msf sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from msf flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.
+ Bug reports can be submitted via email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner@alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::msf;
+use vars qw(@ISA %valid_type);
+use strict;
+
+use Bio::AlignIO;
+use Bio::SeqIO::gcg; # for GCG_checksum()
+use Bio::SimpleAlign;
+
+@ISA = qw(Bio::AlignIO);
+
+BEGIN {
+    %valid_type = qw( dna N rna N protein P );
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream. Tries to read *all* MSF
+          It reads all non whitespace characters in the alignment
+          area. For MSFs with weird gaps (eg ~~~) map them by using
+          $al->map_chars('~','-')
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $entry;
+    my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'gcg' );
+
+    while( $entry = $self->_readline) {
+        $entry =~ /\/\// && last; # move to alignment section
+	$entry =~ /Name:\s+(\S+)/ && do { $name = $1;
+					  $hash{$name} = ""; # blank line
+					  push(@names,$name); # we need it ordered!
+				       };
+       # otherwise - skip
+    }
+
+   # alignment section
+
+   while( $entry = $self->_readline) {
+       next if ( $entry =~ /^\s+(\d+)/ ) ;
+       $entry =~ /^\s*(\S+)\s+(.*)$/ && do {
+	   $name = $1;
+	   $str = $2;
+	   if( ! exists $hash{$name} ) {
+	       $self->throw("$name exists as an alignment line but not in the header. Not confident of what is going on!");
+	   }
+	   $str =~ s/\s//g;
+	   $hash{$name} .= $str;
+       };
+   }
+
+   return 0 if scalar @names < 1;
+
+   # now got this as a name - sequence hash. Lets make some sequences!
+
+   foreach $name ( @names ) {
+       if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+	   $seqname = $1;
+	   $start = $2;
+	   $end = $3;
+       } else {
+	   $seqname=$name;
+	   $start = 1;
+	   $str = $hash{$name};
+	   $str =~ s/[^A-Za-z]//g;
+	   $end = length($str);
+       }
+
+       $seq = new Bio::LocatableSeq('-seq'=>$hash{$name},
+				    '-id'=>$seqname,
+				    '-start'=>$start,
+				    '-end'=>$end,
+				    );
+
+       $aln->add_seq($seq);
+
+
+#  If $end <= 0, we have either reached the end of
+#  file in <> or we have encountered some other error
+#
+#   if ($end <= 0) { undef $aln;}
+
+
+   }
+
+   return $aln;
+}
+
+
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in MSF format
+           Sequence type of the alignment is determined by the first sequence.
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    my ($self,@aln) = @_;
+    my $msftag;
+    my $type;
+    my $count = 0;
+    my $maxname;
+    my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index);
+    foreach my $aln (@aln) {
+	if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) { 
+	    $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+	    next;
+	}
+	$date = localtime(time);
+	$msftag = "MSF";
+	$type = $valid_type{$aln->get_seq_by_pos(1)->alphabet};
+	$maxname = $aln->maxdisplayname_length();
+	$length  = $aln->length();
+	$name = $aln->id();
+	if( !defined $name ) {
+	    $name = "Align";
+	}
+
+
+	$self->_print (sprintf("\n%s   MSF: %d  Type: %s  %s  Check: 00 ..\n\n", 
+			       $name,  $aln->no_sequences, $type, $date));
+
+
+	foreach $seq ( $aln->each_seq() ) {
+
+
+	    $name = $aln->displayname($seq->get_nse());
+	    $miss = $maxname - length ($name);
+	    $miss += 2;
+	    $pad  = " " x $miss;
+
+	    $self->_print (sprintf(" Name: %s%sLen:    %d  Check:  %d  Weight:  1.00\n",$name,$pad,length $seq->seq(), Bio::SeqIO::gcg->GCG_checksum($seq)));
+
+	    $hash{$name} = $seq->seq();
+	    push(@arr,$name);
+	}
+    	# ok - heavy handed, but there you go.
+    	#
+    	$self->_print ("\n//\n\n\n");
+
+    	while( $count < $length ) {	
+	    # there is another block to go!
+	    foreach $name  ( @arr ) {
+	    	$self->_print (sprintf("%-20s  ",$name));
+
+	    	$tempcount = $count;
+	    	$index = 0;
+	    	while( ($tempcount + 10 < $length) && ($index < 5)  ) {
+
+		    $self->_print (sprintf("%s ",substr($hash{$name},$tempcount,10)));
+
+		    $tempcount += 10;
+		    $index++;
+	    	}	    	#
+	    	# ok, could be the very last guy ;)
+	    	#
+	    	if( $index < 5) {
+		    # space to print!
+		    #
+		    $self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
+		    $tempcount += 10;
+	    	}
+	    	$self->_print ("\n");
+	    }
+	    $self->_print ("\n\n");
+	    $count = $tempcount;
+    	}     			
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;