diff variant_effect_predictor/Bio/AlignIO/fasta.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/AlignIO/fasta.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,199 @@
+# $Id: fasta.pm,v 1.11 2002/12/14 19:09:05 birney Exp $
+#
+# BioPerl module for Bio::AlignIO::fasta
+
+#	based on the Bio::SeqIO::fasta module
+#       by Ewan Birney <birney@sanger.ac.uk>
+#       and Lincoln Stein  <lstein@cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::fasta - FastA MSA Sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::SimpleAlign> objects to and from
+fasta flat file databases.  This is for the fasta sequence format NOT
+FastA analysis program.  To process the pairwise alignments from a
+FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner@alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::fasta;
+use vars qw(@ISA);
+use strict;
+
+use Bio::AlignIO;
+use Bio::SimpleAlign;
+
+@ISA = qw(Bio::AlignIO);
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
+	    or on error
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $entry;
+    my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,%align);
+    my $aln =  Bio::SimpleAlign->new();
+
+    while(defined ($entry = $self->_readline)) {
+	if($entry =~ /^>(\S+)/ ) {
+	    $tempname = $1;
+	    if( defined $name ) {
+		# put away last name and sequence
+
+		if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+		    $seqname = $1;
+		    $start = $2;
+		    $end = $3;
+		} else {
+		    $seqname=$name;
+		    $start = 1;
+		    $end = length($seqchar);   #ps 9/6/00
+		}
+#		print STDERR  "Going to add with $seqchar $seqname\n";
+		$seq = new Bio::LocatableSeq('-seq'=>$seqchar,
+				    '-id'=>$seqname,
+				    '-start'=>$start,
+				    '-end'=>$end,
+				    );
+
+		$aln->add_seq($seq);
+	     }
+	     $name = $tempname;
+	     $seqchar  = "";
+	     next;
+	}
+	$entry =~ s/[^A-Za-z\.\-]//g;
+	$seqchar .= $entry;
+
+    }
+#
+#  Next two lines are to silence warnings that
+#  otherwise occur at EOF when using <$fh>
+
+   if (!defined $name) {$name="";}
+   if (!defined $seqchar) {$seqchar="";}
+
+#  Put away last name and sequence
+    if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+	$seqname = $1;
+	$start = $2;
+	$end = $3;
+    } else {
+	$seqname=$name;
+	$start = 1;
+	$end = length($seqchar);   #ps 9/6/00
+#	$end = length($align{$name});
+    }
+
+
+#  If $end <= 0, we have either reached the end of
+#  file in <> or we have encountered some other error
+#
+   if ($end <= 0) { undef $aln; return $aln;}
+
+# This logic now also reads empty lines at the 
+# end of the file. Skip this is seqchar and seqname is null
+    if( length($seqchar) == 0 && length($seqname) == 0 ) {
+	# skip
+    } else {
+#	print STDERR "end to add with $seqchar $seqname\n";
+	$seq = new Bio::LocatableSeq('-seq'=>$seqchar,
+			'-id'=>$seqname,
+			'-start'=>$start,
+			'-end'=>$end,
+			);
+
+	$aln->add_seq($seq);
+    }
+
+    return $aln;
+
+}
+	
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in fasta format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    my ($self,@aln) = @_;
+    my ($seq,$rseq,$name,$count,$length,$seqsub);
+
+    foreach my $aln (@aln) {
+	if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) { 
+	    $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+	    next;
+	}
+	foreach $rseq ( $aln->each_seq() ) {
+	    $name = $aln->displayname($rseq->get_nse());
+	    $seq  = $rseq->seq();	
+	    $self->_print (">$name\n") or return ;	
+	    $count =0;
+	    $length = length($seq);
+	    while( ($count * 60 ) < $length ) {
+		$seqsub = substr($seq,$count*60,60);
+		$self->_print ("$seqsub\n") or return ;
+		$count++;
+	    }
+	}
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;