Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/AlignIO/clustalw.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/AlignIO/clustalw.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,280 @@ +# $Id: clustalw.pm,v 1.21 2002/10/22 07:38:25 lapp Exp $ +# +# BioPerl module for Bio::AlignIO::clustalw + +# based on the Bio::SeqIO modules +# by Ewan Birney <birney@sanger.ac.uk> +# and Lincoln Stein <lstein@cshl.org> +# +# and the SimpleAlign.pm module of Ewan Birney +# +# Copyright Peter Schattner +# +# You may distribute this module under the same terms as perl itself +# _history +# September 5, 2000 +# POD documentation - main docs before the code + +=head1 NAME + +Bio::AlignIO::clustalw - clustalw sequence input/output stream + +=head1 SYNOPSIS + +Do not use this module directly. Use it via the Bio::AlignIO class. + +=head1 DESCRIPTION + +This object can transform Bio::Align::AlignI objects to and from clustalw flat +file databases. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHORS - Peter Schattner + +Email: schattner@alum.mit.edu + + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::AlignIO::clustalw; +use vars qw(@ISA $LINELENGTH); +use strict; + +use Bio::AlignIO; +use Bio::LocatableSeq; +use Bio::SimpleAlign; # to be Bio::Align::Simple + +$LINELENGTH = 60; + +@ISA = qw(Bio::AlignIO); + +=head2 new + + Title : new + Usage : $alignio = new Bio::AlignIO(-format => 'clustalw', + -file => 'filename'); + Function: returns a new Bio::AlignIO object to handle clustalw files + Returns : Bio::AlignIO::clustalw object + Args : -verbose => verbosity setting (-1,0,1,2) + -file => name of file to read in or with ">" - writeout + -fh => alternative to -file param - provide a filehandle + to read from/write to + -format => type of Alignment Format to process or produce + -percentages => (clustalw only) display a percentage of identity + in each line of the alignment. + + -linelength=> Set the alignment output line length (default 60) + +=cut + +sub _initialize { + my ($self, @args) = @_; + $self->SUPER::_initialize(@args); + my ($percentages, + $ll) = $self->_rearrange([qw(PERCENTAGES LINELENGTH)], @args); + defined $percentages && $self->percentages($percentages); + $self->line_length($ll || $LINELENGTH); +} + +=head2 next_aln + + Title : next_aln + Usage : $aln = $stream->next_aln() + Function: returns the next alignment in the stream + Returns : Bio::Align::AlignI object + Args : NONE + +See L<Bio::Align::AlignI> for details + +=cut + +sub next_aln { + my ($self) = @_; + + my $first_line; + if( defined ($first_line = $self->_readline ) + && $first_line !~ /CLUSTAL/ ) { + $self->warn("trying to parse a file which does not start with a CLUSTAL header"); + } + my %alignments; + my $aln = Bio::SimpleAlign->new(-source => 'clustalw'); + my $order = 0; + my %order; + $self->{_lastline} = ''; + while( defined ($_ = $self->_readline) ) { + next if ( /^\s+$/ ); + + my ($seqname, $aln_line) = ('', ''); + if( /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) { + # clustal 1.4 format + ($seqname,$aln_line) = ("$1:$2-$3",$4); + } elsif( /^(\S+)\s+([A-Z\-]+)\s*$/ ) { + ($seqname,$aln_line) = ($1,$2); + } else { $self->{_lastline} = $_; next } + + if( !exists $order{$seqname} ) { + $order{$seqname} = $order++; + } + + $alignments{$seqname} .= $aln_line; + } + my ($sname,$start,$end); + foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) { + if( $name =~ /(\S+):(\d+)-(\d+)/ ) { + ($sname,$start,$end) = ($1,$2,$3); + } else { + ($sname, $start) = ($name,1); + my $str = $alignments{$name}; + $str =~ s/[^A-Za-z]//g; + $end = length($str); + } + my $seq = new Bio::LocatableSeq('-seq' => $alignments{$name}, + '-id' => $sname, + '-start' => $start, + '-end' => $end); + $aln->add_seq($seq); + } + undef $aln if( !defined $end || $end <= 0); + return $aln; +} + +=head2 write_aln + + Title : write_aln + Usage : $stream->write_aln(@aln) + Function: writes the clustalw-format object (.aln) into the stream + Returns : 1 for success and 0 for error + Args : L<Bio::Align::AlignI> object + + +=cut + +sub write_aln { + my ($self,@aln) = @_; + my ($count,$length,$seq,@seq,$tempcount,$line_len); + $line_len = $self->line_length || $LINELENGTH; + foreach my $aln (@aln) { + if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { + $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); + next; + } + my $matchline = $aln->match_line; + + $self->_print (sprintf("CLUSTAL W(1.81) multiple sequence alignment\n\n\n")) or return; + + $length = $aln->length(); + $count = $tempcount = 0; + @seq = $aln->each_seq(); + my $max = 22; + foreach $seq ( @seq ) { + $max = length ($aln->displayname($seq->get_nse())) + if( length ($aln->displayname($seq->get_nse())) > $max ); + } + while( $count < $length ) { + foreach $seq ( @seq ) { +# +# Following lines are to suppress warnings +# if some sequences in the alignment are much longer than others. + + my ($substring); + my $seqchars = $seq->seq(); + SWITCH: { + if (length($seqchars) >= ($count + $line_len)) { + $substring = substr($seqchars,$count,$line_len); + last SWITCH; + } elsif (length($seqchars) >= $count) { + $substring = substr($seqchars,$count); + last SWITCH; + } + $substring = ""; + } + + $self->_print (sprintf("%-".$max."s %s\n", + $aln->displayname($seq->get_nse()), + $substring)) or return; + } + + my $linesubstr = substr($matchline, $count,$line_len); + my $percentages = ''; + if( $self->percentages ) { + my ($strcpy) = ($linesubstr); + my $count = ($strcpy =~ tr/\*//); + $percentages = sprintf("\t%d%%", 100 * ($count / length($linesubstr))); + } + $self->_print (sprintf("%-".$max."s %s%s\n", '', $linesubstr, + $percentages)); + $self->_print (sprintf("\n\n")) or return; + $count += $line_len; + } + } + $self->flush if $self->_flush_on_write && defined $self->_fh; + return 1; +} + +=head2 percentages + + Title : percentages + Usage : $obj->percentages($newval) + Function: Set the percentages flag - whether or not to show percentages in + each output line + Returns : value of percentages + Args : newvalue (optional) + + +=cut + +sub percentages { + my ($self,$value) = @_; + if( defined $value) { + $self->{'_percentages'} = $value; + } + return $self->{'_percentages'}; +} + +=head2 line_length + + Title : line_length + Usage : $obj->line_length($newval) + Function: Set the alignment output line length + Returns : value of line_length + Args : newvalue (optional) + + +=cut + +sub line_length { + my ($self,$value) = @_; + if( defined $value) { + $self->{'_line_length'} = $value; + } + return $self->{'_line_length'}; +} + +1;