Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/AlignIO/clustalw.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: clustalw.pm,v 1.21 2002/10/22 07:38:25 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::AlignIO::clustalw | |
| 4 | |
| 5 # based on the Bio::SeqIO modules | |
| 6 # by Ewan Birney <birney@sanger.ac.uk> | |
| 7 # and Lincoln Stein <lstein@cshl.org> | |
| 8 # | |
| 9 # and the SimpleAlign.pm module of Ewan Birney | |
| 10 # | |
| 11 # Copyright Peter Schattner | |
| 12 # | |
| 13 # You may distribute this module under the same terms as perl itself | |
| 14 # _history | |
| 15 # September 5, 2000 | |
| 16 # POD documentation - main docs before the code | |
| 17 | |
| 18 =head1 NAME | |
| 19 | |
| 20 Bio::AlignIO::clustalw - clustalw sequence input/output stream | |
| 21 | |
| 22 =head1 SYNOPSIS | |
| 23 | |
| 24 Do not use this module directly. Use it via the Bio::AlignIO class. | |
| 25 | |
| 26 =head1 DESCRIPTION | |
| 27 | |
| 28 This object can transform Bio::Align::AlignI objects to and from clustalw flat | |
| 29 file databases. | |
| 30 | |
| 31 =head1 FEEDBACK | |
| 32 | |
| 33 =head2 Mailing Lists | |
| 34 | |
| 35 User feedback is an integral part of the evolution of this and other | |
| 36 Bioperl modules. Send your comments and suggestions preferably to one | |
| 37 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 38 | |
| 39 bioperl-l@bioperl.org - General discussion | |
| 40 http://bio.perl.org/MailList.html - About the mailing lists | |
| 41 | |
| 42 | |
| 43 =head2 Reporting Bugs | |
| 44 | |
| 45 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 46 the bugs and their resolution. Bug reports can be submitted via email | |
| 47 or the web: | |
| 48 | |
| 49 bioperl-bugs@bio.perl.org | |
| 50 http://bugzilla.bioperl.org/ | |
| 51 | |
| 52 =head1 AUTHORS - Peter Schattner | |
| 53 | |
| 54 Email: schattner@alum.mit.edu | |
| 55 | |
| 56 | |
| 57 =head1 APPENDIX | |
| 58 | |
| 59 The rest of the documentation details each of the object | |
| 60 methods. Internal methods are usually preceded with a _ | |
| 61 | |
| 62 =cut | |
| 63 | |
| 64 # Let the code begin... | |
| 65 | |
| 66 package Bio::AlignIO::clustalw; | |
| 67 use vars qw(@ISA $LINELENGTH); | |
| 68 use strict; | |
| 69 | |
| 70 use Bio::AlignIO; | |
| 71 use Bio::LocatableSeq; | |
| 72 use Bio::SimpleAlign; # to be Bio::Align::Simple | |
| 73 | |
| 74 $LINELENGTH = 60; | |
| 75 | |
| 76 @ISA = qw(Bio::AlignIO); | |
| 77 | |
| 78 =head2 new | |
| 79 | |
| 80 Title : new | |
| 81 Usage : $alignio = new Bio::AlignIO(-format => 'clustalw', | |
| 82 -file => 'filename'); | |
| 83 Function: returns a new Bio::AlignIO object to handle clustalw files | |
| 84 Returns : Bio::AlignIO::clustalw object | |
| 85 Args : -verbose => verbosity setting (-1,0,1,2) | |
| 86 -file => name of file to read in or with ">" - writeout | |
| 87 -fh => alternative to -file param - provide a filehandle | |
| 88 to read from/write to | |
| 89 -format => type of Alignment Format to process or produce | |
| 90 -percentages => (clustalw only) display a percentage of identity | |
| 91 in each line of the alignment. | |
| 92 | |
| 93 -linelength=> Set the alignment output line length (default 60) | |
| 94 | |
| 95 =cut | |
| 96 | |
| 97 sub _initialize { | |
| 98 my ($self, @args) = @_; | |
| 99 $self->SUPER::_initialize(@args); | |
| 100 my ($percentages, | |
| 101 $ll) = $self->_rearrange([qw(PERCENTAGES LINELENGTH)], @args); | |
| 102 defined $percentages && $self->percentages($percentages); | |
| 103 $self->line_length($ll || $LINELENGTH); | |
| 104 } | |
| 105 | |
| 106 =head2 next_aln | |
| 107 | |
| 108 Title : next_aln | |
| 109 Usage : $aln = $stream->next_aln() | |
| 110 Function: returns the next alignment in the stream | |
| 111 Returns : Bio::Align::AlignI object | |
| 112 Args : NONE | |
| 113 | |
| 114 See L<Bio::Align::AlignI> for details | |
| 115 | |
| 116 =cut | |
| 117 | |
| 118 sub next_aln { | |
| 119 my ($self) = @_; | |
| 120 | |
| 121 my $first_line; | |
| 122 if( defined ($first_line = $self->_readline ) | |
| 123 && $first_line !~ /CLUSTAL/ ) { | |
| 124 $self->warn("trying to parse a file which does not start with a CLUSTAL header"); | |
| 125 } | |
| 126 my %alignments; | |
| 127 my $aln = Bio::SimpleAlign->new(-source => 'clustalw'); | |
| 128 my $order = 0; | |
| 129 my %order; | |
| 130 $self->{_lastline} = ''; | |
| 131 while( defined ($_ = $self->_readline) ) { | |
| 132 next if ( /^\s+$/ ); | |
| 133 | |
| 134 my ($seqname, $aln_line) = ('', ''); | |
| 135 if( /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) { | |
| 136 # clustal 1.4 format | |
| 137 ($seqname,$aln_line) = ("$1:$2-$3",$4); | |
| 138 } elsif( /^(\S+)\s+([A-Z\-]+)\s*$/ ) { | |
| 139 ($seqname,$aln_line) = ($1,$2); | |
| 140 } else { $self->{_lastline} = $_; next } | |
| 141 | |
| 142 if( !exists $order{$seqname} ) { | |
| 143 $order{$seqname} = $order++; | |
| 144 } | |
| 145 | |
| 146 $alignments{$seqname} .= $aln_line; | |
| 147 } | |
| 148 my ($sname,$start,$end); | |
| 149 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) { | |
| 150 if( $name =~ /(\S+):(\d+)-(\d+)/ ) { | |
| 151 ($sname,$start,$end) = ($1,$2,$3); | |
| 152 } else { | |
| 153 ($sname, $start) = ($name,1); | |
| 154 my $str = $alignments{$name}; | |
| 155 $str =~ s/[^A-Za-z]//g; | |
| 156 $end = length($str); | |
| 157 } | |
| 158 my $seq = new Bio::LocatableSeq('-seq' => $alignments{$name}, | |
| 159 '-id' => $sname, | |
| 160 '-start' => $start, | |
| 161 '-end' => $end); | |
| 162 $aln->add_seq($seq); | |
| 163 } | |
| 164 undef $aln if( !defined $end || $end <= 0); | |
| 165 return $aln; | |
| 166 } | |
| 167 | |
| 168 =head2 write_aln | |
| 169 | |
| 170 Title : write_aln | |
| 171 Usage : $stream->write_aln(@aln) | |
| 172 Function: writes the clustalw-format object (.aln) into the stream | |
| 173 Returns : 1 for success and 0 for error | |
| 174 Args : L<Bio::Align::AlignI> object | |
| 175 | |
| 176 | |
| 177 =cut | |
| 178 | |
| 179 sub write_aln { | |
| 180 my ($self,@aln) = @_; | |
| 181 my ($count,$length,$seq,@seq,$tempcount,$line_len); | |
| 182 $line_len = $self->line_length || $LINELENGTH; | |
| 183 foreach my $aln (@aln) { | |
| 184 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { | |
| 185 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); | |
| 186 next; | |
| 187 } | |
| 188 my $matchline = $aln->match_line; | |
| 189 | |
| 190 $self->_print (sprintf("CLUSTAL W(1.81) multiple sequence alignment\n\n\n")) or return; | |
| 191 | |
| 192 $length = $aln->length(); | |
| 193 $count = $tempcount = 0; | |
| 194 @seq = $aln->each_seq(); | |
| 195 my $max = 22; | |
| 196 foreach $seq ( @seq ) { | |
| 197 $max = length ($aln->displayname($seq->get_nse())) | |
| 198 if( length ($aln->displayname($seq->get_nse())) > $max ); | |
| 199 } | |
| 200 while( $count < $length ) { | |
| 201 foreach $seq ( @seq ) { | |
| 202 # | |
| 203 # Following lines are to suppress warnings | |
| 204 # if some sequences in the alignment are much longer than others. | |
| 205 | |
| 206 my ($substring); | |
| 207 my $seqchars = $seq->seq(); | |
| 208 SWITCH: { | |
| 209 if (length($seqchars) >= ($count + $line_len)) { | |
| 210 $substring = substr($seqchars,$count,$line_len); | |
| 211 last SWITCH; | |
| 212 } elsif (length($seqchars) >= $count) { | |
| 213 $substring = substr($seqchars,$count); | |
| 214 last SWITCH; | |
| 215 } | |
| 216 $substring = ""; | |
| 217 } | |
| 218 | |
| 219 $self->_print (sprintf("%-".$max."s %s\n", | |
| 220 $aln->displayname($seq->get_nse()), | |
| 221 $substring)) or return; | |
| 222 } | |
| 223 | |
| 224 my $linesubstr = substr($matchline, $count,$line_len); | |
| 225 my $percentages = ''; | |
| 226 if( $self->percentages ) { | |
| 227 my ($strcpy) = ($linesubstr); | |
| 228 my $count = ($strcpy =~ tr/\*//); | |
| 229 $percentages = sprintf("\t%d%%", 100 * ($count / length($linesubstr))); | |
| 230 } | |
| 231 $self->_print (sprintf("%-".$max."s %s%s\n", '', $linesubstr, | |
| 232 $percentages)); | |
| 233 $self->_print (sprintf("\n\n")) or return; | |
| 234 $count += $line_len; | |
| 235 } | |
| 236 } | |
| 237 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
| 238 return 1; | |
| 239 } | |
| 240 | |
| 241 =head2 percentages | |
| 242 | |
| 243 Title : percentages | |
| 244 Usage : $obj->percentages($newval) | |
| 245 Function: Set the percentages flag - whether or not to show percentages in | |
| 246 each output line | |
| 247 Returns : value of percentages | |
| 248 Args : newvalue (optional) | |
| 249 | |
| 250 | |
| 251 =cut | |
| 252 | |
| 253 sub percentages { | |
| 254 my ($self,$value) = @_; | |
| 255 if( defined $value) { | |
| 256 $self->{'_percentages'} = $value; | |
| 257 } | |
| 258 return $self->{'_percentages'}; | |
| 259 } | |
| 260 | |
| 261 =head2 line_length | |
| 262 | |
| 263 Title : line_length | |
| 264 Usage : $obj->line_length($newval) | |
| 265 Function: Set the alignment output line length | |
| 266 Returns : value of line_length | |
| 267 Args : newvalue (optional) | |
| 268 | |
| 269 | |
| 270 =cut | |
| 271 | |
| 272 sub line_length { | |
| 273 my ($self,$value) = @_; | |
| 274 if( defined $value) { | |
| 275 $self->{'_line_length'} = $value; | |
| 276 } | |
| 277 return $self->{'_line_length'}; | |
| 278 } | |
| 279 | |
| 280 1; |
