comparison variant_effect_predictor/Bio/AlignIO/clustalw.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: clustalw.pm,v 1.21 2002/10/22 07:38:25 lapp Exp $
2 #
3 # BioPerl module for Bio::AlignIO::clustalw
4
5 # based on the Bio::SeqIO modules
6 # by Ewan Birney <birney@sanger.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
8 #
9 # and the SimpleAlign.pm module of Ewan Birney
10 #
11 # Copyright Peter Schattner
12 #
13 # You may distribute this module under the same terms as perl itself
14 # _history
15 # September 5, 2000
16 # POD documentation - main docs before the code
17
18 =head1 NAME
19
20 Bio::AlignIO::clustalw - clustalw sequence input/output stream
21
22 =head1 SYNOPSIS
23
24 Do not use this module directly. Use it via the Bio::AlignIO class.
25
26 =head1 DESCRIPTION
27
28 This object can transform Bio::Align::AlignI objects to and from clustalw flat
29 file databases.
30
31 =head1 FEEDBACK
32
33 =head2 Mailing Lists
34
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
38
39 bioperl-l@bioperl.org - General discussion
40 http://bio.perl.org/MailList.html - About the mailing lists
41
42
43 =head2 Reporting Bugs
44
45 Report bugs to the Bioperl bug tracking system to help us keep track
46 the bugs and their resolution. Bug reports can be submitted via email
47 or the web:
48
49 bioperl-bugs@bio.perl.org
50 http://bugzilla.bioperl.org/
51
52 =head1 AUTHORS - Peter Schattner
53
54 Email: schattner@alum.mit.edu
55
56
57 =head1 APPENDIX
58
59 The rest of the documentation details each of the object
60 methods. Internal methods are usually preceded with a _
61
62 =cut
63
64 # Let the code begin...
65
66 package Bio::AlignIO::clustalw;
67 use vars qw(@ISA $LINELENGTH);
68 use strict;
69
70 use Bio::AlignIO;
71 use Bio::LocatableSeq;
72 use Bio::SimpleAlign; # to be Bio::Align::Simple
73
74 $LINELENGTH = 60;
75
76 @ISA = qw(Bio::AlignIO);
77
78 =head2 new
79
80 Title : new
81 Usage : $alignio = new Bio::AlignIO(-format => 'clustalw',
82 -file => 'filename');
83 Function: returns a new Bio::AlignIO object to handle clustalw files
84 Returns : Bio::AlignIO::clustalw object
85 Args : -verbose => verbosity setting (-1,0,1,2)
86 -file => name of file to read in or with ">" - writeout
87 -fh => alternative to -file param - provide a filehandle
88 to read from/write to
89 -format => type of Alignment Format to process or produce
90 -percentages => (clustalw only) display a percentage of identity
91 in each line of the alignment.
92
93 -linelength=> Set the alignment output line length (default 60)
94
95 =cut
96
97 sub _initialize {
98 my ($self, @args) = @_;
99 $self->SUPER::_initialize(@args);
100 my ($percentages,
101 $ll) = $self->_rearrange([qw(PERCENTAGES LINELENGTH)], @args);
102 defined $percentages && $self->percentages($percentages);
103 $self->line_length($ll || $LINELENGTH);
104 }
105
106 =head2 next_aln
107
108 Title : next_aln
109 Usage : $aln = $stream->next_aln()
110 Function: returns the next alignment in the stream
111 Returns : Bio::Align::AlignI object
112 Args : NONE
113
114 See L<Bio::Align::AlignI> for details
115
116 =cut
117
118 sub next_aln {
119 my ($self) = @_;
120
121 my $first_line;
122 if( defined ($first_line = $self->_readline )
123 && $first_line !~ /CLUSTAL/ ) {
124 $self->warn("trying to parse a file which does not start with a CLUSTAL header");
125 }
126 my %alignments;
127 my $aln = Bio::SimpleAlign->new(-source => 'clustalw');
128 my $order = 0;
129 my %order;
130 $self->{_lastline} = '';
131 while( defined ($_ = $self->_readline) ) {
132 next if ( /^\s+$/ );
133
134 my ($seqname, $aln_line) = ('', '');
135 if( /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) {
136 # clustal 1.4 format
137 ($seqname,$aln_line) = ("$1:$2-$3",$4);
138 } elsif( /^(\S+)\s+([A-Z\-]+)\s*$/ ) {
139 ($seqname,$aln_line) = ($1,$2);
140 } else { $self->{_lastline} = $_; next }
141
142 if( !exists $order{$seqname} ) {
143 $order{$seqname} = $order++;
144 }
145
146 $alignments{$seqname} .= $aln_line;
147 }
148 my ($sname,$start,$end);
149 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
150 if( $name =~ /(\S+):(\d+)-(\d+)/ ) {
151 ($sname,$start,$end) = ($1,$2,$3);
152 } else {
153 ($sname, $start) = ($name,1);
154 my $str = $alignments{$name};
155 $str =~ s/[^A-Za-z]//g;
156 $end = length($str);
157 }
158 my $seq = new Bio::LocatableSeq('-seq' => $alignments{$name},
159 '-id' => $sname,
160 '-start' => $start,
161 '-end' => $end);
162 $aln->add_seq($seq);
163 }
164 undef $aln if( !defined $end || $end <= 0);
165 return $aln;
166 }
167
168 =head2 write_aln
169
170 Title : write_aln
171 Usage : $stream->write_aln(@aln)
172 Function: writes the clustalw-format object (.aln) into the stream
173 Returns : 1 for success and 0 for error
174 Args : L<Bio::Align::AlignI> object
175
176
177 =cut
178
179 sub write_aln {
180 my ($self,@aln) = @_;
181 my ($count,$length,$seq,@seq,$tempcount,$line_len);
182 $line_len = $self->line_length || $LINELENGTH;
183 foreach my $aln (@aln) {
184 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
185 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
186 next;
187 }
188 my $matchline = $aln->match_line;
189
190 $self->_print (sprintf("CLUSTAL W(1.81) multiple sequence alignment\n\n\n")) or return;
191
192 $length = $aln->length();
193 $count = $tempcount = 0;
194 @seq = $aln->each_seq();
195 my $max = 22;
196 foreach $seq ( @seq ) {
197 $max = length ($aln->displayname($seq->get_nse()))
198 if( length ($aln->displayname($seq->get_nse())) > $max );
199 }
200 while( $count < $length ) {
201 foreach $seq ( @seq ) {
202 #
203 # Following lines are to suppress warnings
204 # if some sequences in the alignment are much longer than others.
205
206 my ($substring);
207 my $seqchars = $seq->seq();
208 SWITCH: {
209 if (length($seqchars) >= ($count + $line_len)) {
210 $substring = substr($seqchars,$count,$line_len);
211 last SWITCH;
212 } elsif (length($seqchars) >= $count) {
213 $substring = substr($seqchars,$count);
214 last SWITCH;
215 }
216 $substring = "";
217 }
218
219 $self->_print (sprintf("%-".$max."s %s\n",
220 $aln->displayname($seq->get_nse()),
221 $substring)) or return;
222 }
223
224 my $linesubstr = substr($matchline, $count,$line_len);
225 my $percentages = '';
226 if( $self->percentages ) {
227 my ($strcpy) = ($linesubstr);
228 my $count = ($strcpy =~ tr/\*//);
229 $percentages = sprintf("\t%d%%", 100 * ($count / length($linesubstr)));
230 }
231 $self->_print (sprintf("%-".$max."s %s%s\n", '', $linesubstr,
232 $percentages));
233 $self->_print (sprintf("\n\n")) or return;
234 $count += $line_len;
235 }
236 }
237 $self->flush if $self->_flush_on_write && defined $self->_fh;
238 return 1;
239 }
240
241 =head2 percentages
242
243 Title : percentages
244 Usage : $obj->percentages($newval)
245 Function: Set the percentages flag - whether or not to show percentages in
246 each output line
247 Returns : value of percentages
248 Args : newvalue (optional)
249
250
251 =cut
252
253 sub percentages {
254 my ($self,$value) = @_;
255 if( defined $value) {
256 $self->{'_percentages'} = $value;
257 }
258 return $self->{'_percentages'};
259 }
260
261 =head2 line_length
262
263 Title : line_length
264 Usage : $obj->line_length($newval)
265 Function: Set the alignment output line length
266 Returns : value of line_length
267 Args : newvalue (optional)
268
269
270 =cut
271
272 sub line_length {
273 my ($self,$value) = @_;
274 if( defined $value) {
275 $self->{'_line_length'} = $value;
276 }
277 return $self->{'_line_length'};
278 }
279
280 1;