Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/AlignIO/clustalw.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: clustalw.pm,v 1.21 2002/10/22 07:38:25 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::AlignIO::clustalw | |
4 | |
5 # based on the Bio::SeqIO modules | |
6 # by Ewan Birney <birney@sanger.ac.uk> | |
7 # and Lincoln Stein <lstein@cshl.org> | |
8 # | |
9 # and the SimpleAlign.pm module of Ewan Birney | |
10 # | |
11 # Copyright Peter Schattner | |
12 # | |
13 # You may distribute this module under the same terms as perl itself | |
14 # _history | |
15 # September 5, 2000 | |
16 # POD documentation - main docs before the code | |
17 | |
18 =head1 NAME | |
19 | |
20 Bio::AlignIO::clustalw - clustalw sequence input/output stream | |
21 | |
22 =head1 SYNOPSIS | |
23 | |
24 Do not use this module directly. Use it via the Bio::AlignIO class. | |
25 | |
26 =head1 DESCRIPTION | |
27 | |
28 This object can transform Bio::Align::AlignI objects to and from clustalw flat | |
29 file databases. | |
30 | |
31 =head1 FEEDBACK | |
32 | |
33 =head2 Mailing Lists | |
34 | |
35 User feedback is an integral part of the evolution of this and other | |
36 Bioperl modules. Send your comments and suggestions preferably to one | |
37 of the Bioperl mailing lists. Your participation is much appreciated. | |
38 | |
39 bioperl-l@bioperl.org - General discussion | |
40 http://bio.perl.org/MailList.html - About the mailing lists | |
41 | |
42 | |
43 =head2 Reporting Bugs | |
44 | |
45 Report bugs to the Bioperl bug tracking system to help us keep track | |
46 the bugs and their resolution. Bug reports can be submitted via email | |
47 or the web: | |
48 | |
49 bioperl-bugs@bio.perl.org | |
50 http://bugzilla.bioperl.org/ | |
51 | |
52 =head1 AUTHORS - Peter Schattner | |
53 | |
54 Email: schattner@alum.mit.edu | |
55 | |
56 | |
57 =head1 APPENDIX | |
58 | |
59 The rest of the documentation details each of the object | |
60 methods. Internal methods are usually preceded with a _ | |
61 | |
62 =cut | |
63 | |
64 # Let the code begin... | |
65 | |
66 package Bio::AlignIO::clustalw; | |
67 use vars qw(@ISA $LINELENGTH); | |
68 use strict; | |
69 | |
70 use Bio::AlignIO; | |
71 use Bio::LocatableSeq; | |
72 use Bio::SimpleAlign; # to be Bio::Align::Simple | |
73 | |
74 $LINELENGTH = 60; | |
75 | |
76 @ISA = qw(Bio::AlignIO); | |
77 | |
78 =head2 new | |
79 | |
80 Title : new | |
81 Usage : $alignio = new Bio::AlignIO(-format => 'clustalw', | |
82 -file => 'filename'); | |
83 Function: returns a new Bio::AlignIO object to handle clustalw files | |
84 Returns : Bio::AlignIO::clustalw object | |
85 Args : -verbose => verbosity setting (-1,0,1,2) | |
86 -file => name of file to read in or with ">" - writeout | |
87 -fh => alternative to -file param - provide a filehandle | |
88 to read from/write to | |
89 -format => type of Alignment Format to process or produce | |
90 -percentages => (clustalw only) display a percentage of identity | |
91 in each line of the alignment. | |
92 | |
93 -linelength=> Set the alignment output line length (default 60) | |
94 | |
95 =cut | |
96 | |
97 sub _initialize { | |
98 my ($self, @args) = @_; | |
99 $self->SUPER::_initialize(@args); | |
100 my ($percentages, | |
101 $ll) = $self->_rearrange([qw(PERCENTAGES LINELENGTH)], @args); | |
102 defined $percentages && $self->percentages($percentages); | |
103 $self->line_length($ll || $LINELENGTH); | |
104 } | |
105 | |
106 =head2 next_aln | |
107 | |
108 Title : next_aln | |
109 Usage : $aln = $stream->next_aln() | |
110 Function: returns the next alignment in the stream | |
111 Returns : Bio::Align::AlignI object | |
112 Args : NONE | |
113 | |
114 See L<Bio::Align::AlignI> for details | |
115 | |
116 =cut | |
117 | |
118 sub next_aln { | |
119 my ($self) = @_; | |
120 | |
121 my $first_line; | |
122 if( defined ($first_line = $self->_readline ) | |
123 && $first_line !~ /CLUSTAL/ ) { | |
124 $self->warn("trying to parse a file which does not start with a CLUSTAL header"); | |
125 } | |
126 my %alignments; | |
127 my $aln = Bio::SimpleAlign->new(-source => 'clustalw'); | |
128 my $order = 0; | |
129 my %order; | |
130 $self->{_lastline} = ''; | |
131 while( defined ($_ = $self->_readline) ) { | |
132 next if ( /^\s+$/ ); | |
133 | |
134 my ($seqname, $aln_line) = ('', ''); | |
135 if( /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) { | |
136 # clustal 1.4 format | |
137 ($seqname,$aln_line) = ("$1:$2-$3",$4); | |
138 } elsif( /^(\S+)\s+([A-Z\-]+)\s*$/ ) { | |
139 ($seqname,$aln_line) = ($1,$2); | |
140 } else { $self->{_lastline} = $_; next } | |
141 | |
142 if( !exists $order{$seqname} ) { | |
143 $order{$seqname} = $order++; | |
144 } | |
145 | |
146 $alignments{$seqname} .= $aln_line; | |
147 } | |
148 my ($sname,$start,$end); | |
149 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) { | |
150 if( $name =~ /(\S+):(\d+)-(\d+)/ ) { | |
151 ($sname,$start,$end) = ($1,$2,$3); | |
152 } else { | |
153 ($sname, $start) = ($name,1); | |
154 my $str = $alignments{$name}; | |
155 $str =~ s/[^A-Za-z]//g; | |
156 $end = length($str); | |
157 } | |
158 my $seq = new Bio::LocatableSeq('-seq' => $alignments{$name}, | |
159 '-id' => $sname, | |
160 '-start' => $start, | |
161 '-end' => $end); | |
162 $aln->add_seq($seq); | |
163 } | |
164 undef $aln if( !defined $end || $end <= 0); | |
165 return $aln; | |
166 } | |
167 | |
168 =head2 write_aln | |
169 | |
170 Title : write_aln | |
171 Usage : $stream->write_aln(@aln) | |
172 Function: writes the clustalw-format object (.aln) into the stream | |
173 Returns : 1 for success and 0 for error | |
174 Args : L<Bio::Align::AlignI> object | |
175 | |
176 | |
177 =cut | |
178 | |
179 sub write_aln { | |
180 my ($self,@aln) = @_; | |
181 my ($count,$length,$seq,@seq,$tempcount,$line_len); | |
182 $line_len = $self->line_length || $LINELENGTH; | |
183 foreach my $aln (@aln) { | |
184 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { | |
185 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); | |
186 next; | |
187 } | |
188 my $matchline = $aln->match_line; | |
189 | |
190 $self->_print (sprintf("CLUSTAL W(1.81) multiple sequence alignment\n\n\n")) or return; | |
191 | |
192 $length = $aln->length(); | |
193 $count = $tempcount = 0; | |
194 @seq = $aln->each_seq(); | |
195 my $max = 22; | |
196 foreach $seq ( @seq ) { | |
197 $max = length ($aln->displayname($seq->get_nse())) | |
198 if( length ($aln->displayname($seq->get_nse())) > $max ); | |
199 } | |
200 while( $count < $length ) { | |
201 foreach $seq ( @seq ) { | |
202 # | |
203 # Following lines are to suppress warnings | |
204 # if some sequences in the alignment are much longer than others. | |
205 | |
206 my ($substring); | |
207 my $seqchars = $seq->seq(); | |
208 SWITCH: { | |
209 if (length($seqchars) >= ($count + $line_len)) { | |
210 $substring = substr($seqchars,$count,$line_len); | |
211 last SWITCH; | |
212 } elsif (length($seqchars) >= $count) { | |
213 $substring = substr($seqchars,$count); | |
214 last SWITCH; | |
215 } | |
216 $substring = ""; | |
217 } | |
218 | |
219 $self->_print (sprintf("%-".$max."s %s\n", | |
220 $aln->displayname($seq->get_nse()), | |
221 $substring)) or return; | |
222 } | |
223 | |
224 my $linesubstr = substr($matchline, $count,$line_len); | |
225 my $percentages = ''; | |
226 if( $self->percentages ) { | |
227 my ($strcpy) = ($linesubstr); | |
228 my $count = ($strcpy =~ tr/\*//); | |
229 $percentages = sprintf("\t%d%%", 100 * ($count / length($linesubstr))); | |
230 } | |
231 $self->_print (sprintf("%-".$max."s %s%s\n", '', $linesubstr, | |
232 $percentages)); | |
233 $self->_print (sprintf("\n\n")) or return; | |
234 $count += $line_len; | |
235 } | |
236 } | |
237 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
238 return 1; | |
239 } | |
240 | |
241 =head2 percentages | |
242 | |
243 Title : percentages | |
244 Usage : $obj->percentages($newval) | |
245 Function: Set the percentages flag - whether or not to show percentages in | |
246 each output line | |
247 Returns : value of percentages | |
248 Args : newvalue (optional) | |
249 | |
250 | |
251 =cut | |
252 | |
253 sub percentages { | |
254 my ($self,$value) = @_; | |
255 if( defined $value) { | |
256 $self->{'_percentages'} = $value; | |
257 } | |
258 return $self->{'_percentages'}; | |
259 } | |
260 | |
261 =head2 line_length | |
262 | |
263 Title : line_length | |
264 Usage : $obj->line_length($newval) | |
265 Function: Set the alignment output line length | |
266 Returns : value of line_length | |
267 Args : newvalue (optional) | |
268 | |
269 | |
270 =cut | |
271 | |
272 sub line_length { | |
273 my ($self,$value) = @_; | |
274 if( defined $value) { | |
275 $self->{'_line_length'} = $value; | |
276 } | |
277 return $self->{'_line_length'}; | |
278 } | |
279 | |
280 1; |