Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/AlignIO.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: AlignIO.pm,v 1.28 2002/10/22 07:38:23 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::AlignIO | |
4 # | |
5 # based on the Bio::SeqIO module | |
6 # by Ewan Birney <birney@sanger.ac.uk> | |
7 # and Lincoln Stein <lstein@cshl.org> | |
8 # | |
9 # Copyright Peter Schattner | |
10 # | |
11 # You may distribute this module under the same terms as perl itself | |
12 # | |
13 # _history | |
14 # October 18, 1999 SeqIO largely rewritten by Lincoln Stein | |
15 # September, 2000 AlignIO written by Peter Schattner | |
16 | |
17 # POD documentation - main docs before the code | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::AlignIO - Handler for AlignIO Formats | |
22 | |
23 =head1 SYNOPSIS | |
24 | |
25 use Bio::AlignIO; | |
26 | |
27 $inputfilename = "testaln.fasta"; | |
28 $in = Bio::AlignIO->new(-file => $inputfilename , | |
29 '-format' => 'fasta'); | |
30 $out = Bio::AlignIO->new(-file => ">out.aln.pfam" , | |
31 '-format' => 'pfam'); | |
32 # note: we quote -format to keep older perl's from complaining. | |
33 | |
34 while ( my $aln = $in->next_aln() ) { | |
35 $out->write_aln($aln); | |
36 } | |
37 | |
38 or | |
39 | |
40 use Bio::AlignIO; | |
41 | |
42 $inputfilename = "testaln.fasta"; | |
43 $in = Bio::AlignIO->newFh(-file => $inputfilename , | |
44 '-format' => 'fasta'); | |
45 $out = Bio::AlignIO->newFh('-format' => 'pfam'); | |
46 | |
47 # World's shortest Fasta<->pfam format converter: | |
48 print $out $_ while <$in>; | |
49 | |
50 =head1 DESCRIPTION | |
51 | |
52 Bio::AlignIO is a handler module for the formats in the AlignIO set | |
53 (eg, Bio::AlignIO::fasta). It is the officially sanctioned way of | |
54 getting at the alignment objects, which most people should use. The | |
55 resulting alignment is a Bio::Align::AlignI compliant object. See | |
56 L<Bio::Align::AlignI> for more information. | |
57 | |
58 The idea is that you request a stream object for a particular format. | |
59 All the stream objects have a notion of an internal file that is read | |
60 from or written to. A particular AlignIO object instance is configured | |
61 for either input or output. A specific example of a stream object is | |
62 the Bio::AlignIO::fasta object. | |
63 | |
64 Each stream object has functions | |
65 | |
66 $stream->next_aln(); | |
67 | |
68 and | |
69 | |
70 $stream->write_aln($aln); | |
71 | |
72 also | |
73 | |
74 $stream->type() # returns 'INPUT' or 'OUTPUT' | |
75 | |
76 As an added bonus, you can recover a filehandle that is tied to the | |
77 AlignIO object, allowing you to use the standard E<lt>E<gt> and print | |
78 operations to read and write sequence objects: | |
79 | |
80 use Bio::AlignIO; | |
81 | |
82 # read from standard input | |
83 $stream = Bio::AlignIO->newFh(-format => 'Fasta'); | |
84 | |
85 while ( $aln = <$stream> ) { | |
86 # do something with $aln | |
87 } | |
88 | |
89 and | |
90 | |
91 print $stream $aln; # when stream is in output mode | |
92 | |
93 This makes the simplest ever reformatter | |
94 | |
95 #!/usr/local/bin/perl | |
96 | |
97 $format1 = shift; | |
98 $format2 = shift || | |
99 die "Usage: reformat format1 format2 < input > output"; | |
100 | |
101 use Bio::AlignIO; | |
102 | |
103 $in = Bio::AlignIO->newFh(-format => $format1 ); | |
104 $out = Bio::AlignIO->newFh(-format => $format2 ); | |
105 # note: you might want to quote -format to keep | |
106 # older perl's from complaining. | |
107 | |
108 print $out $_ while <$in>; | |
109 | |
110 AlignIO.pm is patterned on the module SeqIO.pm and shares most of the | |
111 SeqIO.pm features. One significant difference currently is that | |
112 AlignIO.pm usually handles IO for only a single alignment at a time | |
113 (SeqIO.pm handles IO for multiple sequences in a single stream.) The | |
114 principal reason for this is that whereas simultaneously handling | |
115 multiple sequences is a common requirement, simultaneous handling of | |
116 multiple alignments is not. The only current exception is format | |
117 "bl2seq" which parses results of the Blast bl2seq program and which | |
118 may produce several alignment pairs. This set of alignment pairs can | |
119 be read using multiple calls to next_aln. | |
120 | |
121 Capability for IO for more than one multiple alignment - other than | |
122 for bl2seq format -(which may be of use for certain applications such | |
123 as IO for Pfam libraries) may be included in the future. For this | |
124 reason we keep the name "next_aln()" for the alignment input routine, | |
125 even though in most cases only one alignment is read (or written) at a | |
126 time and the name "read_aln()" might be more appropriate. | |
127 | |
128 =head1 CONSTRUCTORS | |
129 | |
130 =head2 Bio::AlignIO-E<gt>new() | |
131 | |
132 $seqIO = Bio::AlignIO->new(-file => 'filename', -format=>$format); | |
133 $seqIO = Bio::AlignIO->new(-fh => \*FILEHANDLE, -format=>$format); | |
134 $seqIO = Bio::AlignIO->new(-format => $format); | |
135 | |
136 The new() class method constructs a new Bio::AlignIO object. The | |
137 returned object can be used to retrieve or print BioAlign | |
138 objects. new() accepts the following parameters: | |
139 | |
140 =over 4 | |
141 | |
142 =item -file | |
143 | |
144 A file path to be opened for reading or writing. The usual Perl | |
145 conventions apply: | |
146 | |
147 'file' # open file for reading | |
148 '>file' # open file for writing | |
149 '>>file' # open file for appending | |
150 '+<file' # open file read/write | |
151 'command |' # open a pipe from the command | |
152 '| command' # open a pipe to the command | |
153 | |
154 =item -fh | |
155 | |
156 You may provide new() with a previously-opened filehandle. For | |
157 example, to read from STDIN: | |
158 | |
159 $seqIO = Bio::AlignIO->new(-fh => \*STDIN); | |
160 | |
161 Note that you must pass filehandles as references to globs. | |
162 | |
163 If neither a filehandle nor a filename is specified, then the module | |
164 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> | |
165 semantics. | |
166 | |
167 =item -format | |
168 | |
169 Specify the format of the file. Supported formats include: | |
170 | |
171 fasta FASTA format | |
172 selex selex (hmmer) format | |
173 stockholm stockholm format | |
174 prodom prodom (protein domain) format | |
175 clustalw clustalw (.aln) format | |
176 msf msf (GCG) format | |
177 mase mase (seaview) format | |
178 bl2seq Bl2seq Blast output | |
179 nexus Swofford et al NEXUS format | |
180 pfam Pfam sequence alignment format | |
181 phylip Felsenstein's PHYLIP format | |
182 emboss EMBOSS water and needle format | |
183 mega MEGA format | |
184 meme MEME format | |
185 psi PSI-BLAST format | |
186 | |
187 Currently only those formats which were implemented in L<Bio::SimpleAlign> | |
188 have been incorporated in AlignIO.pm. Specifically, mase, stockholm | |
189 and prodom have only been implemented for input. See the specific module | |
190 (e.g. L<Bio::AlignIO::meme>) for notes on supported versions. | |
191 | |
192 If no format is specified and a filename is given, then the module | |
193 will attempt to deduce it from the filename suffix. If this is unsuccessful, | |
194 Fasta format is assumed. | |
195 | |
196 The format name is case insensitive. 'FASTA', 'Fasta' and 'fasta' are | |
197 all supported. | |
198 | |
199 =back | |
200 | |
201 =head2 Bio::AlignIO-E<gt>newFh() | |
202 | |
203 $fh = Bio::AlignIO->newFh(-fh => \*FILEHANDLE, -format=>$format); | |
204 $fh = Bio::AlignIO->newFh(-format => $format); | |
205 # etc. | |
206 | |
207 This constructor behaves like new(), but returns a tied filehandle | |
208 rather than a Bio::AlignIO object. You can read sequences from this | |
209 object using the familiar E<lt>E<gt> operator, and write to it using print(). | |
210 The usual array and $_ semantics work. For example, you can read all | |
211 sequence objects into an array like this: | |
212 | |
213 @sequences = <$fh>; | |
214 | |
215 Other operations, such as read(), sysread(), write(), close(), and printf() | |
216 are not supported. | |
217 | |
218 =over 1 | |
219 | |
220 =item -flush | |
221 | |
222 By default, all files (or filehandles) opened for writing alignments | |
223 will be flushed after each write_aln() (making the file immediately | |
224 usable). If you don't need this facility and would like to marginally | |
225 improve the efficiency of writing multiple sequences to the same file | |
226 (or filehandle), pass the -flush option '0' or any other value that | |
227 evaluates as defined but false: | |
228 | |
229 my $clustal = new Bio::AlignIO -file => "<prot.aln", | |
230 -format => "clustalw"; | |
231 my $msf = new Bio::AlignIO -file => ">prot.msf", | |
232 -format => "msf", | |
233 -flush => 0; # go as fast as we can! | |
234 while($seq = $clustal->next_aln) { $msf->write_aln($seq) } | |
235 | |
236 =back | |
237 | |
238 =head1 OBJECT METHODS | |
239 | |
240 See below for more detailed summaries. The main methods are: | |
241 | |
242 =head2 $alignment = $AlignIO-E<gt>next_aln() | |
243 | |
244 Fetch an alignment from a formatted file. | |
245 | |
246 =head2 $AlignIO-E<gt>write_aln($aln) | |
247 | |
248 Write the specified alignment to a file. | |
249 | |
250 =head2 TIEHANDLE(), READLINE(), PRINT() | |
251 | |
252 These provide the tie interface. See L<perltie> for more details. | |
253 | |
254 =head1 FEEDBACK | |
255 | |
256 =head2 Mailing Lists | |
257 | |
258 User feedback is an integral part of the evolution of this and other | |
259 Bioperl modules. Send your comments and suggestions preferably to one | |
260 of the Bioperl mailing lists. Your participation is much appreciated. | |
261 | |
262 bioperl-l@bioperl.org - General discussion | |
263 http://bio.perl.org/MailList.html - About the mailing lists | |
264 | |
265 =head2 Reporting Bugs | |
266 | |
267 Report bugs to the Bioperl bug tracking system to help us keep track | |
268 of the bugs and their resolution. | |
269 Bug reports can be submitted via email or the web: | |
270 | |
271 bioperl-bugs@bio.perl.org | |
272 http://bugzilla.bioperl.org/ | |
273 | |
274 =head1 AUTHOR - Peter Schattner | |
275 | |
276 Email: schattner@alum.mit.edu | |
277 | |
278 =head1 CONTRIBUTORS | |
279 | |
280 Jason Stajich, jason@bioperl.org | |
281 | |
282 =head1 APPENDIX | |
283 | |
284 The rest of the documentation details each of the object | |
285 methods. Internal methods are usually preceded with a _ | |
286 | |
287 =cut | |
288 | |
289 # 'Let the code begin... | |
290 | |
291 package Bio::AlignIO; | |
292 | |
293 use strict; | |
294 use vars qw(@ISA); | |
295 | |
296 use Bio::Root::Root; | |
297 use Bio::Seq; | |
298 use Bio::LocatableSeq; | |
299 use Bio::SimpleAlign; | |
300 use Bio::Root::IO; | |
301 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
302 | |
303 =head2 new | |
304 | |
305 Title : new | |
306 Usage : $stream = Bio::AlignIO->new(-file => $filename, | |
307 '-format' => 'Format') | |
308 Function: Returns a new seqstream | |
309 Returns : A Bio::AlignIO::Handler initialised with | |
310 the appropriate format | |
311 Args : -file => $filename | |
312 -format => format | |
313 -fh => filehandle to attach to | |
314 | |
315 =cut | |
316 | |
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # A format-specific subclass (Bio::AlignIO::<format>) builds the real
    # object: construct it through the parent class and initialise it.
    # (Open question kept from the original: should SUPER be called on an
    # object when $caller is an object?)
    if ( $class =~ /Bio::AlignIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Called on the generic class: work out the format, load the module
    # implementing it and delegate construction to that module.
    my %param = @args;

    # Add a lower-cased alias for every option name so that the lookups
    # below do not depend on the caller's capitalisation.
    my @lowered = map { lc $_ } keys %param;
    @param{@lowered} = values %param;

    # Explicit -format wins, then a guess from the file name (or the first
    # command-line argument), then fasta as the last resort.
    my $format = $param{'-format'};
    $format ||= $class->_guess_format( $param{-file} || $ARGV[0] );
    $format ||= 'fasta';
    $format = lc $format;    # format names are case insensitive

    $class->_load_format_module($format) or return undef;

    my $handler_class = "Bio::AlignIO::$format";
    return $handler_class->new(@args);
}
341 | |
342 | |
343 =head2 newFh | |
344 | |
345 Title : newFh | |
346 Usage : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format') | |
347 Function: does a new() followed by an fh() | |
348 Example : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format') | |
349 $sequence = <$fh>; # read a sequence object | |
350 print $fh $sequence; # write a sequence object | |
351 Returns : filehandle tied to the class of the newly created AlignIO object | |
352 Args : | |
353 | |
354 =cut | |
355 | |
sub newFh {
    my ($class, @args) = @_;

    # Build the stream object first; give up quietly if that fails.
    my $stream = $class->new(@args);
    return unless $stream;

    # Hand back the tied filehandle wrapping the stream.
    return $stream->fh;
}
361 | |
362 =head2 fh | |
363 | |
364 Title : fh | |
365 Usage : $obj->fh | |
366 Function: | |
367 Example : $fh = $obj->fh; # make a tied filehandle | |
368 $sequence = <$fh>; # read a sequence object | |
369 print $fh $sequence; # write a sequence object | |
370 Returns : filehandle tied to the class of the invoking AlignIO object | |
371 Args : | |
372 | |
373 =cut | |
374 | |
375 | |
sub fh {
    my $self  = shift;
    my $class = ref($self) || $self;

    # Load Symbol explicitly: this file never imports it, so without this
    # the call below only works if some other module already loaded it.
    require Symbol;

    # Create an anonymous glob and tie its handle to the object's own
    # class; that class's TIEHANDLE receives $self as the wrapped stream.
    my $glob = Symbol::gensym();
    tie *{$glob}, $class, $self;
    return $glob;
}
383 | |
384 # _initialize is where the heavy stuff will happen when new is called | |
385 | |
sub _initialize {
    my $self = shift;

    # All constructor arguments are passed straight on to the I/O setup.
    $self->_initialize_io(@_);
    return 1;
}
392 | |
393 =head2 _load_format_module | |
394 | |
395 Title : _load_format_module | |
396 Usage : *INTERNAL AlignIO stuff* | |
397 Function: Loads up (like use) a module at run time on demand | |
398 Example : | |
399 Returns : | |
400 Args : | |
401 | |
402 =cut | |
403 | |
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::AlignIO::" . $format;

    # Returns 1 when the module for $format was loaded, and nothing (after
    # printing a diagnostic to STDERR) when loading threw an exception.
    #
    # Failure is detected from the eval's own return value rather than
    # from the truth of $@: an exception that evaluates to false, or a $@
    # reset while the stack unwinds, must still count as a failure.
    my $loaded = eval {
        $self->_load_module($module);
        1;
    };

    unless ($loaded) {
        my $error = $@;    # capture before anything else can reset it
        print STDERR <<END;
$self: $format cannot be found
Exception $error
For more information about the AlignIO system please see the AlignIO docs.
This includes ways of checking for formats at compile time, not run time
END
        return;
    }
    return 1;
}
424 | |
425 =head2 next_aln | |
426 | |
427 Title : next_aln | |
428 Usage : $aln = $stream->next_aln | |
429 Function: reads the next $aln object from the stream | |
430 Returns : a Bio::Align::AlignI compliant object | |
431 Args : | |
432 | |
433 =cut | |
434 | |
sub next_aln {
    my $self = shift;

    # The generic class has no parser; reading is only implemented by the
    # format-specific subclasses, so calling this version is an error.
    return $self->throw("Sorry, you cannot read from a generic Bio::AlignIO object.");
}
439 | |
440 =head2 write_aln | |
441 | |
442 Title : write_aln | |
443 Usage : $stream->write_aln($aln) | |
444 Function: writes the $aln object into the stream | |
445 Returns : 1 for success and 0 for error | |
446 Args : a Bio::Align::AlignI compliant object | |
447 | |
448 =cut | |
449 | |
sub write_aln {
    my $self = shift;

    # The generic class has no writer; output is only implemented by the
    # format-specific subclasses, so calling this version is an error.
    return $self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
}
454 | |
455 =head2 _guess_format | |
456 | |
457 Title : _guess_format | |
458 Usage : $obj->_guess_format($filename) | |
459 Function: | |
460 Example : | |
461 Returns : guessed format of filename (lower case) | |
462 Args : | |
463 | |
464 =cut | |
465 | |
sub _guess_format {
    my ($class, $filename) = @_;

    # Nothing to inspect: no file name (or a false one) means no guess.
    return unless $filename;

    # Match on a lexical copy rather than on the global $_, so the caller's
    # $_ is never overwritten (and a read-only aliased $_ cannot make this
    # die).  Suffix matching is case insensitive.
    return 'fasta'    if $filename =~ /\.(?:fasta|fast|seq|fa|fsa|nt|aa)$/i;
    return 'msf'      if $filename =~ /\.(?:msf|pileup|gcg)$/i;
    return 'pfam'     if $filename =~ /\.(?:pfam|pfm)$/i;
    return 'selex'    if $filename =~ /\.(?:selex|slx|selx|slex|sx)$/i;
    return 'phylip'   if $filename =~ /\.(?:phylip|phlp|phyl|phy|ph)$/i;
    return 'nexus'    if $filename =~ /\.(?:nexus|nex)$/i;
    return 'mega'     if $filename =~ /\.(?:meg|mega)$/i;
    return 'clustalw' if $filename =~ /\.aln$/i;
    return 'meme'     if $filename =~ /\.meme$/i;
    return 'emboss'   if $filename =~ /\.(?:water|needle)$/i;
    return 'psi'      if $filename =~ /\.psi$/i;

    # Unknown suffix: return a false value so the caller can fall back to
    # its default format.
    return;
}
481 | |
# Destructor: closes the stream when the object goes out of scope.
sub DESTROY {
    my ($stream) = @_;
    return $stream->close();
}
486 | |
# Tie constructor: wraps the given stream object in a small hash, under
# the key 'alignio', blessed into the requested class.
sub TIEHANDLE {
    my ($class, $alignio) = @_;
    my %state = ( 'alignio' => $alignio );
    return bless \%state, $class;
}
491 | |
# Tie read hook for <$fh>: in list context drains the wrapped stream and
# returns every remaining alignment; otherwise returns just the next one.
sub READLINE {
    my ($tied) = @_;

    if (wantarray) {
        my @alignments;
        while ( my $aln = $tied->{'alignio'}->next_aln() ) {
            push @alignments, $aln;
        }
        return @alignments;
    }

    return $tied->{'alignio'}->next_aln();
}
499 | |
# Tie write hook for print $fh ...: forwards every printed item to the
# wrapped stream's write_aln().
sub PRINT {
    my $tied   = shift;
    my $stream = $tied->{'alignio'};
    return $stream->write_aln(@_);
}
504 | |
505 1; |