comparison variant_effect_predictor/Bio/AlignIO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: AlignIO.pm,v 1.28 2002/10/22 07:38:23 lapp Exp $
2 #
3 # BioPerl module for Bio::AlignIO
4 #
5 # based on the Bio::SeqIO module
6 # by Ewan Birney <birney@sanger.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
8 #
9 # Copyright Peter Schattner
10 #
11 # You may distribute this module under the same terms as perl itself
12 #
13 # _history
14 # October 18, 1999 SeqIO largely rewritten by Lincoln Stein
15 # September, 2000 AlignIO written by Peter Schattner
16
17 # POD documentation - main docs before the code
18
19 =head1 NAME
20
21 Bio::AlignIO - Handler for AlignIO Formats
22
23 =head1 SYNOPSIS
24
25 use Bio::AlignIO;
26
27 $inputfilename = "testaln.fasta";
28 $in = Bio::AlignIO->new(-file => $inputfilename ,
29 '-format' => 'fasta');
30 $out = Bio::AlignIO->new(-file => ">out.aln.pfam" ,
31 '-format' => 'pfam');
32 # note: we quote -format to keep older perl's from complaining.
33
34 while ( my $aln = $in->next_aln() ) {
35 $out->write_aln($aln);
36 }
37
38 or
39
40 use Bio::AlignIO;
41
42 $inputfilename = "testaln.fasta";
43 $in = Bio::AlignIO->newFh(-file => $inputfilename ,
44 '-format' => 'fasta');
45 $out = Bio::AlignIO->newFh('-format' => 'pfam');
46
47 # World's shortest Fasta<->pfam format converter:
48 print $out $_ while <$in>;
49
50 =head1 DESCRIPTION
51
52 Bio::AlignIO is a handler module for the formats in the AlignIO set
53 (eg, Bio::AlignIO::fasta). It is the officially sanctioned way of
54 getting at the alignment objects, which most people should use. The
55 resulting alignment is a Bio::Align::AlignI compliant object. See
56 L<Bio::Align::AlignI> for more information.
57
58 The idea is that you request a stream object for a particular format.
59 All the stream objects have a notion of an internal file that is read
60 from or written to. A particular AlignIO object instance is configured
61 for either input or output. A specific example of a stream object is
62 the Bio::AlignIO::fasta object.
63
64 Each stream object has functions
65
66 $stream->next_aln();
67
68 and
69
70 $stream->write_aln($aln);
71
72 also
73
74 $stream->type() # returns 'INPUT' or 'OUTPUT'
75
76 As an added bonus, you can recover a filehandle that is tied to the
77 AlignIO object, allowing you to use the standard E<lt>E<gt> and print
78 operations to read and write sequence objects:
79
80 use Bio::AlignIO;
81
82 # read from standard input
83 $stream = Bio::AlignIO->newFh(-format => 'Fasta');
84
85 while ( $aln = <$stream> ) {
86 # do something with $aln
87 }
88
89 and
90
91 print $stream $aln; # when stream is in output mode
92
93 This makes the simplest ever reformatter
94
95 #!/usr/local/bin/perl
96
97 $format1 = shift;
98 $format2 = shift ||
99 die "Usage: reformat format1 format2 < input > output";
100
101 use Bio::AlignIO;
102
103 $in = Bio::AlignIO->newFh(-format => $format1 );
104 $out = Bio::AlignIO->newFh(-format => $format2 );
105 # note: you might want to quote -format to keep
106 # older perl's from complaining.
107
108 print $out $_ while <$in>;
109
110 AlignIO.pm is patterned on the module SeqIO.pm and shares most of the
111 SeqIO.pm features. One significant difference currently is that
112 AlignIO.pm usually handles IO for only a single alignment at a time
113 (SeqIO.pm handles IO for multiple sequences in a single stream.) The
114 principal reason for this is that whereas simultaneously handling
115 multiple sequences is a common requirement, simultaneous handling of
116 multiple alignments is not. The only current exception is format
117 "bl2seq" which parses results of the Blast bl2seq program and which
118 may produce several alignment pairs. This set of alignment pairs can
119 be read using multiple calls to next_aln.
120
121 Capability for IO for more than one multiple alignment - other than
122 for bl2seq format -(which may be of use for certain applications such
123 as IO for Pfam libraries) may be included in the future. For this
124 reason we keep the name "next_aln()" for the alignment input routine,
125 even though in most cases only one alignment is read (or written) at a
126 time and the name "read_aln()" might be more appropriate.
127
128 =head1 CONSTRUCTORS
129
130 =head2 Bio::AlignIO-E<gt>new()
131
132 $seqIO = Bio::AlignIO->new(-file => 'filename', -format=>$format);
133 $seqIO = Bio::AlignIO->new(-fh => \*FILEHANDLE, -format=>$format);
134 $seqIO = Bio::AlignIO->new(-format => $format);
135
136 The new() class method constructs a new Bio::AlignIO object. The
137 returned object can be used to retrieve or print BioAlign
138 objects. new() accepts the following parameters:
139
140 =over 4
141
142 =item -file
143
144 A file path to be opened for reading or writing. The usual Perl
145 conventions apply:
146
147 'file' # open file for reading
148 '>file' # open file for writing
149 '>>file' # open file for appending
150 '+<file' # open file read/write
151 'command |' # open a pipe from the command
152 '| command' # open a pipe to the command
153
154 =item -fh
155
156 You may provide new() with a previously-opened filehandle. For
157 example, to read from STDIN:
158
159 $seqIO = Bio::AlignIO->new(-fh => \*STDIN);
160
161 Note that you must pass filehandles as references to globs.
162
163 If neither a filehandle nor a filename is specified, then the module
164 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
165 semantics.
166
167 =item -format
168
169 Specify the format of the file. Supported formats include:
170
171 fasta FASTA format
172 selex selex (hmmer) format
173 stockholm stockholm format
174 prodom prodom (protein domain) format
175 clustalw clustalw (.aln) format
176 msf msf (GCG) format
177 mase mase (seaview) format
178 bl2seq Bl2seq Blast output
179 nexus Swofford et al NEXUS format
180 pfam Pfam sequence alignment format
181 phylip Felsenstein's PHYLIP format
182 emboss EMBOSS water and needle format
183 mega MEGA format
184 meme MEME format
185 psi PSI-BLAST format
186
187 Currently only those formats which were implemented in L<Bio::SimpleAlign>
188 have been incorporated in AlignIO.pm. Specifically, mase, stockholm
189 and prodom have only been implemented for input. See the specific module
190 (e.g. L<Bio::AlignIO::meme>) for notes on supported versions.
191
192 If no format is specified and a filename is given, then the module
193 will attempt to deduce it from the filename suffix. If this is unsuccessful,
194 Fasta format is assumed.
195
196 The format name is case insensitive. 'FASTA', 'Fasta' and 'fasta' are
197 all supported.
198
199 =back
200
201 =head2 Bio::AlignIO-E<gt>newFh()
202
203 $fh = Bio::AlignIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
204 $fh = Bio::AlignIO->newFh(-format => $format);
205 # etc.
206
207 This constructor behaves like new(), but returns a tied filehandle
208 rather than a Bio::AlignIO object. You can read sequences from this
209 object using the familiar E<lt>E<gt> operator, and write to it using print().
210 The usual array and $_ semantics work. For example, you can read all
211 sequence objects into an array like this:
212
213 @sequences = <$fh>;
214
215 Other operations, such as read(), sysread(), write(), close(), and printf()
216 are not supported.
217
218 =over 1
219
220 =item -flush
221
222 By default, all files (or filehandles) opened for writing alignments
223 will be flushed after each write_aln() (making the file immediately
224 usable). If you don't need this facility and would like to marginally
225 improve the efficiency of writing multiple sequences to the same file
226 (or filehandle), pass the -flush option '0' or any other value that
227 evaluates as defined but false:
228
229 my $clustal = new Bio::AlignIO -file => "<prot.aln",
230 -format => "clustalw";
231 my $msf = new Bio::AlignIO -file => ">prot.msf",
232 -format => "msf",
233 -flush => 0; # go as fast as we can!
234 while($seq = $clustal->next_aln) { $msf->write_aln($seq) }
235
236 =back
237
238 =head1 OBJECT METHODS
239
240 See below for more detailed summaries. The main methods are:
241
242 =head2 $alignment = $AlignIO-E<gt>next_aln()
243
244 Fetch an alignment from a formatted file.
245
246 =head2 $AlignIO-E<gt>write_aln($aln)
247
248 Write the specified alignment to a file.
249
250 =head2 TIEHANDLE(), READLINE(), PRINT()
251
252 These provide the tie interface. See L<perltie> for more details.
253
254 =head1 FEEDBACK
255
256 =head2 Mailing Lists
257
258 User feedback is an integral part of the evolution of this and other
259 Bioperl modules. Send your comments and suggestions preferably to one
260 of the Bioperl mailing lists. Your participation is much appreciated.
261
262 bioperl-l@bioperl.org - General discussion
263 http://bio.perl.org/MailList.html - About the mailing lists
264
265 =head2 Reporting Bugs
266
267 Report bugs to the Bioperl bug tracking system to help us keep track
268 of the bugs and their resolution.
269 Bug reports can be submitted via email or the web:
270
271 bioperl-bugs@bio.perl.org
272 http://bugzilla.bioperl.org/
273
274 =head1 AUTHOR - Peter Schattner
275
276 Email: schattner@alum.mit.edu
277
278 =head1 CONTRIBUTORS
279
280 Jason Stajich, jason@bioperl.org
281
282 =head1 APPENDIX
283
284 The rest of the documentation details each of the object
285 methods. Internal methods are usually preceded with a _
286
287 =cut
288
289 # 'Let the code begin...
290
291 package Bio::AlignIO;
292
293 use strict;
294 use vars qw(@ISA);
295
296 use Bio::Root::Root;
297 use Bio::Seq;
298 use Bio::LocatableSeq;
299 use Bio::SimpleAlign;
300 use Bio::Root::IO;
301 @ISA = qw(Bio::Root::Root Bio::Root::IO);
302
303 =head2 new
304
305 Title : new
306 Usage : $stream = Bio::AlignIO->new(-file => $filename,
307 '-format' => 'Format')
308 Function: Returns a new alignment stream
309 Returns : A Bio::AlignIO::<format> object initialised with
310 the appropriate format
311 Args : -file => $filename
312 -format => format
313 -fh => filehandle to attach to
314
315 =cut
316
# Factory constructor.
#
# When invoked on a format subclass (a class name matching
# Bio::AlignIO::<something>) the object is built by the parent
# constructor and then handed to _initialize().
#
# When invoked on the generic class, the format is resolved in this order:
#   1. the -format argument (key matched case-insensitively),
#   2. a guess from the -file argument, or from $ARGV[0] if -file is unset,
#   3. 'fasta'.
# The matching Bio::AlignIO::<format> module is loaded and construction is
# delegated to it with the original, unmodified argument list.
#
# Returns the new stream object, or undef if the format module could not
# be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Concrete format handler: build it directly.
    if ( $class =~ /Bio::AlignIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Generic class: add a lower-cased alias for every argument key so
    # that e.g. '-FORMAT' is also reachable as '-format'.
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;

    my $format = $param{'-format'};
    $format ||= $class->_guess_format( $param{'-file'} || $ARGV[0] );
    $format ||= 'fasta';
    $format = lc $format;    # format names are case insensitive

    if ( !$class->_load_format_module($format) ) {
        return undef;
    }

    my $handler_class = "Bio::AlignIO::$format";
    return $handler_class->new(@args);
}
341
342
343 =head2 newFh
344
345 Title : newFh
346 Usage : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
347 Function: does a new() followed by an fh()
348 Example : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
349 $sequence = <$fh>; # read a sequence object
350 print $fh $sequence; # write a sequence object
351 Returns : filehandle tied to the Bio::AlignIO::Fh class
352 Args :
353
354 =cut
355
# Convenience constructor: builds a stream with new() and returns the
# result of calling fh() on it. Returns nothing (undef / empty list) when
# new() does not yield a true value.
sub newFh {
    my ($class, @ctor_args) = @_;

    my $stream = $class->new(@ctor_args);
    return unless $stream;

    return $stream->fh;
}
361
362 =head2 fh
363
364 Title : fh
365 Usage : $obj->fh
366 Function:
367 Example : $fh = $obj->fh; # make a tied filehandle
368 $sequence = <$fh>; # read a sequence object
369 print $fh $sequence; # write a sequence object
370 Returns : filehandle tied to the Bio::AlignIO::Fh class
371 Args :
372
373 =cut
374
375
# Returns a freshly generated glob reference tied to this object's class,
# so that <$fh> and print $fh are routed to READLINE/PRINT of that class.
# The AlignIO object itself is passed to the tie constructor.
#
# May be called on an object or with a class name; the tie class is
# ref($self) when $self is a reference, otherwise $self itself.
sub fh {
    my $self  = shift;
    my $class = ref($self) || $self;

    # Symbol is not imported anywhere in this file; load it here rather
    # than relying on some other module having pulled it in already.
    require Symbol;

    my $glob = Symbol::gensym();
    tie $$glob, $class, $self;
    return $glob;
}
383
384 # _initialize is where the heavy stuff will happen when new is called
385
# Post-construction hook invoked by new() on format subclasses.
# Forwards every argument to _initialize_io() and always returns 1,
# regardless of what that call returns.
sub _initialize {
    my $self = shift;

    $self->_initialize_io(@_);
    return 1;
}
392
393 =head2 _load_format_module
394
395 Title : _load_format_module
396 Usage : *INTERNAL AlignIO stuff*
397 Function: Loads up (like use) a module at run time on demand
398 Example :
399 Returns :
400 Args :
401
402 =cut
403
# Loads the handler module Bio::AlignIO::<format> at run time via
# $self->_load_module().
#
# Args    : $format - format name, appended verbatim to "Bio::AlignIO::"
# Returns : 1 if _load_module() raised no exception; otherwise prints a
#           diagnostic to STDERR and returns nothing (undef / empty list).
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::AlignIO::$format";

    # Only an exception counts as failure; the return value of
    # _load_module() is not inspected.
    eval { scalar( $self->_load_module($module) ) };
    my $failure = $@;
    return 1 unless $failure;

    print STDERR "$self: $format cannot be found\n"
        . "Exception $failure\n"
        . "For more information about the AlignIO system please see the AlignIO docs.\n"
        . "This includes ways of checking for formats at compile time, not run time\n";
    return;
}
424
425 =head2 next_aln
426
427 Title : next_aln
428 Usage : $aln = stream->next_aln
429 Function: reads the next $aln object from the stream
430 Returns : a Bio::Align::AlignI compliant object
431 Args :
432
433 =cut
434
# Placeholder for the read method. This generic implementation only
# raises an error through $self->throw(); any argument after the
# invocant is ignored.
sub next_aln {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::AlignIO object.");
}
439
440 =head2 write_aln
441
442 Title : write_aln
443 Usage : $stream->write_aln($aln)
444 Function: writes the $aln object into the stream
445 Returns : 1 for success and 0 for error
446 Args : a Bio::Align::AlignI compliant object
447
448 =cut
449
# Placeholder for the write method. This generic implementation only
# raises an error through $self->throw(); the alignment argument is
# accepted but never used.
sub write_aln {
    my $self = shift;
    return $self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
}
454
455 =head2 _guess_format
456
457 Title : _guess_format
458 Usage : $obj->_guess_format($filename)
459 Function:
460 Example :
461 Returns : guessed format of filename (lower case)
462 Args :
463
464 =cut
465
# Guesses an alignment format from a filename suffix.
#
# Args    : $filename - name to inspect; only its trailing ".suffix" is
#           examined, case-insensitively.
# Returns : the lower-case format name for a recognised suffix; nothing
#           (undef in scalar context, empty list in list context) when the
#           filename is false or the suffix is not recognised.
#
# The filename is held in a lexical variable so that the caller's $_ is
# left untouched. Assigning to the global $_ here would silently overwrite
# it, including any array element it is aliased to inside for/map.
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    # Checked in order; the first matching suffix wins.
    my @suffix_table = (
        [ 'fasta'    => qr/\.(?:fasta|fast|seq|fa|fsa|nt|aa)$/i ],
        [ 'msf'      => qr/\.(?:msf|pileup|gcg)$/i ],
        [ 'pfam'     => qr/\.(?:pfam|pfm)$/i ],
        [ 'selex'    => qr/\.(?:selex|slx|selx|slex|sx)$/i ],
        [ 'phylip'   => qr/\.(?:phylip|phlp|phyl|phy|ph)$/i ],
        [ 'nexus'    => qr/\.(?:nexus|nex)$/i ],
        [ 'mega'     => qr/\.(?:meg|mega)$/i ],
        [ 'clustalw' => qr/\.aln$/i ],
        [ 'meme'     => qr/\.meme$/i ],
        [ 'emboss'   => qr/\.(?:water|needle)$/i ],
        [ 'psi'      => qr/\.psi$/i ],
    );

    for my $entry (@suffix_table) {
        my ($format, $pattern) = @$entry;
        return $format if $filename =~ $pattern;
    }

    # Unknown suffix: say so explicitly instead of leaking the value of
    # the last failed pattern match.
    return;
}
481
# Destructor: calls the object's close() method when the object is
# destroyed.
sub DESTROY {
    my ($stream) = @_;
    return $stream->close();
}
486
# Tie constructor. Wraps the given AlignIO object in a new hash, stored
# under the key 'alignio', and blesses that hash into $class.
sub TIEHANDLE {
    my ($class, $alignio) = @_;

    my %wrapper = ( 'alignio' => $alignio );
    return bless \%wrapper, $class;
}
491
# Tie read hook behind <$fh>.
#
# In scalar (or void) context returns the result of a single next_aln()
# call on the wrapped AlignIO object. In list context keeps calling
# next_aln() until it returns a false value and returns everything
# collected up to that point.
sub READLINE {
    my ($tied) = @_;
    my $stream = $tied->{'alignio'};

    if ( !wantarray ) {
        return $stream->next_aln();
    }

    my @alignments;
    while ( my $aln = $stream->next_aln() ) {
        push @alignments, $aln;
    }
    return @alignments;
}
499
# Tie write hook behind print $fh. Passes every printed item to
# write_aln() on the wrapped AlignIO object in a single call and returns
# whatever that call returns.
sub PRINT {
    my ($tied, @alignments) = @_;
    return $tied->{'alignio'}->write_aln(@alignments);
}
504
505 1;