comparison variant_effect_predictor/Bio/Structure/IO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: IO.pm,v 1.3 2002/10/22 07:45:21 lapp Exp $
2 #
3 # BioPerl module for Bio::Structure::IO
4 #
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
7 # and Kris Boulez <kris.boulez@algonomics.com>
8 #
9 # Copyright 2001, 2002 Kris Boulez
10 #
11 # You may distribute this module under the same terms as perl itself
12 #
13 # _history
14 # October 18, 1999 Largely rewritten by Lincoln Stein
15 # November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified
16 # where needed. Factoring out common methods
17 # (to Bio::Root::IO) might be a good idea.
18
19 # POD documentation - main docs before the code
20
21 =head1 NAME
22
23 Bio::Structure::IO - Handler for Structure Formats
24
25 =head1 SYNOPSIS
26
27 use Bio::Structure::IO;
28
29 $in = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb');
30 $out = Bio::Structure::IO->new(-file => ">outputfilename" , '-format' => 'pdb');
31 # note: we quote -format to keep older perl's from complaining.
32
33 while ( my $struc = $in->next_structure() ) {
34 $out->write_structure($struc);
35 }
36
37 now, to actually get at the structure object, use the standard Bio::Structure
38 methods (look at L<Bio::Structure> if you don't know what they are)
39
40 use Bio::Structure::IO;
41
42 $in = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb');
43
44 while ( my $struc = $in->next_structure() ) {
45 print "Structure ",$struc->id," number of models: ",scalar $struc->model,"\n";
46 }
47
48
49
50 =head1 DESCRIPTION
51
52 [ The following description is a copy-paste from the Bio::SeqIO description.
53 This is not surprising as the code is also mostly a copy. ]
54
55 Bio::Structure::IO is a handler module for the formats in the Structure::IO set
56 (eg, Bio::Structure::IO::pdb). It is the officially sanctioned way of getting at
57 the format objects, which most people should use.
58
59 The Bio::Structure::IO system can be thought of like biological file handles.
60 They are attached to filehandles with smart formatting rules (eg, PDB format)
61 and can either read or write structure objects (Bio::Structure objects, or
62 more correctly, Bio::Structure::StructureI implementing objects, of which
63 Bio::Structure is one such object). If you want to know what to do with a
64 Bio::Structure object, read L<Bio::Structure>
65
66 The idea is that you request a stream object for a particular format.
67 All the stream objects have a notion of an internal file that is read
68 from or written to. A particular Structure::IO object instance is configured
69 for either input or output. A specific example of a stream object is
70 the Bio::Structure::IO::pdb object.
71
72 Each stream object has functions
73
74 $stream->next_structure();
75
76 and
77
78 $stream->write_structure($struc);
79
80 also
81
82 $stream->type() # returns 'INPUT' or 'OUTPUT'
83
84 As an added bonus, you can recover a filehandle that is tied to the
85 Bio::Structure::IO object, allowing you to use the standard E<lt>E<gt> and print operations
86 to read and write structure objects:
87
88 use Bio::Structure::IO;
89
90 $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input
91
92 while ( $structure = <$stream> ) {
93 # do something with $structure
94 }
95
96 and
97
98 print $stream $structure; # when stream is in output mode
99
100
101 =head1 CONSTRUCTORS
102
103 =head2 Bio::Structure::IO-E<gt>new()
104
105 $stream = Bio::Structure::IO->new(-file => 'filename', -format=>$format);
106 $stream = Bio::Structure::IO->new(-fh => \*FILEHANDLE, -format=>$format);
107 $stream = Bio::Structure::IO->new(-format => $format);
108
109 The new() class method constructs a new Bio::Structure::IO object. The
110 returned object can be used to retrieve or print Bio::Structure objects.
111 new() accepts the following parameters:
112
113 =over 4
114
115 =item -file
116
117 A file path to be opened for reading or writing. The usual Perl
118 conventions apply:
119
120 'file' # open file for reading
121 '>file' # open file for writing
122 '>>file' # open file for appending
123 '+<file' # open file read/write
124 'command |' # open a pipe from the command
125 '| command' # open a pipe to the command
126
127 =item -fh
128
129 You may provide new() with a previously-opened filehandle. For
130 example, to read from STDIN:
131
132 $strucIO = Bio::Structure::IO->new(-fh => \*STDIN);
133
134 Note that you must pass filehandles as references to globs.
135
136 If neither a filehandle nor a filename is specified, then the module
137 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
138 semantics.
139
140 A string filehandle is handy if you want to modify the output in the
141 memory, before printing it out. The following program reads in EMBL
142 formatted entries from a file and prints them out in fasta format with
143 some HTML tags:
144 [ not relevant for Bio::Structure::IO as only one format is supported
145 at the moment ]
146
147 use Bio::SeqIO;
148 use IO::String;
149 my $in = Bio::SeqIO->new('-file' => "emblfile" ,
150 '-format' => 'EMBL');
151 while ( my $seq = $in->next_seq() ) {
152 # the output handle is reset for every file
153 my $stringio = IO::String->new($string);
154 my $out = Bio::SeqIO->new('-fh' => $stringio,
155 '-format' => 'fasta');
156 # output goes into $string
157 $out->write_seq($seq);
158 # modify $string
159 $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
160 # print into STDOUT
161 print $string;
162 }
163
164 =item -format
165
166 Specify the format of the file. Supported formats include:
167
168 PDB Protein Data Bank format
169
170 If no format is specified and a filename is given, then the module
171 will attempt to deduce it from the filename. If this is unsuccessful,
172 PDB format is assumed.
173
174 The format name is case insensitive. 'PDB', 'Pdb' and 'pdb' are
175 all supported.
176
177 =back
178
179 =head2 Bio::Structure::IO-E<gt>newFh()
180
181 $fh = Bio::Structure::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
182 $fh = Bio::Structure::IO->newFh(-format => $format);
183 # etc.
184
185 This constructor behaves like new(), but returns a tied filehandle
186 rather than a Bio::Structure::IO object. You can read structures from this
187 object using the familiar E<lt>E<gt> operator, and write to it using
188 print(). The usual array and $_ semantics work. For example, you can
189 read all structure objects into an array like this:
190
191 @structures = <$fh>;
192
193 Other operations, such as read(), sysread(), write(), close(), and printf()
194 are not supported.
195
196 =head1 OBJECT METHODS
197
198 See below for more detailed summaries. The main methods are:
199
200 =head2 $structure = $structIO-E<gt>next_structure()
201
202 Fetch the next structure from the stream.
203
204 =head2 $structIO-E<gt>write_structure($struc [,$another_struc,...])
205
206 Write the specified structure(s) to the stream.
207
208 =head2 TIEHANDLE(), READLINE(), PRINT()
209
210 These provide the tie interface. See L<perltie> for more details.
211
212 =head1 FEEDBACK
213
214 =head2 Mailing Lists
215
216 User feedback is an integral part of the evolution of this
217 and other Bioperl modules. Send your comments and suggestions preferably
218 to one of the Bioperl mailing lists.
219 Your participation is much appreciated.
220
221 bioperl-l@bioperl.org - General discussion
222 http://bioperl.org/MailList.shtml - About the mailing lists
223
224 =head2 Reporting Bugs
225
226 Report bugs to the Bioperl bug tracking system to help us keep track
227 of the bugs and their resolution.
228 Bug reports can be submitted via email or the web:
229
230 bioperl-bugs@bioperl.org
231 http://bugzilla.bioperl.org/
232
233 =head1 AUTHOR - Ewan Birney, Lincoln Stein, Kris Boulez
234
235 Email birney@ebi.ac.uk, kris.boulez@algonomics.com
236
237 Describe contact details here
238
239 =head1 APPENDIX
240
241 The rest of the documentation details each of the object
242 methods. Internal methods are usually preceded with a _
243
244 =cut
245
246 # Let the code begin...
247
248 package Bio::Structure::IO;
249
250 use strict;
251 use vars qw(@ISA);
252
253 use Bio::Root::Root;
254 use Bio::Root::IO;
255 use Bio::PrimarySeq;
256 use Symbol();
257
258 @ISA = qw(Bio::Root::Root Bio::Root::IO);
259
260 =head2 new
261
262 Title : new
263 Usage : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format')
264 Function: Returns a new structIOstream
265 Returns : A Bio::Structure::IO handler initialised with the appropriate format
266 Args : -file => $filename
267 -format => format
268 -fh => filehandle to attach to
269
270 =cut
271
# File-level counter; not referenced by any code visible in this file.
my $entry = 0;

# Constructor with two modes:
#   * called on a format driver (a class matching Bio::Structure::IO::<fmt>),
#     it builds the object through the parent constructor and initialises it;
#   * called on anything else, it works out the format, loads the matching
#     driver module and hands the unchanged arguments to that driver's new().
# Returns the driver object, or undef when the driver module cannot be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::Structure::IO::(\S+)/ ) {
        my ($driver) = $class->SUPER::new(@args);
        $driver->_initialize(@args);
        return $driver;
    }

    # Generic entry point: look at the named arguments, adding a
    # lower-cased copy of every key next to the original spelling.
    my %opt     = @args;
    my @lc_keys = map { lc $_ } keys %opt;
    @opt{@lc_keys} = values %opt;

    # Explicit -format wins, then a guess from the file name (or the first
    # command line argument), then the 'pdb' default.
    my $format = $opt{'-format'};
    $format ||= $class->_guess_format( $opt{-file} || $ARGV[0] );
    $format ||= 'pdb';
    $format = lc $format;    # normalize capitalization to lower case

    return undef unless _load_format_module($format);
    return "Bio::Structure::IO::$format"->new(@args);
}
298
299 =head2 newFh
300
301 Title : newFh
302 Usage : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
303 Function: does a new() followed by an fh()
304 Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
305 $structure = <$fh>; # read a structure object
306 print $fh $structure; # write a structure object
307 Returns : filehandle tied to the Bio::Structure::IO::Fh class
308 Args :
309
310 =cut
311
# Convenience constructor: builds a stream object with new() using the given
# arguments and returns the tied filehandle produced by its fh() method.
# Returns nothing when new() does not yield a true value.
sub newFh {
    my ($class, @ctor_args) = @_;

    my $stream = $class->new(@ctor_args);
    return unless $stream;

    return $stream->fh;
}
317
318 =head2 fh
319
320 Title : fh
321 Usage : $obj->fh
322 Function:
323 Example : $fh = $obj->fh; # make a tied filehandle
324 $structure = <$fh>; # read a structure object
325 print $fh $structure; # write a structure object
326 Returns : filehandle tied to the Bio::Structure::IO::Fh class
327 Args :
328
329 =cut
330
331
# Creates a fresh anonymous glob and ties it to this object's class, passing
# the object itself to TIEHANDLE. Returns the glob reference, which can then
# be used with <> and print.
sub fh {
    my ($stream) = @_;

    my $handle    = Symbol::gensym();
    my $tie_class = ref($stream) || $stream;

    tie *{$handle}, $tie_class, $stream;
    return $handle;
}
339
340
341 # _initialize is chained for all Bio::Structure::IO classes
342
# Runs the parent class initialisation and then the I/O initialisation,
# passing the constructor arguments to both. Returns whatever
# _initialize_io() returns.
sub _initialize {
    my $self = shift;
    my @args = @_;

    # not really necessary unless we put more in RootI
    $self->SUPER::_initialize(@args);

    # initialize the IO part
    return $self->_initialize_io(@args);
}
352
353 =head2 next_structure
354
355 Title : next_structure
356 Usage : $structure = $stream->next_structure
357 Function: Reads the next structure object from the stream and returns it.
358
359 Certain driver modules may encounter entries in the stream that
360 are either misformatted or that use syntax not yet understood
361 by the driver. If such an incident is recoverable, e.g., by
362 dismissing a feature of a feature table or some other non-mandatory
363 part of an entry, the driver will issue a warning. In the case
364 of a non-recoverable situation an exception will be thrown.
365 Do not assume that you can resume parsing the same stream after
366 catching the exception. Note that you can always turn recoverable
367 errors into exceptions by calling $stream->verbose(2) (see
368 Bio::RootI POD page).
369 Returns : a Bio::Structure structure object
370 Args : none
371
372 =cut
373
# Placeholder on the generic class: always reports through throw() that a
# generic Bio::Structure::IO object cannot be read from.
sub next_structure {
    my $self = shift;
    $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
}
378
379 # Do we want people to read out the sequence directly from a $structIO stream
380 #
381 ##=head2 next_primary_seq
382 ##
383 ## Title : next_primary_seq
384 ## Usage : $seq = $stream->next_primary_seq
385 ## Function: Provides a primaryseq type of sequence object
386 ## Returns : A Bio::PrimarySeqI object
387 ## Args : none
388 ##
389 ##
390 ##=cut
391 ##
392 ##sub next_primary_seq {
393 ## my ($self) = @_;
394 ##
395 ## # in this case, we default to next_seq. This is because
396 ## # Bio::Seq's are Bio::PrimarySeqI objects. However we
397 ## # expect certain sub classes to override this method to provide
398 ## # less parsing heavy methods to retrieving the objects
399 ##
400 ## return $self->next_seq();
401 ##}
402
403 =head2 write_structure
404
405 Title : write_structure
406 Usage : $stream->write_structure($structure)
407 Function: writes the $structure object into the stream
408 Returns : 1 for success and 0 for error
409 Args : Bio::Structure object
410
411 =cut
412
# Placeholder on the generic class for the documented write_structure()
# method: always reports through throw() that a generic Bio::Structure::IO
# object cannot be written to.
#   Args : the structure object(s) to write (ignored here)
sub write_structure {
    my ($self, $struc) = @_;
    $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
}

# Backward-compatible alias kept from the Bio::SeqIO code this module was
# copied from. It forwards all arguments to write_structure() and returns
# its result, so the call ends up in whichever write_structure() the
# object's class provides.
sub write_seq {
    my ($self, @strucs) = @_;
    return $self->write_structure(@strucs);
}
417
418
419 # Do we need this here?
420 #
421 ##=head2 alphabet
422 ##
423 ## Title : alphabet
424 ## Usage : $self->alphabet($newval)
425 ## Function: Set/get the molecule type for the Seq objects to be created.
426 ## Example : $seqio->alphabet('protein')
427 ## Returns : value of alphabet: 'dna', 'rna', or 'protein'
428 ## Args : newvalue (optional)
429 ## Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein'
430 ##
431 ##=cut
432 ##
433 ##sub alphabet {
434 ## my ($self, $value) = @_;
435 ##
436 ## if ( defined $value) {
437 ## # instead of hard-coding the allowed values once more, we check by
438 ## # creating a dummy sequence object
439 ## eval {
440 ## my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
441 ## };
442 ## if($@) {
443 ## $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
444 ## }
445 ## $self->{'alphabet'} = "\L$value";
446 ## }
447 ## return $self->{'alphabet'};
448 ##}
449
450 =head2 _load_format_module
451
452 Title : _load_format_module
453 Usage : *INTERNAL Structure::IO stuff*
454 Function: Loads up (like use) a module at run time on demand
455 Example :
456 Returns :
457 Args :
458
459 =cut
460
# Loads the driver module for $format (Bio/Structure/IO/<format>.pm) at run
# time. Called as a plain function, not as a method.
#   Args    : the format name
#   Returns : 1 when the module is available, nothing when require fails
#             (in which case an explanatory message is printed to STDERR)
sub _load_format_module {
    my ($format) = @_;

    my $load   = "Bio/Structure/IO/$format.pm";
    my $module = "_<$load";

    # Skip the require when the main symbol table already has an entry
    # for this file.
    return 1 if $main::{$module};

    eval { require $load; };
    return 1 unless $@;

    print STDERR <<END;
$load: $format cannot be found
Exception $@
For more information about the Structure::IO system please see the
Bio::Structure::IO docs. This includes ways of checking for formats at
compile time, not run time
END
    return;
}
485
486 =head2 _concatenate_lines
487
488 Title : _concatenate_lines
489 Usage : $s = _concatenate_lines($line, $continuation_line)
490 Function: Private. Concatenates two strings assuming that the second stems
491 from a continuation line of the first. Adds a space between both
492 unless the first ends with a dash.
493
494 Takes care of either arg being empty.
495 Example :
496 Returns : A string.
497 Args :
498
499 =cut
500
# Joins a line and its continuation line into one string.
#   Args    : $s1 - the first line (may be undef or empty)
#             $s2 - the continuation line (may be undef or empty)
#   Returns : $s1 followed by $s2, with a single space in between unless
#             either part is empty or $s1 ends with a dash.
# Emptiness is decided by defined/length rather than truthiness, so a part
# consisting of the string "0" is kept instead of being silently dropped.
sub _concatenate_lines {
    my ($self, $s1, $s2) = @_;

    $s1 = "" unless defined $s1;
    $s2 = "" unless defined $s2;

    # A trailing dash marks a hyphenated break: glue the parts directly.
    $s1 .= " " if length($s1) && $s1 !~ /-$/ && length($s2);

    return $s1 . $s2;
}
506
507 =head2 _filehandle
508
509 Title : _filehandle
510 Usage : $obj->_filehandle($newval)
511 Function: This method is deprecated. Call _fh() instead.
512 Example :
513 Returns : value of _filehandle
514 Args : newvalue (optional)
515
516
517 =cut
518
# Deprecated accessor kept for older callers: forwards every argument to
# _fh() and returns its result.
sub _filehandle {
    my $self = shift;
    return $self->_fh(@_);
}
523
524 =head2 _guess_format
525
526 Title : _guess_format
527 Usage : $obj->_guess_format($filename)
528 Function:
529 Example :
530 Returns : guessed format of filename (lower case)
531 Args :
532
533 =cut
534
# Guesses a format name from a file name's extension.
#   Args    : $filename - the file name to inspect
#   Returns : the lower-case format name for a recognised extension,
#             nothing when no file name is given or no extension matches.
# The file name is held in a lexical so the caller's $_ is left untouched.
# The extension table is the one inherited from Bio::SeqIO.
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    return 'fasta'   if $filename =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
    return 'genbank' if $filename =~ /\.(gb|gbank|genbank)$/i;
    return 'scf'     if $filename =~ /\.scf$/i;
    return 'pir'     if $filename =~ /\.pir$/i;
    return 'embl'    if $filename =~ /\.(embl|ebl|emb|dat)$/i;
    return 'raw'     if $filename =~ /\.(txt)$/i;
    return 'gcg'     if $filename =~ /\.gcg$/i;
    return 'ace'     if $filename =~ /\.ace$/i;
    return 'bsml'    if $filename =~ /\.(bsm|bsml)$/i;
    return 'pdb'     if $filename =~ /\.(ent|pdb)$/i;

    # Unknown extension: let the caller fall back to its default.
    return;
}
549
# Destructor: calls close() on the object being destroyed.
sub DESTROY {
    my ($self) = @_;
    return $self->close();
}
555
# Tie constructor: wraps the given stream object in a hash blessed into
# $class, stored under the 'structio' key.
sub TIEHANDLE {
    my $class  = shift;
    my $stream = shift;

    my %state = ('structio' => $stream);
    return bless \%state, $class;
}
560
# Tie hook behind <$fh>. Reads from the wrapped stream object through
# next_structure(), the read method this class defines (the previous
# next_seq() call was a leftover from Bio::SeqIO).
#   Scalar context : returns the next structure
#   List context   : returns every remaining structure, stopping at the
#                    first false value from next_structure()
sub READLINE {
    my $self   = shift;
    my $stream = $self->{'structio'};

    return $stream->next_structure() unless wantarray;

    my (@list, $obj);
    push @list, $obj while $obj = $stream->next_structure();
    return @list;
}
568
# Tie hook behind "print $fh ...". Passes every printed item to the wrapped
# stream object's write_structure(), the documented write method (the
# previous write_seq() call was a leftover from Bio::SeqIO). Returns
# whatever write_structure() returns.
sub PRINT {
    my $self = shift;
    return $self->{'structio'}->write_structure(@_);
}
573
574 1;
575