0
|
1
|
|
2 # $Id: SeqIO.pm,v 1.59.2.4 2003/09/14 19:16:53 jason Exp $
|
|
3 #
|
|
4 # BioPerl module for Bio::SeqIO
|
|
5 #
|
|
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
|
|
7 # and Lincoln Stein <lstein@cshl.org>
|
|
8 #
|
|
9 # Copyright Ewan Birney
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself
|
|
12 #
|
|
13 # _history
|
|
14 # October 18, 1999 Largely rewritten by Lincoln Stein
|
|
15
|
|
16 # POD documentation - main docs before the code
|
|
17
|
|
18 =head1 NAME
|
|
19
|
|
20 Bio::SeqIO - Handler for SeqIO Formats
|
|
21
|
|
22 =head1 SYNOPSIS
|
|
23
|
|
24 use Bio::SeqIO;
|
|
25
|
|
26 $in = Bio::SeqIO->new(-file => "inputfilename" , '-format' => 'Fasta');
|
|
27 $out = Bio::SeqIO->new(-file => ">outputfilename" , '-format' => 'EMBL');
|
|
28 # note: we quote -format to keep older Perls from complaining.
|
|
29
|
|
30 while ( my $seq = $in->next_seq() ) {
|
|
31 $out->write_seq($seq);
|
|
32 }
|
|
33
|
|
34 Now, to actually get at the sequence object, use the standard Bio::Seq
|
|
35 methods (look at L<Bio::Seq> if you don't know what they are)
|
|
36
|
|
37 use Bio::SeqIO;
|
|
38
|
|
39 $in = Bio::SeqIO->new(-file => "inputfilename" , '-format' => 'genbank');
|
|
40
|
|
41 while ( my $seq = $in->next_seq() ) {
|
|
42 print "Sequence ",$seq->id," first 10 bases ",$seq->subseq(1,10),"\n";
|
|
43 }
|
|
44
|
|
45
|
|
46 The SeqIO system does have a filehandle binding. Most people find this
|
|
47 a little confusing, but it does mean you can write the world's smallest
|
|
48 reformatter:
|
|
49
|
|
50 use Bio::SeqIO;
|
|
51
|
|
52 $in = Bio::SeqIO->newFh(-file => "inputfilename" , '-format' => 'Fasta');
|
|
53 $out = Bio::SeqIO->newFh('-format' => 'EMBL');
|
|
54
|
|
55 # World's shortest Fasta<->EMBL format converter:
|
|
56 print $out $_ while <$in>;
|
|
57
|
|
58
|
|
59 =head1 DESCRIPTION
|
|
60
|
|
61 Bio::SeqIO is a handler module for the formats in the SeqIO set (eg,
|
|
62 Bio::SeqIO::fasta). It is the officially sanctioned way of getting at
|
|
63 the format objects, which most people should use.
|
|
64
|
|
65 The Bio::SeqIO system can be thought of like biological file handles.
|
|
66 They are attached to filehandles with smart formatting rules (eg,
|
|
67 genbank format, or EMBL format, or binary trace file format) and
|
|
68 can either read or write sequence objects (Bio::Seq objects, or
|
|
69 more correctly, Bio::SeqI implementing objects, of which Bio::Seq is
|
|
70 one such object). If you want to know what to do with a Bio::Seq
|
|
71 object, read L<Bio::Seq>.
|
|
72
|
|
73 The idea is that you request a stream object for a particular format.
|
|
74 All the stream objects have a notion of an internal file that is read
|
|
75 from or written to. A particular SeqIO object instance is configured
|
|
76 for either input or output. A specific example of a stream object is
|
|
77 the Bio::SeqIO::fasta object.
|
|
78
|
|
79 Each stream object has functions
|
|
80
|
|
81 $stream->next_seq();
|
|
82
|
|
83 and
|
|
84
|
|
85 $stream->write_seq($seq);
|
|
86
|
|
87 As an added bonus, you can recover a filehandle that is tied to the
|
|
88 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations
|
|
89 to read and write sequence objects:
|
|
90
|
|
91 use Bio::SeqIO;
|
|
92
|
|
93 $stream = Bio::SeqIO->newFh(-format => 'Fasta'); # read from standard input
|
|
94
|
|
95 while ( $seq = <$stream> ) {
|
|
96 # do something with $seq
|
|
97 }
|
|
98
|
|
99 and
|
|
100
|
|
101 print $stream $seq; # when stream is in output mode
|
|
102
|
|
103 This makes the simplest ever reformatter
|
|
104
|
|
105 #!/usr/local/bin/perl
|
|
106
|
|
107 $format1 = shift;
|
|
108 $format2 = shift || die "Usage: reformat format1 format2 < input > output";
|
|
109
|
|
110 use Bio::SeqIO;
|
|
111
|
|
112 $in = Bio::SeqIO->newFh(-format => $format1 );
|
|
113 $out = Bio::SeqIO->newFh(-format => $format2 );
|
|
114 #note: you might want to quote -format to keep older perl's from complaining.
|
|
115
|
|
116 print $out $_ while <$in>;
|
|
117
|
|
118
|
|
119 =head1 CONSTRUCTORS
|
|
120
|
|
121 =head2 Bio::SeqIO-E<gt>new()
|
|
122
|
|
123 $seqIO = Bio::SeqIO->new(-file => 'filename', -format=>$format);
|
|
124 $seqIO = Bio::SeqIO->new(-fh => \*FILEHANDLE, -format=>$format);
|
|
125 $seqIO = Bio::SeqIO->new(-format => $format);
|
|
126
|
|
127 The new() class method constructs a new Bio::SeqIO object. The
|
|
128 returned object can be used to retrieve or print Seq objects. new()
|
|
129 accepts the following parameters:
|
|
130
|
|
131 =over 4
|
|
132
|
|
133 =item -file
|
|
134
|
|
135 A file path to be opened for reading or writing. The usual Perl
|
|
136 conventions apply:
|
|
137
|
|
138 'file' # open file for reading
|
|
139 '>file' # open file for writing
|
|
140 '>>file' # open file for appending
|
|
141 '+<file' # open file read/write
|
|
142 'command |' # open a pipe from the command
|
|
143 '| command' # open a pipe to the command
|
|
144
|
|
145 =item -fh
|
|
146
|
|
147 You may provide new() with a previously-opened filehandle. For
|
|
148 example, to read from STDIN:
|
|
149
|
|
150 $seqIO = Bio::SeqIO->new(-fh => \*STDIN);
|
|
151
|
|
152 Note that you must pass filehandles as references to globs.
|
|
153
|
|
154 If neither a filehandle nor a filename is specified, then the module
|
|
155 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
|
|
156 semantics.
|
|
157
|
|
158 A string filehandle is handy if you want to modify the output in the
|
|
159 memory, before printing it out. The following program reads in EMBL
|
|
160 formatted entries from a file and prints them out in fasta format with
|
|
161 some HTML tags:
|
|
162
|
|
163 use Bio::SeqIO;
|
|
164 use IO::String;
|
|
165 my $in = Bio::SeqIO->new('-file' => "emblfile" ,
|
|
166 '-format' => 'EMBL');
|
|
167 while ( my $seq = $in->next_seq() ) {
|
|
168 # the output handle is reset for every file
|
|
169 my $stringio = IO::String->new($string);
|
|
170 my $out = Bio::SeqIO->new('-fh' => $stringio,
|
|
171 '-format' => 'fasta');
|
|
172 # output goes into $string
|
|
173 $out->write_seq($seq);
|
|
174 # modify $string
|
|
175 $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
|
|
176 # print into STDOUT
|
|
177 print $string;
|
|
178 }
|
|
179
|
|
180 =item -format
|
|
181
|
|
182 Specify the format of the file. Supported formats include:
|
|
183
|
|
184 Fasta FASTA format
|
|
185 EMBL EMBL format
|
|
186 GenBank GenBank format
|
|
187 swiss Swissprot format
|
|
188 PIR Protein Information Resource format
|
|
189 GCG GCG format
|
|
190 raw Raw format (one sequence per line, no ID)
|
|
191 ace ACeDB sequence format
|
|
192 game GAME XML format
|
|
193 phd phred output
|
|
194 qual Quality values (get a sequence of quality scores)
|
|
195 Fastq Fastq format
|
|
196 SCF SCF tracefile format
|
|
197 ABI ABI tracefile format
|
|
198 ALF ALF tracefile format
|
|
199 CTF CTF tracefile format
|
|
200 ZTR ZTR tracefile format
|
|
201 PLN Staden plain tracefile format
|
|
202 EXP Staden tagged experiment tracefile format
|
|
203
|
|
204 If no format is specified and a filename is given then the module
|
|
205 will attempt to deduce the format from the filename suffix. If this
|
|
206 is unsuccessful then Fasta format is assumed.
|
|
207
|
|
208 The format name is case insensitive. 'FASTA', 'Fasta' and 'fasta' are
|
|
209 all valid format names.
|
|
210
|
|
211 Currently, the tracefile formats (except for SCF) require installation
|
|
212 of the external Staden "io_lib" package, as well as the
|
|
213 Bio::SeqIO::staden::read package available from the bioperl-ext
|
|
214 repository.
|
|
215
|
|
216 =item -flush
|
|
217
|
|
218 By default, all files (or filehandles) opened for writing sequences
|
|
219 will be flushed after each write_seq() (making the file immediately
|
|
220 usable). If you don't need this facility and would like to marginally
|
|
221 improve the efficiency of writing multiple sequences to the same file
|
|
222 (or filehandle), pass the -flush option '0' or any other value that
|
|
223 evaluates as defined but false:
|
|
224
|
|
225 my $gb = new Bio::SeqIO -file => "<gball.gbk",
|
|
226 -format => "gb";
|
|
227 my $fa = new Bio::SeqIO -file => ">gball.fa",
|
|
228 -format => "fasta",
|
|
229 -flush => 0; # go as fast as we can!
|
|
230 while($seq = $gb->next_seq) { $fa->write_seq($seq) }
|
|
231
|
|
232
|
|
233 =back
|
|
234
|
|
235 =head2 Bio::SeqIO-E<gt>newFh()
|
|
236
|
|
237 $fh = Bio::SeqIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
|
|
238 $fh = Bio::SeqIO->newFh(-format => $format);
|
|
239 # etc.
|
|
240
|
|
241 This constructor behaves like new(), but returns a tied filehandle
|
|
242 rather than a Bio::SeqIO object. You can read sequences from this
|
|
243 object using the familiar E<lt>E<gt> operator, and write to it using
|
|
244 print(). The usual array and $_ semantics work. For example, you can
|
|
245 read all sequence objects into an array like this:
|
|
246
|
|
247 @sequences = <$fh>;
|
|
248
|
|
249 Other operations, such as read(), sysread(), write(), close(), and printf()
|
|
250 are not supported.
|
|
251
|
|
252 =head1 OBJECT METHODS
|
|
253
|
|
254 See below for more detailed summaries. The main methods are:
|
|
255
|
|
256 =head2 $sequence = $seqIO-E<gt>next_seq()
|
|
257
|
|
258 Fetch the next sequence from the stream.
|
|
259
|
|
260 =head2 $seqIO-E<gt>write_seq($sequence [,$another_sequence,...])
|
|
261
|
|
262 Write the specified sequence(s) to the stream.
|
|
263
|
|
264 =head2 TIEHANDLE(), READLINE(), PRINT()
|
|
265
|
|
266 These provide the tie interface. See L<perltie> for more details.
|
|
267
|
|
268 =head1 FEEDBACK
|
|
269
|
|
270 =head2 Mailing Lists
|
|
271
|
|
272 User feedback is an integral part of the evolution of this
|
|
273 and other Bioperl modules. Send your comments and suggestions preferably
|
|
274 to one of the Bioperl mailing lists.
|
|
275
|
|
276 Your participation is much appreciated.
|
|
277
|
|
278 bioperl-l@bioperl.org - General discussion
|
|
279 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
280
|
|
281 =head2 Reporting Bugs
|
|
282
|
|
283 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
284 of the bugs and their resolution.
|
|
285 Bug reports can be submitted via email or the web:
|
|
286
|
|
287 bioperl-bugs@bioperl.org
|
|
288 http://bugzilla.bioperl.org/
|
|
289
|
|
290 =head1 AUTHOR - Ewan Birney, Lincoln Stein
|
|
291
|
|
292 Email birney@ebi.ac.uk
|
|
293
|
|
294 =head1 APPENDIX
|
|
295
|
|
296 The rest of the documentation details each of the object
|
|
297 methods. Internal methods are usually preceded with a _
|
|
298
|
|
299 =cut
|
|
300
|
|
301 #' Let the code begin...
|
|
302
|
|
303 package Bio::SeqIO;
|
|
304
|
|
305 use strict;
|
|
306 use vars qw(@ISA);
|
|
307
|
|
308 use Bio::Root::Root;
|
|
309 use Bio::Root::IO;
|
|
310 use Bio::Factory::SequenceStreamI;
|
|
311 use Bio::Factory::FTLocationFactory;
|
|
312 use Bio::Seq::SeqBuilder;
|
|
313 use Symbol();
|
|
314
|
|
315 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI);
|
|
316
|
|
# Try to pull in the Staden trace-file reader while the package is being
# compiled.  The require is wrapped in eval, so a failure to load it is
# silently ignored and compilation of this package carries on.
BEGIN {
    eval {
        require Bio::SeqIO::staden::read;
    };
}
|
|
320
|
|
321 my %valid_alphabet_cache;
|
|
322
|
|
323 =head2 new
|
|
324
|
|
325 Title : new
|
|
326 Usage : $stream = Bio::SeqIO->new(-file => $filename, -format => 'Format')
|
|
327 Function: Returns a new seqstream
|
|
328 Returns : A Bio::SeqIO stream initialised with the appropriate format
|
|
329 Args : Named parameters:
|
|
330 -file => $filename
|
|
331 -fh => filehandle to attach to
|
|
332 -format => format
|
|
333
|
|
334 Additional arguments may be used to set factories and
|
|
335 builders involved in the sequence object creation. None of
|
|
336 these must be provided, they all have reasonable defaults.
|
|
337 -seqfactory the L<Bio::Factory::SequenceFactoryI> object
|
|
338 -locfactory the L<Bio::Factory::LocationFactoryI> object
|
|
339 -objbuilder the L<Bio::Factory::ObjectBuilderI> object
|
|
340
|
|
341 See L<Bio::SeqIO::Handler>
|
|
342
|
|
343 =cut
|
|
344
|
|
345 my $entry = 0;
|
|
346
|
|
# Constructor and format dispatcher.
#
# When the invoking class name contains "Bio::SeqIO::<something>" the object
# is built through the parent constructor and then set up with
# _initialize().  For any other invocant the wanted format is resolved
# (explicit -format, then a guess from -file or $ARGV[0], then 'fasta'), the
# matching Bio::SeqIO::<format> module is loaded, and construction is
# delegated to that class with the original, unmodified argument list.
#
# Returns the new stream object, or undef when the format module could not
# be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an object?
    if ( $class =~ /Bio::SeqIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    my %param = @args;

    # Add a lower-cased alias for every key so the lookups below do not
    # depend on how the caller capitalised the option names.
    @param{ map { lc $_ } keys %param } = values %param;

    my $format = $param{'-format'};
    $format ||= $class->_guess_format( $param{-file} || $ARGV[0] );
    $format ||= 'fasta';

    # Format names are matched to module names in lower case.
    $format = lc $format;

    $class->_load_format_module($format) or return undef;

    my $driver_class = "Bio::SeqIO::$format";
    return $driver_class->new(@args);
}
|
|
371
|
|
372 =head2 newFh
|
|
373
|
|
374 Title : newFh
|
|
375 Usage : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
|
|
376 Function: does a new() followed by an fh()
|
|
377 Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
|
|
378 $sequence = <$fh>; # read a sequence object
|
|
379 print $fh $sequence; # write a sequence object
|
|
380 Returns : filehandle tied to the class of the newly created stream object
|
|
381 Args :
|
|
382
|
|
383 See L<Bio::SeqIO::Fh>
|
|
384
|
|
385 =cut
|
|
386
|
|
# Convenience constructor: builds a stream with new() and returns the tied
# filehandle obtained from its fh() method.  Returns nothing (undef in
# scalar context) when new() does not yield a true value.
sub newFh {
    my ($class, @args) = @_;

    my $stream = $class->new(@args);
    if ($stream) {
        return $stream->fh;
    }
    return;
}
|
|
392
|
|
393 =head2 fh
|
|
394
|
|
395 Title : fh
|
|
396 Usage : $obj->fh
|
|
397 Function:
|
|
398 Example : $fh = $obj->fh; # make a tied filehandle
|
|
399 $sequence = <$fh>; # read a sequence object
|
|
400 print $fh $sequence; # write a sequence object
|
|
401 Returns : filehandle tied to Bio::SeqIO class
|
|
402 Args : none
|
|
403
|
|
404 =cut
|
|
405
|
|
406
|
|
# Creates a fresh anonymous glob and ties it to the invocant's class,
# handing the invocant itself to TIEHANDLE.  The returned glob reference can
# then be used with <> and print, which are routed through READLINE and
# PRINT of that class.
sub fh {
    my ($self) = @_;

    my $tie_class = ref($self) || $self;
    my $glob      = Symbol::gensym();

    # Dereferencing the glob reference yields the glob itself, so this ties
    # the handle rather than a scalar.
    tie ${$glob}, $tie_class, $self;

    return $glob;
}
|
|
414
|
|
415 # _initialize is chained for all SeqIO classes
|
|
416
|
|
# Shared set-up for all SeqIO classes (the method is chained by them).
#
# Picks the optional -seqfactory, -locfactory and -objbuilder arguments out
# of the argument list, supplies default location-factory and builder
# objects when none were given, registers them on the stream and finally
# initialises the IO layer with the full argument list.
sub _initialize {
    my ($self, @args) = @_;

    # flush is initialized by the Root::IO init

    my ($seq_factory, $loc_factory, $builder) = $self->_rearrange(
        [ qw(SEQFACTORY LOCFACTORY OBJBUILDER) ],
        @args
    );

    unless ($loc_factory) {
        $loc_factory =
            Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose);
    }
    unless ($builder) {
        $builder = Bio::Seq::SeqBuilder->new(-verbose => $self->verbose);
    }

    $self->sequence_builder($builder);
    $self->location_factory($loc_factory);

    # Must come last: setting the sequence factory also hands it on to the
    # sequence builder registered above.
    if ($seq_factory) {
        $self->sequence_factory($seq_factory);
    }

    # initialize the IO part
    return $self->_initialize_io(@args);
}
|
|
439
|
|
440 =head2 next_seq
|
|
441
|
|
442 Title : next_seq
|
|
443 Usage : $seq = $stream->next_seq
|
|
444 Function: Reads the next sequence object from the stream and returns it.
|
|
445
|
|
446 Certain driver modules may encounter entries in the stream that
|
|
447 are either misformatted or that use syntax not yet understood
|
|
448 by the driver. If such an incident is recoverable, e.g., by
|
|
449 dismissing a feature of a feature table or some other non-mandatory
|
|
450 part of an entry, the driver will issue a warning. In the case
|
|
451 of a non-recoverable situation an exception will be thrown.
|
|
452 Do not assume that you can resume parsing the same stream after
|
|
453 catching the exception. Note that you can always turn recoverable
|
|
454 errors into exceptions by calling $stream->verbose(2).
|
|
455 Returns : a Bio::Seq sequence object
|
|
456 Args : none
|
|
457
|
|
458 See L<Bio::Root::RootI>, L<Bio::Factory::SequenceStreamI>, L<Bio::Seq>
|
|
459
|
|
460 =cut
|
|
461
|
|
# Generic placeholder: reading is not possible on this class, so the call
# is always reported through throw().  Format-specific classes are
# presumably expected to provide the real implementation.
sub next_seq {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::SeqIO object.");
}
|
|
466
|
|
467 =head2 write_seq
|
|
468
|
|
469 Title : write_seq
|
|
470 Usage : $stream->write_seq($seq)
|
|
471 Function: writes the $seq object into the stream
|
|
472 Returns : 1 for success and 0 for error
|
|
473 Args : Bio::Seq object
|
|
474
|
|
475 =cut
|
|
476
|
|
# Generic placeholder: writing is not possible on this class, so the call
# is always reported through throw().  Format-specific classes are
# presumably expected to provide the real implementation.
sub write_seq {
    my $self = shift;
    return $self->throw("Sorry, you cannot write to a generic Bio::SeqIO object.");
}
|
|
481
|
|
482
|
|
483 =head2 alphabet
|
|
484
|
|
485 Title : alphabet
|
|
486 Usage : $self->alphabet($newval)
|
|
487 Function: Set/get the molecule type for the Seq objects to be created.
|
|
488 Example : $seqio->alphabet('protein')
|
|
489 Returns : value of alphabet: 'dna', 'rna', or 'protein'
|
|
490 Args : newvalue (optional)
|
|
491 Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein'
|
|
492
|
|
493 =cut
|
|
494
|
|
# Get/set the alphabet (molecule type) for sequence objects to be created.
#
# Args    : optional new value; it is lower-cased before use.
# Returns : the currently stored alphabet (undef if never set).
# Throws  : via $self->throw() when a Bio::PrimarySeq cannot be constructed
#           with the given alphabet.
#
# Values that passed validation once are remembered in the file-level
# %valid_alphabet_cache so the probe object is only built once per value.
sub alphabet {
    my ($self, $value) = @_;

    if ( defined $value ) {
        $value = lc $value;
        unless ( $valid_alphabet_cache{$value} ) {
            # Instead of hard-coding the allowed values once more, check by
            # creating a dummy sequence object.  Success is detected from
            # the eval's own return value rather than from $@, because $@
            # can be reset while the failed object is being torn down (on
            # older perls), which would let an invalid value slip through
            # and be cached as valid.
            my $accepted = eval {
                require Bio::PrimarySeq;
                Bio::PrimarySeq->new('-verbose'  => $self->verbose,
                                     '-alphabet' => $value);
                1;
            };
            unless ($accepted) {
                $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
            }
            $valid_alphabet_cache{$value} = 1;
        }
        $self->{'alphabet'} = $value;
    }
    return $self->{'alphabet'};
}
|
|
518
|
|
519 =head2 _load_format_module
|
|
520
|
|
521 Title : _load_format_module
|
|
522 Usage : *INTERNAL SeqIO stuff*
|
|
523 Function: Loads up (like use) a module at run time on demand
|
|
524 Example :
|
|
525 Returns :
|
|
526 Args :
|
|
527
|
|
528 =cut
|
|
529
|
|
# Loads the driver module Bio::SeqIO::<format> at run time through
# _load_module().
#
# Returns whatever _load_module() returned.  If loading raised an exception
# the return value is undef and an explanatory message, including the
# exception text, is printed to STDERR.
sub _load_format_module {
    my ($self, $format) = @_;

    my $driver = "Bio::SeqIO::" . $format;
    my $loaded;

    eval { $loaded = $self->_load_module($driver); };

    # Nothing was raised: hand back the loader's result unchanged.
    return $loaded unless $@;

    print STDERR <<END;
$self: $format cannot be found
Exception $@
For more information about the SeqIO system please see the SeqIO docs.
This includes ways of checking for formats at compile time, not run time
END

    return $loaded;
}
|
|
549
|
|
550 =head2 _concatenate_lines
|
|
551
|
|
552 Title : _concatenate_lines
|
|
553 Usage : $s = _concatenate_lines($line, $continuation_line)
|
|
554 Function: Private. Concatenates two strings assuming that the second stems
|
|
555 from a continuation line of the first. Adds a space between both
|
|
556 unless the first ends with a dash.
|
|
557
|
|
558 Takes care of either arg being empty.
|
|
559 Example :
|
|
560 Returns : A string.
|
|
561 Args :
|
|
562
|
|
563 =cut
|
|
564
|
|
# Joins a line and its continuation line into one string.
#
# A single space is inserted between the two parts unless the first part
# ends with a dash.  Either argument may be undef or empty, in which case
# the other one is returned as is (or "" if both are missing).
#
# Emptiness is tested with defined/length rather than plain truthiness, so
# a part consisting of the string "0" is kept instead of being dropped.
sub _concatenate_lines {
    my ($self, $s1, $s2) = @_;

    $s1 = "" unless defined $s1;
    $s2 = "" unless defined $s2;

    # Only add the separator when there really are two parts to separate.
    if ( length($s1) && length($s2) && $s1 !~ /-$/ ) {
        $s1 .= " ";
    }
    return $s1 . $s2;
}
|
|
571
|
|
572 =head2 _filehandle
|
|
573
|
|
574 Title : _filehandle
|
|
575 Usage : $obj->_filehandle($newval)
|
|
576 Function: This method is deprecated. Call _fh() instead.
|
|
577 Example :
|
|
578 Returns : value of _filehandle
|
|
579 Args : newvalue (optional)
|
|
580
|
|
581
|
|
582 =cut
|
|
583
|
|
# Deprecated alias kept for older callers: forwards all arguments to _fh()
# and returns its result.
sub _filehandle {
    my $self = shift;
    return $self->_fh(@_);
}
|
|
588
|
|
589 =head2 _guess_format
|
|
590
|
|
591 Title : _guess_format
|
|
592 Usage : $obj->_guess_format($filename)
|
|
593 Function: guess format based on file suffix
|
|
594 Example :
|
|
595 Returns : guessed format of filename (lower case)
|
|
596 Args :
|
|
597 Notes : formats that _guess_format() will guess include fasta,
|
|
598 genbank, scf, pir, embl, raw, gcg, ace, bsml, swissprot,
|
|
599 fastq and phd/phred
|
|
600
|
|
601 =cut
|
|
602
|
|
# Guesses the sequence format from a file name suffix.
#
# Args    : the file name (class/object invocant first, as for any method).
# Returns : the lower-case format name, or nothing (undef in scalar
#           context) when no file name was given or the suffix is unknown.
#
# The file name is held in a lexical variable; the global $_ is left
# untouched, so callers that are iterating with $_ (for/map/while) do not
# have their data overwritten.
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    # Checked in order; each rule is [ format name, suffix pattern ].
    my @suffix_rules = (
        [ 'fasta'   => qr/\.(fasta|fast|seq|fa|fsa|nt|aa)$/i ],
        [ 'genbank' => qr/\.(gb|gbank|genbank|gbk|gbs)$/i ],
        [ 'scf'     => qr/\.scf$/i ],
        [ 'abi'     => qr/\.abi$/i ],
        [ 'alf'     => qr/\.alf$/i ],
        [ 'ctf'     => qr/\.ctf$/i ],
        [ 'ztr'     => qr/\.ztr$/i ],
        [ 'pln'     => qr/\.pln$/i ],
        [ 'exp'     => qr/\.exp$/i ],
        [ 'pir'     => qr/\.pir$/i ],
        [ 'embl'    => qr/\.(embl|ebl|emb|dat)$/i ],
        [ 'raw'     => qr/\.(txt)$/i ],
        [ 'gcg'     => qr/\.gcg$/i ],
        [ 'ace'     => qr/\.ace$/i ],
        [ 'bsml'    => qr/\.(bsm|bsml)$/i ],
        [ 'swiss'   => qr/\.(swiss|sp)$/i ],
        [ 'phd'     => qr/\.(phd|phred)$/i ],
        [ 'fastq'   => qr/\.fastq$/i ],
    );

    for my $rule (@suffix_rules) {
        my ($format, $pattern) = @{$rule};
        return $format if $filename =~ $pattern;
    }

    # Unknown suffix: let the caller fall back to its default.
    return;
}
|
|
626
|
|
# Destructor: calls close() on the object that is going away.
sub DESTROY {
    my ($stream) = @_;

    $stream->close();
}
|
|
632
|
|
# tie() hook: wraps the given stream object in a small hash, stored under
# the key 'seqio', and blesses that wrapper into the requested class.
sub TIEHANDLE {
    my ($class, $val) = @_;

    my %wrapper = ( 'seqio' => $val );
    return bless \%wrapper, $class;
}
|
|
637
|
|
# tie() hook behind <$fh>.
#
# In scalar context it returns the next sequence from the wrapped stream.
# In list context it keeps calling next_seq() until that returns a false
# value and returns everything collected so far.
sub READLINE {
    my $self   = shift;
    my $stream = $self->{'seqio'};

    return $stream->next_seq() unless wantarray;

    my @collected;
    while ( my $seq = $stream->next_seq() ) {
        push @collected, $seq;
    }
    return @collected;
}
|
|
645
|
|
# tie() hook behind print $fh ...: passes every printed item on to
# write_seq() of the wrapped stream and returns its result.
sub PRINT {
    my ($self, @items) = @_;
    return $self->{'seqio'}->write_seq(@items);
}
|
|
650
|
|
651 =head2 sequence_factory
|
|
652
|
|
653 Title : sequence_factory
|
|
654 Usage : $seqio->sequence_factory($seqfactory)
|
|
655 Function: Get/Set the Bio::Factory::SequenceFactoryI
|
|
656 Returns : Bio::Factory::SequenceFactoryI
|
|
657 Args : [optional] Bio::Factory::SequenceFactoryI
|
|
658
|
|
659
|
|
660 =cut
|
|
661
|
|
# Get/set the Bio::Factory::SequenceFactoryI object of this stream.
#
# Args    : optional factory object.
# Returns : the stored factory (undef if none has been set).
# Throws  : via $self->throw() when the argument is not a blessed object
#           that isa Bio::Factory::SequenceFactoryI.  Plain strings and
#           unblessed references are both rejected this way.
#
# On set, the factory is also handed to the current sequence builder, but
# only if that builder has a sequence_factory() method and does not have a
# factory of its own yet.
sub sequence_factory {
    my ($self, $obj) = @_;

    if ( defined $obj ) {
        # blessed() guards the ->isa call: invoking a method on an
        # unblessed reference would die with a raw Perl error instead of
        # the descriptive exception below.
        require Scalar::Util;
        unless ( Scalar::Util::blessed($obj)
                 && $obj->isa('Bio::Factory::SequenceFactoryI') ) {
            $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
        }
        $self->{'_seqio_seqfactory'} = $obj;

        my $builder = $self->sequence_builder();
        if ( $builder && $builder->can('sequence_factory')
             && ( !$builder->sequence_factory() ) ) {
            $builder->sequence_factory($obj);
        }
    }
    return $self->{'_seqio_seqfactory'};
}
|
|
677
|
|
678 =head2 object_factory
|
|
679
|
|
680 Title : object_factory
|
|
681 Usage : $obj->object_factory($newval)
|
|
682 Function: This is an alias to sequence_factory with a more generic name.
|
|
683 Example :
|
|
684 Returns : value of object_factory (a scalar)
|
|
685 Args : on set, new value (a scalar or undef, optional)
|
|
686
|
|
687
|
|
688 =cut
|
|
689
|
|
# Alias with a more generic name: forwards to sequence_factory() with the
# same arguments and returns its result.
sub object_factory {
    my ($self, @args) = @_;
    return $self->sequence_factory(@args);
}
|
|
693
|
|
694 =head2 sequence_builder
|
|
695
|
|
696 Title : sequence_builder
|
|
697 Usage : $seqio->sequence_builder($objbuilder)
|
|
698 Function: Get/Set the L<Bio::Factory::ObjectBuilderI> used to build sequence
|
|
699 objects.
|
|
700
|
|
701 If you do not set the sequence object builder yourself, it
|
|
702 will in fact be an instance of L<Bio::Seq::SeqBuilder>, and
|
|
703 you may use all methods documented there to configure it.
|
|
704
|
|
705 Returns : a L<Bio::Factory::ObjectBuilderI> compliant object
|
|
706 Args : [optional] a L<Bio::Factory::ObjectBuilderI> compliant object
|
|
707
|
|
708
|
|
709 =cut
|
|
710
|
|
# Get/set the Bio::Factory::ObjectBuilderI object used to build sequence
# objects.
#
# Args    : optional builder object.
# Returns : the stored builder (undef if none has been set).
# Throws  : via $self->throw() when the argument is not a blessed object
#           that isa Bio::Factory::ObjectBuilderI.  Plain strings and
#           unblessed references are both rejected this way.
sub sequence_builder {
    my ($self, $obj) = @_;

    if ( defined $obj ) {
        # blessed() guards the ->isa call: invoking a method on an
        # unblessed reference would die with a raw Perl error instead of
        # the descriptive exception below.
        require Scalar::Util;
        unless ( Scalar::Util::blessed($obj)
                 && $obj->isa('Bio::Factory::ObjectBuilderI') ) {
            $self->throw("Must provide a valid Bio::Factory::ObjectBuilderI object to ".ref($self)."::sequence_builder()");
        }
        $self->{'_object_builder'} = $obj;
    }
    return $self->{'_object_builder'};
}
|
|
721
|
|
722 =head2 location_factory
|
|
723
|
|
724 Title : location_factory
|
|
725 Usage : $seqio->location_factory($locfactory)
|
|
726 Function: Get/Set the Bio::Factory::LocationFactoryI object to be used for
|
|
727 location string parsing
|
|
728 Returns : a L<Bio::Factory::LocationFactoryI> implementing object
|
|
729 Args : [optional] on set, a L<Bio::Factory::LocationFactoryI> implementing
|
|
730 object.
|
|
731
|
|
732
|
|
733 =cut
|
|
734
|
|
# Get/set the Bio::Factory::LocationFactoryI object used for location
# string parsing.
#
# Args    : optional location factory object.
# Returns : the stored factory (undef if none has been set).
# Throws  : via $self->throw() when the argument is not a blessed object
#           that isa Bio::Factory::LocationFactoryI.  Plain strings and
#           unblessed references are both rejected this way.
sub location_factory {
    my ($self, $obj) = @_;

    if ( defined $obj ) {
        # blessed() guards the ->isa call: invoking a method on an
        # unblessed reference would die with a raw Perl error instead of
        # the descriptive exception below.
        require Scalar::Util;
        unless ( Scalar::Util::blessed($obj)
                 && $obj->isa('Bio::Factory::LocationFactoryI') ) {
            $self->throw("Must provide a valid Bio::Factory::LocationFactoryI".
                         " object to ".ref($self)."->location_factory()");
        }
        $self->{'_seqio_locfactory'} = $obj;
    }
    return $self->{'_seqio_locfactory'};
}
|
|
746
|
|
747 1;
|
|
748
|