Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 | |
| 2 # $Id: SeqIO.pm,v 1.59.2.4 2003/09/14 19:16:53 jason Exp $ | |
| 3 # | |
| 4 # BioPerl module for Bio::SeqIO | |
| 5 # | |
| 6 # Cared for by Ewan Birney <birney@sanger.ac.uk> | |
| 7 # and Lincoln Stein <lstein@cshl.org> | |
| 8 # | |
| 9 # Copyright Ewan Birney | |
| 10 # | |
| 11 # You may distribute this module under the same terms as perl itself | |
| 12 # | |
| 13 # _history | |
| 14 # October 18, 1999 Largely rewritten by Lincoln Stein | |
| 15 | |
| 16 # POD documentation - main docs before the code | |
| 17 | |
| 18 =head1 NAME | |
| 19 | |
| 20 Bio::SeqIO - Handler for SeqIO Formats | |
| 21 | |
| 22 =head1 SYNOPSIS | |
| 23 | |
| 24 use Bio::SeqIO; | |
| 25 | |
| 26 $in = Bio::SeqIO->new(-file => "inputfilename" , '-format' => 'Fasta'); | |
| 27 $out = Bio::SeqIO->new(-file => ">outputfilename" , '-format' => 'EMBL'); | |
| 28 # note: we quote -format to keep older Perls from complaining. | |
| 29 | |
| 30 while ( my $seq = $in->next_seq() ) { | |
| 31 $out->write_seq($seq); | |
| 32 } | |
| 33 | |
| 34 Now, to actually get at the sequence object, use the standard Bio::Seq | |
| 35 methods (look at L<Bio::Seq> if you don't know what they are) | |
| 36 | |
| 37 use Bio::SeqIO; | |
| 38 | |
| 39 $in = Bio::SeqIO->new(-file => "inputfilename" , '-format' => 'genbank'); | |
| 40 | |
| 41 while ( my $seq = $in->next_seq() ) { | |
| 42 print "Sequence ",$seq->id," first 10 bases ",$seq->subseq(1,10),"\n"; | |
| 43 } | |
| 44 | |
| 45 | |
| 46 The SeqIO system does have a filehandle binding. Most people find this | |
| 47 a little confusing, but it does mean you write the world's smallest | |
| 48 reformatter | |
| 49 | |
| 50 use Bio::SeqIO; | |
| 51 | |
| 52 $in = Bio::SeqIO->newFh(-file => "inputfilename" , '-format' => 'Fasta'); | |
| 53 $out = Bio::SeqIO->newFh('-format' => 'EMBL'); | |
| 54 | |
| 55 # World's shortest Fasta<->EMBL format converter: | |
| 56 print $out $_ while <$in>; | |
| 57 | |
| 58 | |
| 59 =head1 DESCRIPTION | |
| 60 | |
| 61 Bio::SeqIO is a handler module for the formats in the SeqIO set (eg, | |
| 62 Bio::SeqIO::fasta). It is the officially sanctioned way of getting at | |
| 63 the format objects, which most people should use. | |
| 64 | |
| 65 The Bio::SeqIO system can be thought of like biological file handles. | |
| 66 They are attached to filehandles with smart formatting rules (eg, | |
| 67 genbank format, or EMBL format, or binary trace file format) and | |
| 68 can either read or write sequence objects (Bio::Seq objects, or | |
| 69 more correctly, Bio::SeqI implementing objects, of which Bio::Seq is | |
| 70 one such object). If you want to know what to do with a Bio::Seq | |
| 71 object, read L<Bio::Seq>. | |
| 72 | |
| 73 The idea is that you request a stream object for a particular format. | |
| 74 All the stream objects have a notion of an internal file that is read | |
| 75 from or written to. A particular SeqIO object instance is configured | |
| 76 for either input or output. A specific example of a stream object is | |
| 77 the Bio::SeqIO::fasta object. | |
| 78 | |
| 79 Each stream object has functions | |
| 80 | |
| 81 $stream->next_seq(); | |
| 82 | |
| 83 and | |
| 84 | |
| 85 $stream->write_seq($seq); | |
| 86 | |
| 87 As an added bonus, you can recover a filehandle that is tied to the | |
| 88 SeqIO object, allowing you to use the standard E<lt>E<gt> and print operations | |
| 89 to read and write sequence objects: | |
| 90 | |
| 91 use Bio::SeqIO; | |
| 92 | |
| 93 $stream = Bio::SeqIO->newFh(-format => 'Fasta'); # read from standard input | |
| 94 | |
| 95 while ( $seq = <$stream> ) { | |
| 96 # do something with $seq | |
| 97 } | |
| 98 | |
| 99 and | |
| 100 | |
| 101 print $stream $seq; # when stream is in output mode | |
| 102 | |
| 103 This makes the simplest ever reformatter | |
| 104 | |
| 105 #!/usr/local/bin/perl | |
| 106 | |
| 107 $format1 = shift; | |
| 108 $format2 = shift || die "Usage: reformat format1 format2 < input > output"; | |
| 109 | |
| 110 use Bio::SeqIO; | |
| 111 | |
| 112 $in = Bio::SeqIO->newFh(-format => $format1 ); | |
| 113 $out = Bio::SeqIO->newFh(-format => $format2 ); | |
| 114 #note: you might want to quote -format to keep older perl's from complaining. | |
| 115 | |
| 116 print $out $_ while <$in>; | |
| 117 | |
| 118 | |
| 119 =head1 CONSTRUCTORS | |
| 120 | |
| 121 =head2 Bio::SeqIO-E<gt>new() | |
| 122 | |
| 123 $seqIO = Bio::SeqIO->new(-file => 'filename', -format=>$format); | |
| 124 $seqIO = Bio::SeqIO->new(-fh => \*FILEHANDLE, -format=>$format); | |
| 125 $seqIO = Bio::SeqIO->new(-format => $format); | |
| 126 | |
| 127 The new() class method constructs a new Bio::SeqIO object. The | |
| 128 returned object can be used to retrieve or print Seq objects. new() | |
| 129 accepts the following parameters: | |
| 130 | |
| 131 =over 4 | |
| 132 | |
| 133 =item -file | |
| 134 | |
| 135 A file path to be opened for reading or writing. The usual Perl | |
| 136 conventions apply: | |
| 137 | |
| 138 'file' # open file for reading | |
| 139 '>file' # open file for writing | |
| 140 '>>file' # open file for appending | |
| 141 '+<file' # open file read/write | |
| 142 'command |' # open a pipe from the command | |
| 143 '| command' # open a pipe to the command | |
| 144 | |
| 145 =item -fh | |
| 146 | |
| 147 You may provide new() with a previously-opened filehandle. For | |
| 148 example, to read from STDIN: | |
| 149 | |
| 150 $seqIO = Bio::SeqIO->new(-fh => \*STDIN); | |
| 151 | |
| 152 Note that you must pass filehandles as references to globs. | |
| 153 | |
| 154 If neither a filehandle nor a filename is specified, then the module | |
| 155 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> | |
| 156 semantics. | |
| 157 | |
| 158 A string filehandle is handy if you want to modify the output in the | |
| 159 memory, before printing it out. The following program reads in EMBL | |
| 160 formatted entries from a file and prints them out in fasta format with | |
| 161 some HTML tags: | |
| 162 | |
| 163 use Bio::SeqIO; | |
| 164 use IO::String; | |
| 165 my $in = Bio::SeqIO->new('-file' => "emblfile" , | |
| 166 '-format' => 'EMBL'); | |
| 167 while ( my $seq = $in->next_seq() ) { | |
| 168 # the output handle is reset for every file | |
| 169 my $stringio = IO::String->new($string); | |
| 170 my $out = Bio::SeqIO->new('-fh' => $stringio, | |
| 171 '-format' => 'fasta'); | |
| 172 # output goes into $string | |
| 173 $out->write_seq($seq); | |
| 174 # modify $string | |
| 175 $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g; | |
| 176 # print into STDOUT | |
| 177 print $string; | |
| 178 } | |
| 179 | |
| 180 =item -format | |
| 181 | |
| 182 Specify the format of the file. Supported formats include: | |
| 183 | |
| 184 Fasta FASTA format | |
| 185 EMBL EMBL format | |
| 186 GenBank GenBank format | |
| 187 swiss Swissprot format | |
| 188 PIR Protein Information Resource format | |
| 189 GCG GCG format | |
| 190 raw Raw format (one sequence per line, no ID) | |
| 191 ace ACeDB sequence format | |
| 192 game GAME XML format | |
| 193 phd phred output | |
| 194 qual Quality values (get a sequence of quality scores) | |
| 195 Fastq Fastq format | |
| 196 SCF SCF tracefile format | |
| 197 ABI ABI tracefile format | |
| 198 ALF ALF tracefile format | |
| 199 CTF CTF tracefile format | |
| 200 ZTR ZTR tracefile format | |
| 201 PLN Staden plain tracefile format | |
| 202 EXP Staden tagged experiment tracefile format | |
| 203 | |
| 204 If no format is specified and a filename is given then the module | |
| 205 will attempt to deduce the format from the filename suffix. If this | |
| 206 is unsuccessful then Fasta format is assumed. | |
| 207 | |
| 208 The format name is case insensitive. 'FASTA', 'Fasta' and 'fasta' are | |
| 209 all valid suffixes. | |
| 210 | |
| 211 Currently, the tracefile formats (except for SCF) require installation | |
| 212 of the external Staden "io_lib" package, as well as the | |
| 213 Bio::SeqIO::staden::read package available from the bioperl-ext | |
| 214 repository. | |
| 215 | |
| 216 =item -flush | |
| 217 | |
| 218 By default, all files (or filehandles) opened for writing sequences | |
| 219 will be flushed after each write_seq() (making the file immediately | |
| 220 usable). If you don't need this facility and would like to marginally | |
| 221 improve the efficiency of writing multiple sequences to the same file | |
| 222 (or filehandle), pass the -flush option '0' or any other value that | |
| 223 evaluates as defined but false: | |
| 224 | |
| 225 my $gb = new Bio::SeqIO -file => "<gball.gbk", | |
| 226 -format => "gb"; | |
| 227 my $fa = new Bio::SeqIO -file => ">gball.fa", | |
| 228 -format => "fasta", | |
| 229 -flush => 0; # go as fast as we can! | |
| 230 while($seq = $gb->next_seq) { $fa->write_seq($seq) } | |
| 231 | |
| 232 | |
| 233 =back | |
| 234 | |
| 235 =head2 Bio::SeqIO-E<gt>newFh() | |
| 236 | |
| 237 $fh = Bio::SeqIO->newFh(-fh => \*FILEHANDLE, -format=>$format); | |
| 238 $fh = Bio::SeqIO->newFh(-format => $format); | |
| 239 # etc. | |
| 240 | |
| 241 This constructor behaves like new(), but returns a tied filehandle | |
| 242 rather than a Bio::SeqIO object. You can read sequences from this | |
| 243 object using the familiar E<lt>E<gt> operator, and write to it using | |
| 244 print(). The usual array and $_ semantics work. For example, you can | |
| 245 read all sequence objects into an array like this: | |
| 246 | |
| 247 @sequences = <$fh>; | |
| 248 | |
| 249 Other operations, such as read(), sysread(), write(), close(), and printf() | |
| 250 are not supported. | |
| 251 | |
| 252 =head1 OBJECT METHODS | |
| 253 | |
| 254 See below for more detailed summaries. The main methods are: | |
| 255 | |
| 256 =head2 $sequence = $seqIO-E<gt>next_seq() | |
| 257 | |
| 258 Fetch the next sequence from the stream. | |
| 259 | |
| 260 =head2 $seqIO-E<gt>write_seq($sequence [,$another_sequence,...]) | |
| 261 | |
| 262 Write the specified sequence(s) to the stream. | |
| 263 | |
| 264 =head2 TIEHANDLE(), READLINE(), PRINT() | |
| 265 | |
| 266 These provide the tie interface. See L<perltie> for more details. | |
| 267 | |
| 268 =head1 FEEDBACK | |
| 269 | |
| 270 =head2 Mailing Lists | |
| 271 | |
| 272 User feedback is an integral part of the evolution of this | |
| 273 and other Bioperl modules. Send your comments and suggestions preferably | |
| 274 to one of the Bioperl mailing lists. | |
| 275 | |
| 276 Your participation is much appreciated. | |
| 277 | |
| 278 bioperl-l@bioperl.org - General discussion | |
| 279 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 280 | |
| 281 =head2 Reporting Bugs | |
| 282 | |
| 283 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 284 the bugs and their resolution. | |
| 285 Bug reports can be submitted via email or the web: | |
| 286 | |
| 287 bioperl-bugs@bioperl.org | |
| 288 http://bugzilla.bioperl.org/ | |
| 289 | |
| 290 =head1 AUTHOR - Ewan Birney, Lincoln Stein | |
| 291 | |
| 292 Email birney@ebi.ac.uk | |
| 293 | |
| 294 =head1 APPENDIX | |
| 295 | |
| 296 The rest of the documentation details each of the object | |
| 297 methods. Internal methods are usually preceded with a _ | |
| 298 | |
| 299 =cut | |
| 300 | |
| 301 #' Let the code begin... | |
| 302 | |
| 303 package Bio::SeqIO; | |
| 304 | |
| 305 use strict; | |
| 306 use vars qw(@ISA); | |
| 307 | |
| 308 use Bio::Root::Root; | |
| 309 use Bio::Root::IO; | |
| 310 use Bio::Factory::SequenceStreamI; | |
| 311 use Bio::Factory::FTLocationFactory; | |
| 312 use Bio::Seq::SeqBuilder; | |
| 313 use Symbol(); | |
| 314 | |
| 315 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI); | |
| 316 | |
| 317 sub BEGIN { | |
| 318 eval { require Bio::SeqIO::staden::read; }; | |
| 319 } | |
| 320 | |
| 321 my %valid_alphabet_cache; | |
| 322 | |
| 323 =head2 new | |
| 324 | |
| 325 Title : new | |
| 326 Usage : $stream = Bio::SeqIO->new(-file => $filename, -format => 'Format') | |
| 327 Function: Returns a new seqstream | |
| 328 Returns : A Bio::SeqIO stream initialised with the appropriate format | |
| 329 Args : Named parameters: | |
| 330 -file => $filename | |
| 331 -fh => filehandle to attach to | |
| 332 -format => format | |
| 333 | |
| 334 Additional arguments may be used to set factories and | |
| 335 builders involved in the sequence object creation. None of | |
| 336 these must be provided, they all have reasonable defaults. | |
| 337 -seqfactory the L<Bio::Factory::SequenceFactoryI> object | |
| 338 -locfactory the L<Bio::Factory::LocationFactoryI> object | |
| 339 -objbuilder the L<Bio::Factory::ObjectBuilderI> object | |
| 340 | |
| 341 See L<Bio::SeqIO::Handler> | |
| 342 | |
| 343 =cut | |
| 344 | |
| 345 my $entry = 0; | |
| 346 | |
| 347 sub new { | |
| 348 my ($caller,@args) = @_; | |
| 349 my $class = ref($caller) || $caller; | |
| 350 | |
| 351 # or do we want to call SUPER on an object if $caller is an | |
| 352 # object? | |
| 353 if( $class =~ /Bio::SeqIO::(\S+)/ ) { | |
| 354 my ($self) = $class->SUPER::new(@args); | |
| 355 $self->_initialize(@args); | |
| 356 return $self; | |
| 357 } else { | |
| 358 | |
| 359 my %param = @args; | |
| 360 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys | |
| 361 my $format = $param{'-format'} || | |
| 362 $class->_guess_format( $param{-file} || $ARGV[0] ) || | |
| 363 'fasta'; | |
| 364 $format = "\L$format"; # normalize capitalization to lower case | |
| 365 | |
| 366 # normalize capitalization | |
| 367 return undef unless( $class->_load_format_module($format) ); | |
| 368 return "Bio::SeqIO::$format"->new(@args); | |
| 369 } | |
| 370 } | |
| 371 | |
| 372 =head2 newFh | |
| 373 | |
| 374 Title : newFh | |
| 375 Usage : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format') | |
| 376 Function: does a new() followed by an fh() | |
| 377 Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format') | |
| 378 $sequence = <$fh>; # read a sequence object | |
| 379 print $fh $sequence; # write a sequence object | |
| 380 Returns : filehandle tied to the Bio::SeqIO::Fh class | |
| 381 Args : | |
| 382 | |
| 383 See L<Bio::SeqIO::Fh> | |
| 384 | |
| 385 =cut | |
| 386 | |
| 387 sub newFh { | |
| 388 my $class = shift; | |
| 389 return unless my $self = $class->new(@_); | |
| 390 return $self->fh; | |
| 391 } | |
| 392 | |
| 393 =head2 fh | |
| 394 | |
| 395 Title : fh | |
| 396 Usage : $obj->fh | |
| 397 Function: | |
| 398 Example : $fh = $obj->fh; # make a tied filehandle | |
| 399 $sequence = <$fh>; # read a sequence object | |
| 400 print $fh $sequence; # write a sequence object | |
| 401 Returns : filehandle tied to Bio::SeqIO class | |
| 402 Args : none | |
| 403 | |
| 404 =cut | |
| 405 | |
| 406 | |
| 407 sub fh { | |
| 408 my $self = shift; | |
| 409 my $class = ref($self) || $self; | |
| 410 my $s = Symbol::gensym; | |
| 411 tie $$s,$class,$self; | |
| 412 return $s; | |
| 413 } | |
| 414 | |
| 415 # _initialize is chained for all SeqIO classes | |
| 416 | |
| 417 sub _initialize { | |
| 418 my($self, @args) = @_; | |
| 419 | |
| 420 # flush is initialized by the Root::IO init | |
| 421 | |
| 422 my ($seqfact,$locfact,$objbuilder) = | |
| 423 $self->_rearrange([qw(SEQFACTORY | |
| 424 LOCFACTORY | |
| 425 OBJBUILDER) | |
| 426 ], @args); | |
| 427 | |
| 428 $locfact = Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose) if ! $locfact; | |
| 429 $objbuilder = Bio::Seq::SeqBuilder->new(-verbose => $self->verbose) unless $objbuilder; | |
| 430 $self->sequence_builder($objbuilder); | |
| 431 $self->location_factory($locfact); | |
| 432 # note that this should come last because it propagates the sequence | |
| 433 # factory to the sequence builder | |
| 434 $seqfact && $self->sequence_factory($seqfact); | |
| 435 | |
| 436 # initialize the IO part | |
| 437 $self->_initialize_io(@args); | |
| 438 } | |
| 439 | |
| 440 =head2 next_seq | |
| 441 | |
| 442 Title : next_seq | |
| 443 Usage : $seq = stream->next_seq | |
| 444 Function: Reads the next sequence object from the stream and returns it. | |
| 445 | |
| 446 Certain driver modules may encounter entries in the stream that | |
| 447 are either misformatted or that use syntax not yet understood | |
| 448 by the driver. If such an incident is recoverable, e.g., by | |
| 449 dismissing a feature of a feature table or some other non-mandatory | |
| 450 part of an entry, the driver will issue a warning. In the case | |
| 451 of a non-recoverable situation an exception will be thrown. | |
| 452 Do not assume that you can resume parsing the same stream after | |
| 453 catching the exception. Note that you can always turn recoverable | |
| 454 errors into exceptions by calling $stream->verbose(2). | |
| 455 Returns : a Bio::Seq sequence object | |
| 456 Args : none | |
| 457 | |
| 458 See L<Bio::Root::RootI>, L<Bio::Factory::SeqStreamI>, L<Bio::Seq> | |
| 459 | |
| 460 =cut | |
| 461 | |
| 462 sub next_seq { | |
| 463 my ($self, $seq) = @_; | |
| 464 $self->throw("Sorry, you cannot read from a generic Bio::SeqIO object."); | |
| 465 } | |
| 466 | |
| 467 =head2 write_seq | |
| 468 | |
| 469 Title : write_seq | |
| 470 Usage : $stream->write_seq($seq) | |
| 471 Function: writes the $seq object into the stream | |
| 472 Returns : 1 for success and 0 for error | |
| 473 Args : Bio::Seq object | |
| 474 | |
| 475 =cut | |
| 476 | |
| 477 sub write_seq { | |
| 478 my ($self, $seq) = @_; | |
| 479 $self->throw("Sorry, you cannot write to a generic Bio::SeqIO object."); | |
| 480 } | |
| 481 | |
| 482 | |
| 483 =head2 alphabet | |
| 484 | |
| 485 Title : alphabet | |
| 486 Usage : $self->alphabet($newval) | |
| 487 Function: Set/get the molecule type for the Seq objects to be created. | |
| 488 Example : $seqio->alphabet('protein') | |
| 489 Returns : value of alphabet: 'dna', 'rna', or 'protein' | |
| 490 Args : newvalue (optional) | |
| 491 Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein' | |
| 492 | |
| 493 =cut | |
| 494 | |
| 495 sub alphabet { | |
| 496 my ($self, $value) = @_; | |
| 497 | |
| 498 if ( defined $value) { | |
| 499 $value = lc $value; | |
| 500 unless ($valid_alphabet_cache{$value}) { | |
| 501 # instead of hard-coding the allowed values once more, we check by | |
| 502 # creating a dummy sequence object | |
| 503 eval { | |
| 504 require Bio::PrimarySeq; | |
| 505 my $seq = Bio::PrimarySeq->new('-verbose' => $self->verbose, | |
| 506 '-alphabet' => $value); | |
| 507 | |
| 508 }; | |
| 509 if ($@) { | |
| 510 $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values."); | |
| 511 } | |
| 512 $valid_alphabet_cache{$value} = 1; | |
| 513 } | |
| 514 $self->{'alphabet'} = $value; | |
| 515 } | |
| 516 return $self->{'alphabet'}; | |
| 517 } | |
| 518 | |
| 519 =head2 _load_format_module | |
| 520 | |
| 521 Title : _load_format_module | |
| 522 Usage : *INTERNAL SeqIO stuff* | |
| 523 Function: Loads up (like use) a module at run time on demand | |
| 524 Example : | |
| 525 Returns : | |
| 526 Args : | |
| 527 | |
| 528 =cut | |
| 529 | |
| 530 sub _load_format_module { | |
| 531 my ($self, $format) = @_; | |
| 532 my $module = "Bio::SeqIO::" . $format; | |
| 533 my $ok; | |
| 534 | |
| 535 eval { | |
| 536 $ok = $self->_load_module($module); | |
| 537 }; | |
| 538 if ( $@ ) { | |
| 539 print STDERR <<END; | |
| 540 $self: $format cannot be found | |
| 541 Exception $@ | |
| 542 For more information about the SeqIO system please see the SeqIO docs. | |
| 543 This includes ways of checking for formats at compile time, not run time | |
| 544 END | |
| 545 ; | |
| 546 } | |
| 547 return $ok; | |
| 548 } | |
| 549 | |
| 550 =head2 _concatenate_lines | |
| 551 | |
| 552 Title : _concatenate_lines | |
| 553 Usage : $s = _concatenate_lines($line, $continuation_line) | |
| 554 Function: Private. Concatenates two strings assuming that the second stems | |
| 555 from a continuation line of the first. Adds a space between both | |
| 556 unless the first ends with a dash. | |
| 557 | |
| 558 Takes care of either arg being empty. | |
| 559 Example : | |
| 560 Returns : A string. | |
| 561 Args : | |
| 562 | |
| 563 =cut | |
| 564 | |
| 565 sub _concatenate_lines { | |
| 566 my ($self, $s1, $s2) = @_; | |
| 567 | |
| 568 $s1 .= " " if($s1 && ($s1 !~ /-$/) && $s2); | |
| 569 return ($s1 ? $s1 : "") . ($s2 ? $s2 : ""); | |
| 570 } | |
| 571 | |
| 572 =head2 _filehandle | |
| 573 | |
| 574 Title : _filehandle | |
| 575 Usage : $obj->_filehandle($newval) | |
| 576 Function: This method is deprecated. Call _fh() instead. | |
| 577 Example : | |
| 578 Returns : value of _filehandle | |
| 579 Args : newvalue (optional) | |
| 580 | |
| 581 | |
| 582 =cut | |
| 583 | |
| 584 sub _filehandle { | |
| 585 my ($self,@args) = @_; | |
| 586 return $self->_fh(@args); | |
| 587 } | |
| 588 | |
| 589 =head2 _guess_format | |
| 590 | |
| 591 Title : _guess_format | |
| 592 Usage : $obj->_guess_format($filename) | |
| 593 Function: guess format based on file suffix | |
| 594 Example : | |
| 595 Returns : guessed format of filename (lower case) | |
| 596 Args : | |
| 597 Notes : formats that _filehandle() will guess include fasta, | |
| 598 genbank, scf, pir, embl, raw, gcg, ace, bsml, swissprot, | |
| 599 fastq and phd/phred | |
| 600 | |
| 601 =cut | |
| 602 | |
| 603 sub _guess_format { | |
| 604 my $class = shift; | |
| 605 return unless $_ = shift; | |
| 606 return 'fasta' if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i; | |
| 607 return 'genbank' if /\.(gb|gbank|genbank|gbk|gbs)$/i; | |
| 608 return 'scf' if /\.scf$/i; | |
| 609 return 'scf' if /\.scf$/i; | |
| 610 return 'abi' if /\.abi$/i; | |
| 611 return 'alf' if /\.alf$/i; | |
| 612 return 'ctf' if /\.ctf$/i; | |
| 613 return 'ztr' if /\.ztr$/i; | |
| 614 return 'pln' if /\.pln$/i; | |
| 615 return 'exp' if /\.exp$/i; | |
| 616 return 'pir' if /\.pir$/i; | |
| 617 return 'embl' if /\.(embl|ebl|emb|dat)$/i; | |
| 618 return 'raw' if /\.(txt)$/i; | |
| 619 return 'gcg' if /\.gcg$/i; | |
| 620 return 'ace' if /\.ace$/i; | |
| 621 return 'bsml' if /\.(bsm|bsml)$/i; | |
| 622 return 'swiss' if /\.(swiss|sp)$/i; | |
| 623 return 'phd' if /\.(phd|phred)$/i; | |
| 624 return 'fastq' if /\.fastq$/i; | |
| 625 } | |
| 626 | |
| 627 sub DESTROY { | |
| 628 my $self = shift; | |
| 629 | |
| 630 $self->close(); | |
| 631 } | |
| 632 | |
| 633 sub TIEHANDLE { | |
| 634 my ($class,$val) = @_; | |
| 635 return bless {'seqio' => $val}, $class; | |
| 636 } | |
| 637 | |
| 638 sub READLINE { | |
| 639 my $self = shift; | |
| 640 return $self->{'seqio'}->next_seq() unless wantarray; | |
| 641 my (@list, $obj); | |
| 642 push @list, $obj while $obj = $self->{'seqio'}->next_seq(); | |
| 643 return @list; | |
| 644 } | |
| 645 | |
| 646 sub PRINT { | |
| 647 my $self = shift; | |
| 648 $self->{'seqio'}->write_seq(@_); | |
| 649 } | |
| 650 | |
| 651 =head2 sequence_factory | |
| 652 | |
| 653 Title : sequence_factory | |
| 654 Usage : $seqio->sequence_factory($seqfactory) | |
| 655 Function: Get/Set the Bio::Factory::SequenceFactoryI | |
| 656 Returns : Bio::Factory::SequenceFactoryI | |
| 657 Args : [optional] Bio::Factory::SequenceFactoryI | |
| 658 | |
| 659 | |
| 660 =cut | |
| 661 | |
| 662 sub sequence_factory{ | |
| 663 my ($self,$obj) = @_; | |
| 664 if( defined $obj ) { | |
| 665 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) { | |
| 666 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()"); | |
| 667 } | |
| 668 $self->{'_seqio_seqfactory'} = $obj; | |
| 669 my $builder = $self->sequence_builder(); | |
| 670 if($builder && $builder->can('sequence_factory') && | |
| 671 (! $builder->sequence_factory())) { | |
| 672 $builder->sequence_factory($obj); | |
| 673 } | |
| 674 } | |
| 675 $self->{'_seqio_seqfactory'}; | |
| 676 } | |
| 677 | |
| 678 =head2 object_factory | |
| 679 | |
| 680 Title : object_factory | |
| 681 Usage : $obj->object_factory($newval) | |
| 682 Function: This is an alias to sequence_factory with a more generic name. | |
| 683 Example : | |
| 684 Returns : value of object_factory (a scalar) | |
| 685 Args : on set, new value (a scalar or undef, optional) | |
| 686 | |
| 687 | |
| 688 =cut | |
| 689 | |
| 690 sub object_factory{ | |
| 691 return shift->sequence_factory(@_); | |
| 692 } | |
| 693 | |
| 694 =head2 sequence_builder | |
| 695 | |
| 696 Title : sequence_builder | |
| 697 Usage : $seqio->sequence_builder($seqfactory) | |
| 698 Function: Get/Set the L<Bio::Factory::ObjectBuilderI> used to build sequence | |
| 699 objects. | |
| 700 | |
| 701 If you do not set the sequence object builder yourself, it | |
| 702 will in fact be an instance of L<Bio::Seq::SeqBuilder>, and | |
| 703 you may use all methods documented there to configure it. | |
| 704 | |
| 705 Returns : a L<Bio::Factory::ObjectBuilderI> compliant object | |
| 706 Args : [optional] a L<Bio::Factory::ObjectBuilderI> compliant object | |
| 707 | |
| 708 | |
| 709 =cut | |
| 710 | |
| 711 sub sequence_builder{ | |
| 712 my ($self,$obj) = @_; | |
| 713 if( defined $obj ) { | |
| 714 if( ! ref($obj) || ! $obj->isa('Bio::Factory::ObjectBuilderI') ) { | |
| 715 $self->throw("Must provide a valid Bio::Factory::ObjectBuilderI object to ".ref($self)."::sequence_builder()"); | |
| 716 } | |
| 717 $self->{'_object_builder'} = $obj; | |
| 718 } | |
| 719 $self->{'_object_builder'}; | |
| 720 } | |
| 721 | |
| 722 =head2 location_factory | |
| 723 | |
| 724 Title : location_factory | |
| 725 Usage : $seqio->location_factory($locfactory) | |
| 726 Function: Get/Set the Bio::Factory::LocationFactoryI object to be used for | |
| 727 location string parsing | |
| 728 Returns : a L<Bio::Factory::LocationFactoryI> implementing object | |
| 729 Args : [optional] on set, a L<Bio::Factory::LocationFactoryI> implementing | |
| 730 object. | |
| 731 | |
| 732 | |
| 733 =cut | |
| 734 | |
| 735 sub location_factory{ | |
| 736 my ($self,$obj) = @_; | |
| 737 if( defined $obj ) { | |
| 738 if( ! ref($obj) || ! $obj->isa('Bio::Factory::LocationFactoryI') ) { | |
| 739 $self->throw("Must provide a valid Bio::Factory::LocationFactoryI". | |
| 740 " object to ".ref($self)."->location_factory()"); | |
| 741 } | |
| 742 $self->{'_seqio_locfactory'} = $obj; | |
| 743 } | |
| 744 $self->{'_seqio_locfactory'}; | |
| 745 } | |
| 746 | |
| 747 1; | |
| 748 |
