Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Structure/IO.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm | 
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:1f6dce3d34e0 | 
|---|---|
| 1 # $Id: IO.pm,v 1.3 2002/10/22 07:45:21 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Structure::IO | |
| 4 # | |
| 5 # Cared for by Ewan Birney <birney@sanger.ac.uk> | |
| 6 # and Lincoln Stein <lstein@cshl.org> | |
| 7 # and Kris Boulez <kris.boulez@algonomics.com> | |
| 8 # | |
| 9 # Copyright 2001, 2002 Kris Boulez | |
| 10 # | |
| 11 # You may distribute this module under the same terms as perl itself | |
| 12 # | |
| 13 # _history | |
| 14 # October 18, 1999 Largely rewritten by Lincoln Stein | |
| 15 # November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified | |
| 16 # where needed. Factoring out common methods | |
| 17 # (to Bio::Root::IO) might be a good idea. | |
| 18 | |
| 19 # POD documentation - main docs before the code | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::Structure::IO - Handler for Structure Formats | |
| 24 | |
| 25 =head1 SYNOPSIS | |
| 26 | |
| 27 use Bio::Structure::IO; | |
| 28 | |
| 29 $in = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb'); | |
| 30 $out = Bio::Structure::IO->new(-file => ">outputfilename" , '-format' => 'pdb'); | |
| 31 # note: we quote -format to keep older perl's from complaining. | |
| 32 | |
| 33 while ( my $struc = $in->next_structure() ) { | |
| 34 $out->write_structure($struc); | |
| 35 } | |
| 36 | |
| 37 now, to actually get at the structure object, use the standard Bio::Structure | |
| 38 methods (look at L<Bio::Structure> if you don't know what they are) | |
| 39 | |
| 40 use Bio::Structure::IO; | |
| 41 | |
| 42 $in = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb'); | |
| 43 | |
| 44 while ( my $struc = $in->next_structure() ) { | |
| 45 print "Structure ",$struc->id," number of models: ",scalar $struc->model,"\n"; | |
| 46 } | |
| 47 | |
| 48 | |
| 49 | |
| 50 =head1 DESCRIPTION | |
| 51 | |
| 52 [ The following description is a copy-paste from the Bio::SeqIO description. | |
| 53 This is not surprising as the code is also mostly a copy. ] | |
| 54 | |
| 55 Bio::Structure::IO is a handler module for the formats in the Structure::IO set | |
| 56 (eg, Bio::Structure::IO::pdb). It is the officially sanctioned way of getting at | |
| 57 the format objects, which most people should use. | |
| 58 | |
| 59 The Bio::Structure::IO system can be thought of like biological file handles. | |
| 60 They are attached to filehandles with smart formatting rules (eg, PDB format) | |
| 61 and can either read or write structure objects (Bio::Structure objects, or | |
| 62 more correctly, Bio::Structure::StructureI implementing objects, of which | |
| 63 Bio::Structure is one such object). If you want to know what to do with a | |
| 64 Bio::Structure object, read L<Bio::Structure> | |
| 65 | |
| 66 The idea is that you request a stream object for a particular format. | |
| 67 All the stream objects have a notion of an internal file that is read | |
| 68 from or written to. A particular Structure::IO object instance is configured | |
| 69 for either input or output. A specific example of a stream object is | |
| 70 the Bio::Structure::IO::pdb object. | |
| 71 | |
| 72 Each stream object has functions | |
| 73 | |
| 74 $stream->next_structure(); | |
| 75 | |
| 76 and | |
| 77 | |
| 78 $stream->write_structure($struc); | |
| 79 | |
| 80 also | |
| 81 | |
| 82 $stream->type() # returns 'INPUT' or 'OUTPUT' | |
| 83 | |
| 84 As an added bonus, you can recover a filehandle that is tied to the | |
| 85 Structure::IO object, allowing you to use the standard E<lt>E<gt> and print operations | |
| 86 to read and write structure objects: | |
| 87 | |
| 88 use Bio::Structure::IO; | |
| 89 | |
| 90 $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input | |
| 91 | |
| 92 while ( $structure = <$stream> ) { | |
| 93 # do something with $structure | |
| 94 } | |
| 95 | |
| 96 and | |
| 97 | |
| 98 print $stream $structure; # when stream is in output mode | |
| 99 | |
| 100 | |
| 101 =head1 CONSTRUCTORS | |
| 102 | |
| 103 =head2 Bio::Structure::IO-E<gt>new() | |
| 104 | |
| 105 $stream = Bio::Structure::IO->new(-file => 'filename', -format=>$format); | |
| 106 $stream = Bio::Structure::IO->new(-fh => \*FILEHANDLE, -format=>$format); | |
| 107 $stream = Bio::Structure::IO->new(-format => $format); | |
| 108 | |
| 109 The new() class method constructs a new Bio::Structure::IO object. The | |
| 110 returned object can be used to retrieve or print Bio::Structure objects. | |
| 111 new() accepts the following parameters: | |
| 112 | |
| 113 =over 4 | |
| 114 | |
| 115 =item -file | |
| 116 | |
| 117 A file path to be opened for reading or writing. The usual Perl | |
| 118 conventions apply: | |
| 119 | |
| 120 'file' # open file for reading | |
| 121 '>file' # open file for writing | |
| 122 '>>file' # open file for appending | |
| 123 '+<file' # open file read/write | |
| 124 'command |' # open a pipe from the command | |
| 125 '| command' # open a pipe to the command | |
| 126 | |
| 127 =item -fh | |
| 128 | |
| 129 You may provide new() with a previously-opened filehandle. For | |
| 130 example, to read from STDIN: | |
| 131 | |
| 132 $strucIO = Bio::Structure::IO->new(-fh => \*STDIN); | |
| 133 | |
| 134 Note that you must pass filehandles as references to globs. | |
| 135 | |
| 136 If neither a filehandle nor a filename is specified, then the module | |
| 137 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt> | |
| 138 semantics. | |
| 139 | |
| 140 A string filehandle is handy if you want to modify the output in the | |
| 141 memory, before printing it out. The following program reads in EMBL | |
| 142 formatted entries from a file and prints them out in fasta format with | |
| 143 some HTML tags: | |
| 144 [ not relevant for Bio::Structure::IO as only one format is supported | |
| 145 at the moment ] | |
| 146 | |
| 147 use Bio::SeqIO; | |
| 148 use IO::String; | |
| 149 my $in = Bio::SeqIO->new('-file' => "emblfile" , | |
| 150 '-format' => 'EMBL'); | |
| 151 while ( my $seq = $in->next_seq() ) { | |
| 152 # the output handle is reset for every file | |
| 153 my $stringio = IO::String->new($string); | |
| 154 my $out = Bio::SeqIO->new('-fh' => $stringio, | |
| 155 '-format' => 'fasta'); | |
| 156 # output goes into $string | |
| 157 $out->write_seq($seq); | |
| 158 # modify $string | |
| 159 $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g; | |
| 160 # print into STDOUT | |
| 161 print $string; | |
| 162 } | |
| 163 | |
| 164 =item -format | |
| 165 | |
| 166 Specify the format of the file. Supported formats include: | |
| 167 | |
| 168 PDB Protein Data Bank format | |
| 169 | |
| 170 If no format is specified and a filename is given, then the module | |
| 171 will attempt to deduce it from the filename. If this is unsuccessful, | |
| 172 PDB format is assumed. | |
| 173 | |
| 174 The format name is case insensitive. 'PDB', 'Pdb' and 'pdb' are | |
| 175 all supported. | |
| 176 | |
| 177 =back | |
| 178 | |
| 179 =head2 Bio::Structure::IO-E<gt>newFh() | |
| 180 | |
| 181 $fh = Bio::Structure::IO->newFh(-fh => \*FILEHANDLE, -format=>$format); | |
| 182 $fh = Bio::Structure::IO->newFh(-format => $format); | |
| 183 # etc. | |
| 184 | |
| 185 This constructor behaves like new(), but returns a tied filehandle | |
| 186 rather than a Bio::Structure::IO object. You can read structures from this | |
| 187 object using the familiar E<lt>E<gt> operator, and write to it using | |
| 188 print(). The usual array and $_ semantics work. For example, you can | |
| 189 read all structure objects into an array like this: | |
| 190 | |
| 191 @structures = <$fh>; | |
| 192 | |
| 193 Other operations, such as read(), sysread(), write(), close(), and printf() | |
| 194 are not supported. | |
| 195 | |
| 196 =head1 OBJECT METHODS | |
| 197 | |
| 198 See below for more detailed summaries. The main methods are: | |
| 199 | |
| 200 =head2 $structure = $structIO-E<gt>next_structure() | |
| 201 | |
| 202 Fetch the next structure from the stream. | |
| 203 | |
| 204 =head2 $structIO-E<gt>write_structure($struc [,$another_struc,...]) | |
| 205 | |
| 206 Write the specified structure(s) to the stream. | |
| 207 | |
| 208 =head2 TIEHANDLE(), READLINE(), PRINT() | |
| 209 | |
| 210 These provide the tie interface. See L<perltie> for more details. | |
| 211 | |
| 212 =head1 FEEDBACK | |
| 213 | |
| 214 =head2 Mailing Lists | |
| 215 | |
| 216 User feedback is an integral part of the evolution of this | |
| 217 and other Bioperl modules. Send your comments and suggestions preferably | |
| 218 to one of the Bioperl mailing lists. | |
| 219 Your participation is much appreciated. | |
| 220 | |
| 221 bioperl-l@bioperl.org - General discussion | |
| 222 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 223 | |
| 224 =head2 Reporting Bugs | |
| 225 | |
| 226 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 227 of the bugs and their resolution. | |
| 228 Bug reports can be submitted via email or the web: | |
| 229 | |
| 230 bioperl-bugs@bioperl.org | |
| 231 http://bugzilla.bioperl.org/ | |
| 232 | |
| 233 =head1 AUTHOR - Ewan Birney, Lincoln Stein, Kris Boulez | |
| 234 | |
| 235 Email birney@ebi.ac.uk, kris.boulez@algonomics.com | |
| 236 | |
| 237 Describe contact details here | |
| 238 | |
| 239 =head1 APPENDIX | |
| 240 | |
| 241 The rest of the documentation details each of the object | |
| 242 methods. Internal methods are usually preceded with a _ | |
| 243 | |
| 244 =cut | |
| 245 | |
| 246 # Let the code begin... | |
| 247 | |
| 248 package Bio::Structure::IO; | |
| 249 | |
| 250 use strict; | |
| 251 use vars qw(@ISA); | |
| 252 | |
| 253 use Bio::Root::Root; | |
| 254 use Bio::Root::IO; | |
| 255 use Bio::PrimarySeq; | |
| 256 use Symbol(); | |
| 257 | |
| 258 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
| 259 | |
| 260 =head2 new | |
| 261 | |
| 262 Title : new | |
| 263 Usage : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format') | |
| 264 Function: Returns a new structIOstream | |
| 265 Returns : A Bio::Structure::IO handler initialised with the appropriate format | |
| 266 Args : -file => $filename | |
| 267 -format => format | |
| 268 -fh => filehandle to attach to | |
| 269 | |
| 270 =cut | |
| 271 | |
# File-scoped counter; it is not referenced anywhere in the visible part of
# this file.
my $entry = 0;

# Constructor and format factory.
#
# Called on a format-specific class (Bio::Structure::IO::<something>) it
# builds the object through the parent constructor and then runs
# _initialize() with the same arguments.
#
# Called on any other class it works out the format name -- the -format
# argument, else a guess from -file or $ARGV[0], else 'pdb' -- loads the
# matching Bio::Structure::IO::<format> module and delegates to that
# module's new().  Returns undef when the module cannot be loaded.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an object?
    if ($class =~ /Bio::Structure::IO::(\S+)/) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Index the arguments by name; a lower-cased copy of every key is added
    # so that the lookups below do not depend on the caller's capitalization.
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;

    my $format = $param{'-format'}
        || $class->_guess_format($param{-file} || $ARGV[0])
        || 'pdb';
    $format = "\L$format";    # format names are handled in lower case

    return undef unless _load_format_module($format);
    return "Bio::Structure::IO::$format"->new(@args);
}
| 298 | |
| 299 =head2 newFh | |
| 300 | |
| 301 Title : newFh | |
| 302 Usage : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format') | |
| 303 Function: does a new() followed by an fh() | |
| 304 Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format') | |
| 305 $structure = <$fh>; # read a structure object | |
| 306 print $fh $structure; # write a structure object | |
| 307 Returns : filehandle tied to the Bio::Structure::IO::Fh class | |
| 308 Args : | |
| 309 | |
| 310 =cut | |
| 311 | |
# Convenience constructor: builds a stream with new() using the given
# arguments and returns the result of calling fh() on it.  Returns nothing
# (undef in scalar context, the empty list in list context) when new()
# yields a false value.
sub newFh {
    my ($class, @ctor_args) = @_;

    my $stream = $class->new(@ctor_args);
    return unless $stream;

    return $stream->fh;
}
| 317 | |
| 318 =head2 fh | |
| 319 | |
| 320 Title : fh | |
| 321 Usage : $obj->fh | |
| 322 Function: | |
| 323 Example : $fh = $obj->fh; # make a tied filehandle | |
| 324 $structure = <$fh>; # read a structure object | |
| 325 print $fh $structure; # write a structure object | |
| 326 Returns : filehandle tied to the Bio::Structure::IO::Fh class | |
| 327 Args : | |
| 328 | |
| 329 =cut | |
| 330 | |
| 331 | |
# Returns a freshly generated anonymous glob reference that has been tied to
# the invocant's class, passing the invocant itself to the tie constructor.
# Works both as an object method and as a class method (ref($x) || $x).
sub fh {
    my ($stream) = @_;

    my $tie_class = ref($stream) || $stream;
    my $glob      = Symbol::gensym();

    tie $$glob, $tie_class, $stream;
    return $glob;
}
| 339 | |
| 340 | |
# _initialize is chained for all Bio::Structure::IO classes: it first runs
# the parent class's _initialize and then sets up the IO layer, handing the
# same argument list to both.
sub _initialize {
    my ($stream, @ctor_args) = @_;

    # not really necessary unless we put more in RootI
    $stream->SUPER::_initialize(@ctor_args);

    # initialize the IO part
    $stream->_initialize_io(@ctor_args);
}
| 352 | |
| 353 =head2 next_structure | |
| 354 | |
| 355 Title : next_structure | |
| 356 Usage : $structure = $stream->next_structure | |
| 357 Function: Reads the next structure object from the stream and returns it. | |
| 358 | |
| 359 Certain driver modules may encounter entries in the stream that | |
| 360 are either misformatted or that use syntax not yet understood | |
| 361 by the driver. If such an incident is recoverable, e.g., by | |
| 362 dismissing a feature of a feature table or some other non-mandatory | |
| 363 part of an entry, the driver will issue a warning. In the case | |
| 364 of a non-recoverable situation an exception will be thrown. | |
| 365 Do not assume that you can resume parsing the same stream after | |
| 366 catching the exception. Note that you can always turn recoverable | |
| 367 errors into exceptions by calling $stream->verbose(2) (see | |
| 368 Bio::RootI POD page). | |
| 369 Returns : a Bio::Structure structure object | |
| 370 Args : none | |
| 371 | |
| 372 =cut | |
| 373 | |
# Placeholder reader for the generic class: it always raises an exception
# through the object's throw() method, because this file's generic
# implementation has nothing to parse.
sub next_structure {
    my $self = shift;
    $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
}
| 378 | |
| 379 # Do we want people to read out the sequence directly from a $structIO stream | |
| 380 # | |
| 381 ##=head2 next_primary_seq | |
| 382 ## | |
| 383 ## Title : next_primary_seq | |
| 384 ## Usage : $seq = $stream->next_primary_seq | |
| 385 ## Function: Provides a primaryseq type of sequence object | |
| 386 ## Returns : A Bio::PrimarySeqI object | |
| 387 ## Args : none | |
| 388 ## | |
| 389 ## | |
| 390 ##=cut | |
| 391 ## | |
| 392 ##sub next_primary_seq { | |
| 393 ## my ($self) = @_; | |
| 394 ## | |
| 395 ## # in this case, we default to next_seq. This is because | |
| 396 ## # Bio::Seq's are Bio::PrimarySeqI objects. However we | |
| 397 ## # expect certain sub classes to override this method to provide | |
| 398 ## # less parsing heavy methods to retrieving the objects | |
| 399 ## | |
| 400 ## return $self->next_seq(); | |
| 401 ##} | |
| 402 | |
| 403 =head2 write_structure | |
| 404 | |
| 405 Title : write_structure | |
| 406 Usage : $stream->write_structure($structure) | |
| 407 Function: writes the $structure object into the stream | |
| 408 Returns : 1 for success and 0 for error | |
| 409 Args : Bio::Structure object | |
| 410 | |
| 411 =cut | |
| 412 | |
# Placeholder writer for the generic class, under the name used by this
# module's documentation (write_structure).  It always raises an exception
# through the object's throw() method.
#
# Args   : structure object(s) to write (ignored here)
# Throws : always
sub write_structure {
    my ($self, @structures) = @_;
    $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
}

# Backward-compatible alias kept from the Bio::SeqIO code this module was
# copied from.  It forwards to write_structure(), so calling it on an object
# whose class overrides write_structure() reaches that override, while the
# generic class still throws the same message as before.
#
# Args    : structure object(s) to write
# Returns : whatever write_structure() returns
sub write_seq {
    my ($self, @structures) = @_;
    return $self->write_structure(@structures);
}
| 417 | |
| 418 | |
| 419 # Do we need this here? | |
| 420 # | |
| 421 ##=head2 alphabet | |
| 422 ## | |
| 423 ## Title : alphabet | |
| 424 ## Usage : $self->alphabet($newval) | |
| 425 ## Function: Set/get the molecule type for the Seq objects to be created. | |
| 426 ## Example : $seqio->alphabet('protein') | |
| 427 ## Returns : value of alphabet: 'dna', 'rna', or 'protein' | |
| 428 ## Args : newvalue (optional) | |
| 429 ## Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein' | |
| 430 ## | |
| 431 ##=cut | |
| 432 ## | |
| 433 ##sub alphabet { | |
| 434 ## my ($self, $value) = @_; | |
| 435 ## | |
| 436 ## if ( defined $value) { | |
| 437 ## # instead of hard-coding the allowed values once more, we check by | |
| 438 ## # creating a dummy sequence object | |
| 439 ## eval { | |
| 440 ## my $seq = Bio::PrimarySeq->new('-alphabet' => $value); | |
| 441 ## }; | |
| 442 ## if($@) { | |
| 443 ## $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values."); | |
| 444 ## } | |
| 445 ## $self->{'alphabet'} = "\L$value"; | |
| 446 ## } | |
| 447 ## return $self->{'alphabet'}; | |
| 448 ##} | |
| 449 | |
| 450 =head2 _load_format_module | |
| 451 | |
| 452 Title : _load_format_module | |
| 453 Usage : *INTERNAL Structure::IO stuff* | |
| 454 Function: Loads up (like use) a module at run time on demand | |
| 455 Example : | |
| 456 Returns : | |
| 457 Args : | |
| 458 | |
| 459 =cut | |
| 460 | |
# Loads Bio/Structure/IO/<format>.pm at run time.
#
# Args    : $format - format name; it becomes part of the required file path
# Returns : 1 when the module is loaded (or was already loaded),
#           nothing (undef / empty list) on failure, after printing a
#           diagnostic to STDERR
sub _load_format_module {
    my ($format) = @_;

    # The name is interpolated into a file path, so accept only plain word
    # characters: this rejects undef, the empty string and anything that
    # could walk out of Bio/Structure/IO/ (e.g. '../../x').
    unless (defined $format && $format =~ /\A\w+\z/) {
        my $shown = defined $format ? $format : '(undef)';
        print STDERR "Bio::Structure::IO: invalid format name '$shown'\n";
        return;
    }

    my $load = "Bio/Structure/IO/$format.pm";

    # require() consults %INC itself, so an already-loaded module is a cheap
    # no-op.  The trailing 1 lets us test the eval's return value rather than
    # relying on $@ alone.
    my $loaded = eval { require $load; 1 };
    return 1 if $loaded;

    print STDERR <<END;
$load: $format cannot be found
Exception $@
For more information about the Structure::IO system please see the
Bio::Structure::IO docs. This includes ways of checking for formats at
compile time, not run time
END
    return;
}
| 485 | |
| 486 =head2 _concatenate_lines | |
| 487 | |
| 488 Title : _concatenate_lines | |
| 489 Usage : $s = _concatenate_lines($line, $continuation_line) | |
| 490 Function: Private. Concatenates two strings assuming that the second stems | |
| 491 from a continuation line of the first. Adds a space between both | |
| 492 unless the first ends with a dash. | |
| 493 | |
| 494 Takes care of either arg being empty. | |
| 495 Example : | |
| 496 Returns : A string. | |
| 497 Args : | |
| 498 | |
| 499 =cut | |
| 500 | |
# Joins a line and its continuation line.
#
# A single space is inserted between the two parts unless the first part
# ends with a dash.  Either part may be undef or empty, in which case the
# other part is returned unchanged (or '' when both are missing).
#
# Emptiness is tested with defined/length rather than truthiness, so a part
# consisting of the string "0" is kept instead of being silently dropped.
#
# Args    : $s1 - first line, $s2 - continuation line
# Returns : the concatenated string
sub _concatenate_lines {
    my ($self, $s1, $s2) = @_;

    $s1 = '' unless defined $s1;
    $s2 = '' unless defined $s2;

    $s1 .= ' ' if length($s1) && length($s2) && $s1 !~ /-$/;
    return $s1 . $s2;
}
| 506 | |
| 507 =head2 _filehandle | |
| 508 | |
| 509 Title : _filehandle | |
| 510 Usage : $obj->_filehandle($newval) | |
| 511 Function: This method is deprecated. Call _fh() instead. | |
| 512 Example : | |
| 513 Returns : value of _filehandle | |
| 514 Args : newvalue (optional) | |
| 515 | |
| 516 | |
| 517 =cut | |
| 518 | |
# Deprecated accessor kept for older callers: passes every argument straight
# through to _fh() and returns whatever _fh() returns.
sub _filehandle {
    my $self = shift;
    return $self->_fh(@_);
}
| 523 | |
| 524 =head2 _guess_format | |
| 525 | |
| 526 Title : _guess_format | |
| 527 Usage : $obj->_guess_format($filename) | |
| 528 Function: | |
| 529 Example : | |
| 530 Returns : guessed format of filename (lower case) | |
| 531 Args : | |
| 532 | |
| 533 =cut | |
| 534 | |
# Guesses a format name from a file name's extension.
#
# The file name is held in a lexical variable rather than being assigned to
# the global $_, so the caller's $_ is left untouched and the call cannot
# die when $_ is aliased to a read-only value.
#
# NOTE(review): apart from 'pdb', the names in the table are sequence formats
# carried over from Bio::SeqIO; whether matching Bio::Structure::IO modules
# exist cannot be told from this file.
#
# Args    : $filename - name to inspect (optional)
# Returns : lower-case format name, or nothing (undef / empty list) when no
#           file name was given or no extension matched
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    # Checked in order; the first matching extension wins.
    my @extension_rules = (
        [ 'fasta',   qr/\.(fasta|fast|seq|fa|fsa|nt|aa)$/i ],
        [ 'genbank', qr/\.(gb|gbank|genbank)$/i ],
        [ 'scf',     qr/\.scf$/i ],
        [ 'pir',     qr/\.pir$/i ],
        [ 'embl',    qr/\.(embl|ebl|emb|dat)$/i ],
        [ 'raw',     qr/\.(txt)$/i ],
        [ 'gcg',     qr/\.gcg$/i ],
        [ 'ace',     qr/\.ace$/i ],
        [ 'bsml',    qr/\.(bsm|bsml)$/i ],
        [ 'pdb',     qr/\.(ent|pdb)$/i ],
    );

    for my $rule (@extension_rules) {
        my ($format, $pattern) = @{$rule};
        return $format if $filename =~ $pattern;
    }
    return;
}
| 549 | |
# Destructor: calls the object's close() method when the object is
# destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
| 555 | |
# tie() constructor: wraps the given value in a new hash-based object, stored
# under the 'structio' key, and blesses it into the requested class.
sub TIEHANDLE {
    my ($class, $structio) = @_;

    my %state = ('structio' => $structio);
    return bless \%state, $class;
}
| 560 | |
# tie() read hook, invoked for <$fh> on the tied handle.
#
# Reads through the wrapped stream's next_structure() method.  The previous
# code called next_seq(), a name left over from Bio::SeqIO that no sub in
# this file defines.
#
# Returns : scalar context - the next structure (or a false value at the end)
#           list context   - every remaining structure, read until
#                            next_structure() returns a false value
sub READLINE {
    my $self   = shift;
    my $stream = $self->{'structio'};

    return $stream->next_structure() unless wantarray;

    my @structures;
    while (my $structure = $stream->next_structure()) {
        push @structures, $structure;
    }
    return @structures;
}
| 568 | |
# tie() write hook, invoked for "print $fh ..." on the tied handle.
#
# Forwards everything being printed to the wrapped stream's
# write_structure() method, the writer name used throughout this module's
# documentation.  The previous code called write_seq(), a name left over
# from Bio::SeqIO.
#
# Returns : whatever write_structure() returns
sub PRINT {
    my ($self, @structures) = @_;
    return $self->{'structio'}->write_structure(@structures);
}
| 573 | |
| 574 1; | |
| 575 | 
