diff variant_effect_predictor/Bio/Structure/IO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Structure/IO.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,575 @@
+# $Id: IO.pm,v 1.3 2002/10/22 07:45:21 lapp Exp $
+#
+# BioPerl module for Bio::Structure::IO
+#
+# Cared for by Ewan Birney <birney@sanger.ac.uk>
+#       and Lincoln Stein  <lstein@cshl.org>
+#       and Kris Boulez	   <kris.boulez@algonomics.com>
+#
+# Copyright 2001, 2002 Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+# November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified
+# 			where needed. Factoring out common methods
+# 			(to Bio::Root::IO) might be a good idea.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::IO - Handler for Structure Formats
+
+=head1 SYNOPSIS
+
+    use Bio::Structure::IO;
+
+    $in  = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb');
+    $out = Bio::Structure::IO->new(-file => ">outputfilename" , '-format' => 'pdb');
+    # note: we quote -format to keep older perl's from complaining.
+
+    while ( my $struc = $in->next_structure() ) {
+	$out->write_structure($struc);
+    }
+
+now, to actually get at the structure object, use the standard Bio::Structure
+methods (look at L<Bio::Structure> if you don't know what they are)
+
+    use Bio::Structure::IO;
+
+    $in  = Bio::Structure::IO->new(-file => "inputfilename" , '-format' => 'pdb');
+
+    while ( my $struc = $in->next_structure() ) {
+       print "Structure ",$struc->id," number of models: ",scalar $struc->model,"\n";
+    }
+
+
+
+=head1 DESCRIPTION
+
+[ The following description is a copy-paste from the Bio::SeqIO description. 
+  This is not surprising as the code is also mostly a copy. ]
+
+Bio::Structure::IO is a handler module for the formats in the Structure::IO set 
+(eg, Bio::Structure::IO::pdb). It is the officially sanctioned way of getting at
+the format objects, which most people should use.
+
+The Bio::Structure::IO system can be thought of like biological file handles.
+They are attached to filehandles with smart formatting rules (eg, PDB format) 
+and can either read or write structure objects (Bio::Structure objects, or
+more correctly, Bio::Structure::StructureI implementing objects, of which 
+Bio::Structure is one such object). If you want to know what to do with a 
+Bio::Structure object, read L<Bio::Structure>
+
+The idea is that you request a stream object for a particular format.
+All the stream objects have a notion of an internal file that is read
+from or written to. A particular Structure::IO object instance is configured
+for either input or output. A specific example of a stream object is
+the Bio::Structure::IO::pdb object.
+
+Each stream object has functions
+
+   $stream->next_structure();
+
+and
+
+   $stream->write_structure($struc);
+
+also
+
+   $stream->type() # returns 'INPUT' or 'OUTPUT'
+
+As an added bonus, you can recover a filehandle that is tied to the
+Structure::IO object, allowing you to use the standard E<lt>E<gt> and print
+operations to read and write structure objects:
+
+    use Bio::Structure::IO;
+
+    $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input
+
+    while ( $structure = <$stream> ) {
+	# do something with $structure
+    }
+
+and
+
+    print $stream $structure; # when stream is in output mode
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::Structure::IO-E<gt>new()
+
+   $stream = Bio::Structure::IO->new(-file => 'filename',   -format=>$format);
+   $stream = Bio::Structure::IO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $stream = Bio::Structure::IO->new(-format => $format);
+
+The new() class method constructs a new Bio::Structure::IO object. The
+returned object can be used to retrieve or print Bio::Structure objects.
+new() accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $strucIO = Bio::Structure::IO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+A string filehandle is handy if you want to modify the output in the
+memory, before printing it out. The following program reads in EMBL
+formatted entries from a file and prints them out in fasta format with
+some HTML tags:
+[ not relevant for Bio::Structure::IO as only one format is supported
+  at the moment ]
+
+  use Bio::SeqIO;
+  use IO::String;
+  my $in  = Bio::SeqIO->new('-file' => "emblfile" , 
+  			    '-format' => 'EMBL');
+  while ( my $seq = $in->next_seq() ) {
+      # the output handle is reset for every file
+      my $stringio = IO::String->new($string);
+      my $out = Bio::SeqIO->new('-fh' => $stringio,
+  			        '-format' => 'fasta');
+      # output goes into $string
+      $out->write_seq($seq);
+      # modify $string
+      $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
+      # print into STDOUT
+      print $string;
+  }
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   PDB         Protein Data Bank format
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename.  If this is unsuccessful,
+PDB format is assumed.
+
+The format name is case insensitive.  'PDB', 'Pdb' and 'pdb' are
+all supported.
+
+=back
+
+=head2 Bio::Structure::IO-E<gt>newFh()
+
+   $fh = Bio::Structure::IO->newFh(-fh   => \*FILEHANDLE, -format=>$format);
+   $fh = Bio::Structure::IO->newFh(-format => $format);
+   # etc.
+
+This constructor behaves like new(), but returns a tied filehandle
+rather than a Bio::Structure::IO object.  You can read structures from this
+object using the familiar E<lt>E<gt> operator, and write to it using
+print().  The usual array and $_ semantics work.  For example, you can
+read all structure objects into an array like this:
+
+  @structures = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and printf() 
+are not supported.
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $structure = $structIO-E<gt>next_structure()
+
+Fetch the next structure from the stream.
+
+=head2 $structIO-E<gt>write_structure($struc [,$another_struc,...])
+
+Write the specified structure(s) to the stream.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/MailList.shtml      - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Ewan Birney, Lincoln Stein, Kris Boulez
+
+Email birney@ebi.ac.uk, kris.boulez@algonomics.com
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Structure::IO;
+
+use strict;
+use vars qw(@ISA);
+
+use Bio::Root::Root;
+use Bio::Root::IO;
+use Bio::PrimarySeq;
+use Symbol();
+
+@ISA = qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format')
+ Function: Returns a new structIOstream
+ Returns : A Bio::Structure::IO handler initialised with the appropriate format
+ Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+
+=cut
+
+# File-scoped counter; nothing else in this file reads or updates it.
+my $entry = 0;
+
+# Constructor. Called on a format driver class (a name matching
+# Bio::Structure::IO::<something>) it builds the object through the parent
+# constructor and runs _initialize on it. Called on anything else it works
+# out the format, loads the matching driver module and hands the original
+# arguments over to that driver's new().
+# Returns the new object, or undef when the driver module cannot be loaded.
+sub new {
+    my $caller = shift;
+    my @args   = @_;
+    my $class  = ref($caller) || $caller;
+
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if ( $class =~ /Bio::Structure::IO::(\S+)/ ) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    }
+
+    # Generic entry point: look at the named arguments, accepting the
+    # keys in any capitalisation.
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param;
+
+    # Precedence: explicit -format, then a guess from the file name (or the
+    # first command line argument), then the 'pdb' default.
+    my $format = $param{'-format'};
+    $format ||= $class->_guess_format( $param{-file} || $ARGV[0] );
+    $format ||= 'pdb';
+    $format = lc $format;    # driver module names are lower case
+
+    _load_format_module($format) or return undef;
+    return "Bio::Structure::IO::$format"->new(@args);
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
+           $structure = <$fh>;   # read a structure object
+           print $fh $structure; # write a structure object
+ Returns : filehandle tied to the Bio::Structure::IO::Fh class
+ Args    :
+
+=cut
+
+# Convenience constructor: builds the stream with new() and returns the
+# tied filehandle produced by its fh() method. Returns nothing when new()
+# does not yield an object.
+sub newFh {
+    my ($class, @args) = @_;
+    my $stream = $class->new(@args);
+    return unless $stream;
+    return $stream->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $structure = <$fh>;   # read a structure object
+           print $fh $structure; # write a structure object
+ Returns : filehandle tied to the Bio::Structure::IO::Fh class
+ Args    :
+
+=cut
+
+
+# Creates a fresh anonymous glob, ties it to this object's class (passing
+# the object itself to the tie constructor) and returns the glob reference
+# so it can be used as a filehandle.
+sub fh {
+    my ($self) = @_;
+    my $class  = ref($self) || $self;
+    my $glob   = Symbol::gensym();
+    tie ${$glob}, $class, $self;
+    return $glob;
+}
+
+
+# _initialize is chained for all Bio::Structure::IO classes
+
+# Runs the inherited _initialize and then _initialize_io, both with the
+# constructor arguments. The value of the _initialize_io call is returned.
+sub _initialize {
+    my $self = shift;
+    my @args = @_;
+
+    # not really necessary unless we put more in RootI
+    $self->SUPER::_initialize(@args);
+
+    # set up the IO part with the same arguments
+    return $self->_initialize_io(@args);
+}
+
+=head2 next_structure
+
+ Title   : next_structure
+ Usage   : $structure = stream->next_structure
+ Function: Reads the next structure object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream that
+           are either misformatted or that use syntax not yet understood
+           by the driver. If such an incident is recoverable, e.g., by
+           dismissing a feature of a feature table or some other non-mandatory
+           part of an entry, the driver will issue a warning. In the case
+           of a non-recoverable situation an exception will be thrown.
+           Do not assume that you can resume parsing the same stream after
+           catching the exception. Note that you can always turn recoverable
+           errors into exceptions by calling $stream->verbose(2) (see
+           Bio::RootI POD page).
+ Returns : a Bio::Structure structure object
+ Args    : none
+
+=cut
+
+# Placeholder reader for the generic class: it always throws. Format
+# drivers are expected to provide their own next_structure.
+sub next_structure {
+    my $self = shift;
+    $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
+}
+
+# Do we want people to read out the sequence directly from a $structIO stream
+# 
+##=head2 next_primary_seq
+##
+## Title   : next_primary_seq
+## Usage   : $seq = $stream->next_primary_seq
+## Function: Provides a primaryseq type of sequence object
+## Returns : A Bio::PrimarySeqI object
+## Args    : none
+##
+##
+##=cut
+##
+##sub next_primary_seq {
+##   my ($self) = @_;
+##
+##   # in this case, we default to next_seq. This is because
+##   # Bio::Seq's are Bio::PrimarySeqI objects. However we
+##   # expect certain sub classes to override this method to provide
+##   # less parsing heavy methods to retrieving the objects
+##
+##   return $self->next_seq();
+##}
+
+=head2 write_structure
+
+ Title   : write_structure
+ Usage   : $stream->write_structure($structure)
+ Function: writes the $structure object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Structure object
+
+=cut
+
+# Placeholder writer for the generic class, under the name used in the
+# documentation and SYNOPSIS: it always throws. Format drivers are expected
+# to provide their own write_structure.
+sub write_structure {
+    my $self = shift;
+    $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
+}
+
+# Historical name left over from the Bio::SeqIO code this module was copied
+# from. Kept so existing callers keep working; it forwards to
+# write_structure, so on the generic class it still throws the same message
+# and on a driver it reaches the driver's writer.
+sub write_seq {
+    my ($self, @strucs) = @_;
+    return $self->write_structure(@strucs);
+}
+
+
+# Do we need this here?
+#
+##=head2 alphabet
+##
+## Title   : alphabet
+## Usage   : $self->alphabet($newval)
+## Function: Set/get the molecule type for the Seq objects to be created.
+## Example : $seqio->alphabet('protein')
+## Returns : value of alphabet: 'dna', 'rna', or 'protein'
+## Args    : newvalue (optional)
+## Throws  : Exception if the argument is not one of 'dna', 'rna', or 'protein'
+##
+##=cut
+##
+##sub alphabet {
+##   my ($self, $value) = @_;
+##
+##   if ( defined $value) {
+##       # instead of hard-coding the allowed values once more, we check by
+##       # creating a dummy sequence object
+##       eval {
+##	   my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
+##       };
+##       if($@) {
+##	   $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
+##       }
+##       $self->{'alphabet'} = "\L$value";
+##   }
+##   return $self->{'alphabet'};
+##}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL Structure::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Plain function (not a method): loads Bio/Structure/IO/<format>.pm at run
+# time. Returns 1 when the module is available, and nothing after printing
+# a diagnostic to STDERR when the require fails.
+sub _load_format_module {
+    my $format = shift;
+    my $load   = "Bio/Structure/IO/$format.pm";
+
+    # Short-circuit when main's symbol table already has a "_<file" entry
+    # for this path (presumably perl's per-compiled-file bookkeeping, i.e.
+    # the module is already loaded).
+    my $marker = "_<$load";
+    return 1 if $main::{$marker};
+
+    eval { require $load; };
+    return 1 unless $@;
+
+    # $@ still holds the require failure and is interpolated below.
+    print STDERR <<END;
+$load: $format cannot be found
+Exception $@
+For more information about the Structure::IO system please see the 
+Bio::Structure::IO docs.  This includes ways of checking for formats at 
+compile time, not run time
+END
+    return;
+}
+
+=head2 _concatenate_lines
+
+ Title   : _concatenate_lines
+ Usage   : $s = _concatenate_lines($line, $continuation_line)
+ Function: Private. Concatenates two strings assuming that the second stems
+           from a continuation line of the first. Adds a space between both
+           unless the first ends with a dash.
+
+           Takes care of either arg being empty.
+ Example :
+ Returns : A string.
+ Args    :
+
+=cut
+
+# Joins a line and its continuation line. A single space is inserted
+# between them unless the first part ends with a dash or either part is
+# empty. Any Perl-false value (undef, "", "0", 0) counts as empty and
+# contributes nothing to the result.
+sub _concatenate_lines {
+    my ($self, $first, $second) = @_;
+
+    # normalise false values to the empty string
+    $first  = "" unless $first;
+    $second = "" unless $second;
+
+    my $glue = "";
+    if ( $first ne "" && $second ne "" ) {
+        $glue = " " unless $first =~ /-$/;
+    }
+    return $first . $glue . $second;
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function: This method is deprecated. Call _fh() instead.
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Deprecated accessor kept for old callers: forwards every argument to
+# _fh() and returns whatever that returns.
+sub _filehandle {
+    my $self = shift;
+    return $self->_fh(@_);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+# Guesses the structure format from a file name.
+#
+# Args    : the file name (may be missing or empty)
+# Returns : 'pdb' for names ending in .ent or .pdb (case insensitive);
+#           nothing otherwise, so that the caller can fall back to its
+#           default format.
+#
+# Only formats that exist as structure formats are reported. The earlier
+# list inherited from Bio::SeqIO (fasta, genbank, embl, raw, ...) made new()
+# try to load non-existent driver modules for names such as "x.txt" instead
+# of falling back to PDB as documented. The file name is kept in a lexical
+# so the caller's $_ is left untouched.
+sub _guess_format {
+    my ($class, $filename) = @_;
+    return unless $filename;
+    return 'pdb' if $filename =~ /\.(ent|pdb)$/i;
+    return;
+}
+
+# Destructor: calls close() on the object being destroyed.
+sub DESTROY {
+    my ($self) = @_;
+
+    $self->close();
+}
+
+# Tie constructor: wraps the given stream object in a small hash under the
+# 'structio' key and blesses that hash into the requested class.
+sub TIEHANDLE {
+    my $class  = shift;
+    my $stream = shift;
+    my %state  = ( 'structio' => $stream );
+    return bless \%state, $class;
+}
+
+# Tie interface for <$fh>. In scalar context returns the next structure
+# from the wrapped stream; in list context returns all structures up to the
+# first false value from the stream.
+#
+# The wrapped stream is read with next_structure(), the reader method this
+# class defines; the next_seq() name used before came from the Bio::SeqIO
+# code this module was copied from and is not defined here.
+sub READLINE {
+    my $self   = shift;
+    my $stream = $self->{'structio'};
+    return $stream->next_structure() unless wantarray;
+
+    my @structures;
+    while ( my $struc = $stream->next_structure() ) {
+        push @structures, $struc;
+    }
+    return @structures;
+}
+
+# Tie interface for "print $fh ...": passes everything printed to the
+# wrapped stream's write_structure(), the documented writer method, and
+# returns its result. The write_seq() name used before came from the
+# Bio::SeqIO code this module was copied from.
+sub PRINT {
+    my $self = shift;
+    return $self->{'structio'}->write_structure(@_);
+}
+
+1;
+