diff variant_effect_predictor/Bio/ClusterIO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/ClusterIO.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,319 @@
+# $Id: ClusterIO.pm,v 1.11.2.1 2003/01/21 01:11:17 jason Exp $
+#
+# BioPerl module for Bio::ClusterIO.pm
+#
+# Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://anatomy.otago.ac.nz/meg
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+#
+# May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
+# by Andrew Macgregor
+#
+# April 17, 2002 - Initial implementation by Andrew Macgregor
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::ClusterIO - Handler for Cluster Formats
+
+=head1 SYNOPSIS
+
+  #NB: This example is unigene specific
+
+  use Bio::ClusterIO;
+
+  $stream  = Bio::ClusterIO->new('-file' => "Hs.data", 
+                                 '-format' => "unigene");
+  # note: we quote -format to keep older perl's from complaining.
+
+  while ( my $in = $stream->next_cluster() ) {
+      print $in->unigene_id() . "\n";
+      while ( my $sequence = $in->next_seq() ) {
+          print $sequence->accession_number() . "\n";
+      }
+  }
+  # Parsing errors are printed to STDERR.
+
+=head1 DESCRIPTION
+
+The ClusterIO module works with the ClusterIO format module to read
+various cluster formats such as NCBI UniGene.
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::ClusterIO-E<gt>new()
+
+   $str = Bio::ClusterIO->new(-file => 'filename',
+                              -format=>$format);
+
+The new() class method constructs a new Bio::ClusterIO object.  The
+returned object can be used to retrieve or print cluster
+objects. new() accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading.
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   unigene		*.data	UniGene build files.
+   dbsnp		*.xml	dbSNP XML files
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename.  If this is unsuccessful,
+the main UniGene build format is assumed.
+
+The format name is case insensitive.  'UNIGENE', 'UniGene' and
+'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
+
+=back
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $cluster = $str-E<gt>next_cluster()
+
+Fetch the next cluster from the stream.
+
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These I've left in here because they were in the SeqIO
+module. Feedback appreciated. There they provide the tie interface.
+See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/MailList.shtml      - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.
+ Bug reports can be submitted via email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Andrew Macgregor
+
+Email andrew@anatomy.otago.ac.nz
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::ClusterIO;
+
+use strict;
+use vars qw(@ISA);
+
+use Bio::Root::Root;
+use Bio::Root::IO;
+
+@ISA = qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : Bio::ClusterIO->new(-file => $filename, -format => 'format')
+ Function: Returns a new cluster stream
+ Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
+ Args    : -file => $filename
+           -format => format
+
+=cut
+
+
+my $entry = 0;
+
+sub new {
+    my ($caller,@args) = @_;
+    my $class = ref($caller) || $caller;
+    
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
+	my ($self) = $class->SUPER::new(@args);	
+	$self->_initialize(@args);
+	return $self;
+    } else { 
+
+	my %param = @args;
+	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+	my $format = $param{'-format'} || 
+	    $class->_guess_format( $param{-file} || $ARGV[0] );
+	$format = "\L$format";	# normalize capitalization to lower case
+
+	return undef unless( $class->_load_format_module($format) );
+	return "Bio::ClusterIO::$format"->new(@args);
+    }
+}
+
+
+# _initialize is chained for all ClusterIO classes
+
+sub _initialize {
+    my($self, @args) = @_;
+    # initialize the IO part
+    $self->_initialize_io(@args);
+}
+
+=head2 next_cluster
+
+ Title   : next_cluster
+ Usage   : $cluster = $stream->next_cluster()
+ Function: Reads the next cluster object from the stream and returns it.
+ Returns : a L<Bio::ClusterI> compliant object
+ Args    : none
+
+
+=cut
+
+sub next_cluster {
+   my ($self, $seq) = @_;
+   $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
+}
+
+=head2 cluster_factory
+
+ Title   : cluster_factory
+ Usage   : $obj->cluster_factory($newval)
+ Function: Get/set the object factory to use for creating the cluster
+           objects.
+ Example : 
+ Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
+ Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub cluster_factory{
+    my $self = shift;
+
+    return $self->{'cluster_factory'} = shift if @_;
+    return $self->{'cluster_factory'};
+}
+
+=head2 object_factory
+
+ Title   : object_factory
+ Usage   : $obj->object_factory($newval)
+ Function: This is an alias to cluster_factory with a more generic name.
+ Example : 
+ Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
+ Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub object_factory{
+    return shift->cluster_factory(@_);
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL ClusterIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  my ($self,$format) = @_;
+  my $module = "Bio::ClusterIO::" . $format;
+  my $ok;
+  
+  eval {
+      $ok = $self->_load_module($module);
+  };
+  if ( $@ ) {
+    print STDERR <<END;
+$self: could not load $format - for more details on supported formats please see the ClusterIO docs
+Exception $@
+END
+  ;
+  }
+  return $ok;
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function: guess format based on file suffix
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+ Notes   : formats that _filehandle() will guess include unigene and dbsnp
+
+=cut
+
+sub _guess_format {
+   my $class = shift;
+   return unless $_ = shift;
+   return 'unigene'   if /\.(data)$/i;
+   return 'dbsnp'     if /\.(xml)$/i;
+}
+
+sub DESTROY {
+    my $self = shift;
+
+    $self->close();
+}
+
+# I need some direction on these!! The module works so I haven't fiddled with them!
+
+sub TIEHANDLE {
+    my ($class,$val) = @_;
+    return bless {'seqio' => $val}, $class;
+}
+
+sub READLINE {
+  my $self = shift;
+  return $self->{'seqio'}->next_seq() unless wantarray;
+  my (@list, $obj);
+  push @list, $obj while $obj = $self->{'seqio'}->next_seq();
+  return @list;
+}
+
+sub PRINT {
+  my $self = shift;
+  $self->{'seqio'}->write_seq(@_);
+}
+
+1;
+