diff variant_effect_predictor/Bio/Biblio/IO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Biblio/IO.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,373 @@
+# $Id: IO.pm,v 1.8 2002/10/22 07:45:11 lapp Exp $
+#
+# BioPerl module for Bio::Biblio::IO
+#
+# Cared for by Martin Senger <senger@ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO - Handling the bibliographic references
+
+=head1 SYNOPSIS
+
+    use Bio::Biblio::IO;
+
+    # getting citations from a file
+    $in = Bio::Biblio::IO->new ('-file' => 'myfile.xml' ,
+				'-format' => 'medlinexml');
+ --- OR ---
+
+    # getting citations from a string
+    $in = Bio::Biblio::IO->new ('-data' => '<MedlineCitation>...</MedlineCitation>' ,
+				'-format' => 'medlinexml');
+ --- OR ---
+
+    # getting citations from a string if IO::String is installed
+    use IO::String;
+    $in = Bio::Biblio::IO->new ('-fh' => IO::String->new ($citation),
+				'-format' => 'medlinexml');
+
+    $in = Bio::Biblio::IO->new(-fh => $io_handle , '-format' => 'medlinexml');
+
+ --- OR ---
+
+    # getting citations from any IO handler
+    $in = Bio::Biblio::IO->new('-fh' => $io_handle ,
+			       '-format' => 'medlinexml');
+
+
+    # now, having $in, we can read all citations
+    while ( my $citation = $in->next_bibref() ) {
+	&do_something_with_citation ($citation);
+    }
+
+ --- OR ---
+
+    # again reading all citation but now a callback defined in your
+    # code is used (note that the reading starts already when new()
+    # is called)
+    $io = new Bio::Biblio::IO ('-format'   => 'medlinexml',
+			       '-file'     => $testfile,
+			       '-callback' => \&callback);
+    sub callback {
+        my $citation = shift;
+	print $citation->{'_identifier'} . "\n";
+    }
+
+Now, to actually get a citation in an XML format,
+use I<Bio::Biblio> module which returns an XML string:
+
+    use Bio::Biblio;
+    my $xml = new Bio::Biblio->get_by_id ('94033980');
+    my $reader = Bio::Biblio::IO->new ('-data' => $xml,
+				       '-format' => 'medlinexml');
+
+    while (my $citation = $reader->next_bibref()) {
+       ... do something here with $citation
+       }
+
+And, finally, the resulting citation can be received in different
+output formats:
+
+    $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+			       '-result' => 'raw');
+ --- OR ---
+
+    $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+			       '-result' => 'medline2ref');
+
+ --- OR ---
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+			       '-result' => 'pubmed2ref');
+
+=head1 DESCRIPTION
+
+Bio::Biblio::IO is a handler module for accessing bibliographic
+citations. The citations can be in different formats - assuming that
+there is a corresponding module knowing that format in Bio::Biblio::IO
+directory (e.g. Bio::Biblio::IO::medlinexml). The format (and the
+module name) is given by the argument I<-format>.
+
+Once an instance of C<Bio::Biblio::IO> class is available, the
+citations can be read by calling repeatedly method I<next_bibref>:
+
+    while (my $citation = $reader->next_bibref()) {
+       ... do something here with $citation
+       }
+
+However, this may imply that all citations were already read into the
+memory. If you expect a huge amount of citations to be read, you may
+choose a I<callback> option. Your subroutine is specified in the
+C<new()> method and is called everytime a new citation is available
+(see an example above in SYNOPSIS).
+
+The citations returned by I<next_bibref> or given to your callback
+routine can be of different formats depending on the argument
+I<-result>. One result type is I<raw> and it is represented by a
+simple, not blessed hash table:
+
+    $io = new Bio::Biblio::IO ('-result' => 'raw');
+
+What other result formats are available depends on the module who
+reads the citations in the first place. At the moment, the following
+ones are available:
+
+    $io = new Bio::Biblio::IO ('-result' => 'medline2ref');
+
+This is a default result format for reading citations by the
+I<medlinexml> module. The C<medlinexml> module is again the default
+one. Which means that you can almost omit arguments (you still need to
+say where the citations come from):
+
+    $io = new Bio::Biblio::IO ('-file' => 'data/medline_data.xml');
+
+Another result format available is for PUBMED citations (which is a
+super-set of the MEDLINE citations having few more tags):
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+			       '-result' => 'pubmed2ref',
+			       '-data'   => $citation);
+
+Or, because C<pubmed2ref> is a default one for PUBMED citations, you can say just:
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+			       '-data'   => $citation);
+
+Both C<medline2ref> and C<pubmed2ref> results are objects defined in
+the directory C<Bio::Biblio>.
+
+=head1 SEE ALSO
+
+=over
+
+=item *
+
+An example script I<examples/biblio.pl>. It has many options and its
+own help.  The relevant options to this IO module are I<-f>
+(specifying what file to read) and I<-O> (specifying what result
+format to achieve).
+
+=item *
+
+OpenBQS home page: http://industry.ebi.ac.uk/openBQS
+
+=item *
+
+Comments to the Perl client: http://industry.ebi.ac.uk/openBQS/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/MailList.shtml      - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.
+ Bug reports can be submitted via email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger@ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Biblio::IO;
+
+use strict;
+use vars qw(@ISA);
+
+use Bio::Root::Root;
+use Bio::Root::IO;
+use Symbol();
+
+@ISA = qw(Bio::Root::Root Bio::Root::IO);
+
+my $entry = 0;
+
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref ($caller) || $caller;
+    
+    # if $caller is an object, or if it is an underlying
+    # 'real-work-doing' class (e.g. Bio::Biblio::IO::medlinexml) then
+    # we want to call SUPER to create and bless an object
+    if( $class =~ /Bio::Biblio::IO::(\S+)/ ) {
+	my ($self) = $class->SUPER::new (@args);	
+	$self->_initialize (@args);
+	return $self;
+
+    # this is called only the first time when somebody calls: 'new
+    # Bio::Biblio::IO (...)', and it actually loads a 'real-work-doing'
+    # module and call this new() method again (unless the loaded
+    # module has its own new() method)
+    } else { 
+	my %param = @args;
+	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+	my $format = $param{'-format'} || 
+	    $class->_guess_format( $param{-file} || $ARGV[0] ) ||
+		'medlinexml';
+	$format = "\L$format";	# normalize capitalization to lower case
+
+	# load module with the real implementation - as defined in $format
+	return undef unless (&_load_format_module ($format));
+
+	# this will call this same method new() - but rather its
+	# upper (object) branche
+	return "Bio::Biblio::IO::$format"->new(@args);
+    }
+}
+
+sub newFh {
+  my $class = shift;
+  return unless my $self = $class->new(@_);
+  return $self->fh;
+}
+
+
+sub fh {
+  my $self = shift;
+  my $class = ref($self) || $self;
+  my $s = Symbol::gensym;
+  tie $$s,$class,$self;
+  return $s;
+}
+
+# _initialize is chained for all Bio::Biblio::IO classes
+
+sub _initialize {
+    my ($self, @args) = @_;
+    # initialize the IO part
+    $self->_initialize_io (@args);
+}
+
+=head2 next_bibref
+
+ Usage   : $citation = stream->next_bibref
+ Function: Reads the next citation object from the stream and returns it.
+ Returns : a Bio::Biblio::Ref citation object, or something else
+           (depending on the '-result' argument given in the 'new()'
+	    method).
+ Args    : none
+
+=cut
+
+sub next_bibref {
+   my ($self) = shift;
+   $self->throw ("Sorry, you cannot read from a generic Bio::Biblio::IO object.");
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 _load_format_module
+
+ Usage   : $class->_load_format_module ($format)
+ Returns : 1 on success, undef on failure
+ Args    : 'format' should contain the last part of the
+           name of a module who does the real implementation
+
+It does (in run-time) a similar thing as
+
+   require Bio::Biblio::IO::$format
+
+It throws an exception if it fails to find and load the module
+(for example, because of the compilation errors in the module).
+
+=cut
+
+sub _load_format_module {
+  my ($format) = @_;
+  my ($module, $load, $m);
+
+  $module = "_<Bio/Biblio/IO/$format.pm";
+  $load = "Bio/Biblio/IO/$format.pm";
+
+  return 1 if $main::{$module};
+  eval {
+    require $load;
+  };
+  if ( $@ ) {
+    Bio::Root::Root->throw (<<END);
+$load: $format cannot be found or loaded
+Exception $@
+For more information about the Biblio system please see the Bio::Biblio::IO docs.
+END
+  ;
+    return;
+  }
+  return 1;
+}
+
+=head2 _guess_format
+
+ Usage   : $class->_guess_format ($filename)
+ Returns : string with a guessed format of the input data (e.g. 'medlinexml')
+ Args    : a file name whose extension can help to guess its format
+
+It makes an expert guess what kind of data are in the given file
+(but be prepare that $filename may be empty).
+
+=cut
+
+sub _guess_format {
+   my $class = shift;
+   return unless $_ = shift;
+   return 'medlinexml'   if (/\.(xml|medlinexml)$/i);
+   return;
+}
+
+sub DESTROY {
+    my $self = shift;
+
+    $self->close();
+}
+
+sub TIEHANDLE {
+    my ($class,$val) = @_;
+    return bless {'biblio' => $val}, $class;
+}
+
+sub READLINE {
+  my $self = shift;
+  return $self->{'biblio'}->next_bibref() unless wantarray;
+  my (@list, $obj);
+  push @list, $obj while $obj = $self->{'biblio'}->next_bibref();
+  return @list;
+}
+
+1;