Mercurial > repos > mahtabm > ensembl

diff variant_effect_predictor/Bio/DB/Biblio/biofetch.pm @ 0:1f6dce3d34e0
Uploaded
author: mahtabm
date: Thu, 11 Apr 2013 02:01:53 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/Biblio/biofetch.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,309 @@
+# $Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $
+#
+# BioPerl module Bio::DB::Biblio::biofetch.pm
+#
+# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic 
+  citation retrieval
+
+=head1 SYNOPSIS
+
+Do not use this object directly, only access it through the
+I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio (-access => 'biofetch');
+  my $ref = $biblio->get_by_id('20063307'));
+
+  my $ids = ['20063307', '98276153'];
+  my $refio = $biblio->get_all($ids);
+  while ($ref = $refio->next_bibref) { 
+    print $ref->identifier, "\n";
+  }
+
+=head1 DESCRIPTION
+
+This class uses BioFetch protocol based service to retrieve Medline
+references by their ID.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho (heikki@ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+=over 1
+
+=item *
+
+Only method get_by_id() is supported.
+
+=back
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Biblio::biofetch;
+use vars qw(@ISA $VERSION %HOSTS  %FORMATMAP  $DEFAULTFORMAT 
+	    $Revision $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
+use strict;
+
+use Bio::Biblio;
+use Bio::DB::DBFetch;
+use Bio::Biblio::IO;
+
+@ISA = qw( Bio::DB::DBFetch Bio::Biblio);
+
+BEGIN {
+
+    # set the version for version checking
+    $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d.%-02d", @r };
+    $Revision = q$Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $;
+
+    # you can add your own here theoretically.
+    %HOSTS = (
+	       'dbfetch' => {
+		   baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
+		   hosts   => {
+		       'ebi'  => 'www.ebi.ac.uk'
+		       }
+	       }
+	      );
+    %FORMATMAP = ( 'default' => 'medlinexml'
+		   );
+    $DEFAULTFORMAT = 'default';
+
+    $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
+
+}
+
+
+sub new {
+    my ($class, @args ) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    $self->{ '_hosts' } = {};
+    $self->{ '_formatmap' } = {};
+
+    $self->hosts(\%HOSTS);
+    $self->formatmap(\%FORMATMAP);
+    $self->{'_default_format'} = $DEFAULTFORMAT;
+
+    return $self;
+}
+
+=head2 get_by_id
+
+ Title   : get_by_id
+ Usage   : $entry = $db->get__by_id('20063307')
+ Function: Gets a Bio::Biblio::RefI object by its name
+ Returns : a Bio::Biblio::Medline object
+ Args    : the id (as a string) of the reference
+
+=cut
+
+sub get_by_id {
+    my ($self,$id) = @_;
+    my $io = $self->get_Stream_by_id([$id]);
+    $self->throw("id does not exist") if( !defined $io ) ;
+    return $io->next_bibref();
+}
+
+
+=head2 get_all
+
+  Title   : get_all
+  Usage   : $seq = $db->get_all($ref);
+  Function: Retrieves reference objects from the server 'en masse', 
+            rather than one  at a time.  For large numbers of sequences, 
+            this is far superior than get_by_id().
+  Example :
+  Returns : a stream of Bio::Biblio::Medline objects
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+=cut
+
+sub get_all {
+    my ($self, $ids) = @_;
+    return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : my $seqio = $self->get_seq_sream(%qualifiers)
+ Function: builds a url and queries a web db
+ Returns : a Bio::SeqIO stream capable of producing sequence
+ Args    : %qualifiers = a hash qualifiers that the implementing class 
+           will process to make a url suitable for web querying 
+
+=cut
+
+sub get_seq_stream {
+    my ($self, %qualifiers) = @_;
+    my ($rformat, $ioformat) = $self->request_format();
+    my $seen = 0;
+    foreach my $key ( keys %qualifiers ) {
+	if( $key =~ /format/i ) {
+	    $rformat = $qualifiers{$key};
+	    $seen = 1;
+	}
+    }
+    $qualifiers{'-format'} = $rformat if( !$seen);
+    ($rformat, $ioformat) = $self->request_format($rformat);
+    
+    my $request = $self->get_request(%qualifiers);
+    my ($stream,$resp);
+    if( $self->retrieval_type =~ /temp/i ) {
+	my $dir = $self->io()->tempdir( CLEANUP => 1);
+	my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
+	close $fh;
+	my ($resp) = $self->_request($request, $tmpfile);		
+	if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
+            $self->throw("WebDBSeqI Error - check query sequences!\n");
+	}
+	$self->postprocess_data('type' => 'file',
+				'location' => $tmpfile);	
+	# this may get reset when requesting batch mode
+	($rformat,$ioformat) = $self->request_format();
+	if( $self->verbose > 0 ) {
+	    open(ERR, "<$tmpfile");
+	    while(<ERR>) { $self->debug($_);}
+	} 
+	$stream = new Bio::Biblio::IO('-format' => $ioformat,
+				      '-file'   => $tmpfile);
+    } elsif( $self->retrieval_type =~ /io_string/i ) {
+	my ($resp) = $self->_request($request);
+        my $content = $resp->content_ref;
+	$self->debug( "content is $$content\n");
+	if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) {
+	    $self->throw("WebDBSeqI Error - check query sequences!\n");	
+        }  
+	($rformat,$ioformat) = $self->request_format();
+	$self->postprocess_data('type'=> 'string',
+				'location' => $content);
+	$stream = new Bio::Biblio::IO('-format' => $ioformat,
+#				      '-data'   => "<tag>". $$content. "</tag>");
+				      '-data'   => $$content
+				      );
+    } else { 
+	$self->throw("retrieval type " . $self->retrieval_type . 
+		     " unsupported\n");
+    }
+    return $stream;
+}
+
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                           reference containing data
+
+=cut
+
+# the default method, works for genbank/genpept, other classes should
+# override it with their own method.
+
+sub postprocess_data {    
+    my ($self, %args) = @_;
+    my $data;
+    my $type = uc $args{'type'};
+    my $location = $args{'location'};
+    if( !defined $type || $type eq '' || !defined $location) {
+	return;
+    } elsif( $type eq 'STRING' ) {
+	$data = $$location; 
+    } elsif ( $type eq 'FILE' ) {
+	open(TMP, $location) or $self->throw("could not open file $location");
+	my @in = <TMP>;
+	close TMP;
+	$data = join("", @in);
+    }
+
+    $data = "<tag>". $data. "</tag>";
+    
+    if( $type eq 'FILE'  ) {
+	open(TMP, ">$location") or $self->throw("could overwrite file $location");
+	print TMP $data;
+	close TMP;
+    } elsif ( $type eq 'STRING' ) {
+	${$args{'location'}} = $data;
+    }
+    
+    $self->debug("format is ". $self->request_format(). " data is $data\n");
+
+}
+
+
+
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::DB::Biblio::biofetch::VERSION;
+           print $Bio::DB::Biblio::biofetch::Revision;
+
+=cut
+
+=head2 Defaults
+
+ Usage   : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;
+
+=cut
+
+1;
+__END__
author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children