Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/DB/Biblio/biofetch.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/DB/Biblio/biofetch.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,309 @@ +# $Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $ +# +# BioPerl module Bio::DB::Biblio::biofetch.pm +# +# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> +# For copyright and disclaimer see below. + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic + citation retrieval + +=head1 SYNOPSIS + +Do not use this object directly, only access it through the +I<Bio::Biblio> module: + + use Bio::Biblio; + my $biblio = new Bio::Biblio (-access => 'biofetch'); + my $ref = $biblio->get_by_id('20063307')); + + my $ids = ['20063307', '98276153']; + my $refio = $biblio->get_all($ids); + while ($ref = $refio->next_bibref) { + print $ref->identifier, "\n"; + } + +=head1 DESCRIPTION + +This class uses BioFetch protocol based service to retrieve Medline +references by their ID. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR + +Heikki Lehvaslaiho (heikki@ebi.ac.uk) + +=head1 COPYRIGHT + +Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved. + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 DISCLAIMER + +This software is provided "as is" without warranty of any kind. + +=head1 BUGS AND LIMITATIONS + +=over 1 + +=item * + +Only method get_by_id() is supported. + +=back + +=head1 APPENDIX + +The main documentation details are to be found in +L<Bio::DB::BiblioI>. + +Here is the rest of the object methods. Internal methods are preceded +with an underscore _. + +=cut + + +# Let the code begin... + + +package Bio::DB::Biblio::biofetch; +use vars qw(@ISA $VERSION %HOSTS %FORMATMAP $DEFAULTFORMAT + $Revision $DEFAULT_SERVICE $DEFAULT_NAMESPACE); +use strict; + +use Bio::Biblio; +use Bio::DB::DBFetch; +use Bio::Biblio::IO; + +@ISA = qw( Bio::DB::DBFetch Bio::Biblio); + +BEGIN { + + # set the version for version checking + $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d.%-02d", @r }; + $Revision = q$Id: biofetch.pm,v 1.5 2002/10/22 07:45:14 lapp Exp $; + + # you can add your own here theoretically. + %HOSTS = ( + 'dbfetch' => { + baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw', + hosts => { + 'ebi' => 'www.ebi.ac.uk' + } + } + ); + %FORMATMAP = ( 'default' => 'medlinexml' + ); + $DEFAULTFORMAT = 'default'; + + $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch'; + +} + + +sub new { + my ($class, @args ) = @_; + my $self = $class->SUPER::new(@args); + + $self->{ '_hosts' } = {}; + $self->{ '_formatmap' } = {}; + + $self->hosts(\%HOSTS); + $self->formatmap(\%FORMATMAP); + $self->{'_default_format'} = $DEFAULTFORMAT; + + return $self; +} + +=head2 get_by_id + + Title : get_by_id + Usage : $entry = $db->get__by_id('20063307') + Function: Gets a Bio::Biblio::RefI object by its name + Returns : a Bio::Biblio::Medline object + Args : the id (as a string) of the reference + +=cut + +sub get_by_id { + my ($self,$id) = @_; + my $io = $self->get_Stream_by_id([$id]); + $self->throw("id does not exist") if( !defined $io ) ; + return $io->next_bibref(); +} + + +=head2 get_all + + Title : get_all + Usage : $seq = $db->get_all($ref); + Function: Retrieves reference objects from the server 'en masse', + rather than one at a time. For large numbers of sequences, + this is far superior than get_by_id(). + Example : + Returns : a stream of Bio::Biblio::Medline objects + Args : $ref : either an array reference, a filename, or a filehandle + from which to get the list of unique ids/accession numbers. + +=cut + +sub get_all { + my ($self, $ids) = @_; + return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single'); +} + +=head2 get_seq_stream + + Title : get_seq_stream + Usage : my $seqio = $self->get_seq_sream(%qualifiers) + Function: builds a url and queries a web db + Returns : a Bio::SeqIO stream capable of producing sequence + Args : %qualifiers = a hash qualifiers that the implementing class + will process to make a url suitable for web querying + +=cut + +sub get_seq_stream { + my ($self, %qualifiers) = @_; + my ($rformat, $ioformat) = $self->request_format(); + my $seen = 0; + foreach my $key ( keys %qualifiers ) { + if( $key =~ /format/i ) { + $rformat = $qualifiers{$key}; + $seen = 1; + } + } + $qualifiers{'-format'} = $rformat if( !$seen); + ($rformat, $ioformat) = $self->request_format($rformat); + + my $request = $self->get_request(%qualifiers); + my ($stream,$resp); + if( $self->retrieval_type =~ /temp/i ) { + my $dir = $self->io()->tempdir( CLEANUP => 1); + my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir ); + close $fh; + my ($resp) = $self->_request($request, $tmpfile); + if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) { + $self->throw("WebDBSeqI Error - check query sequences!\n"); + } + $self->postprocess_data('type' => 'file', + 'location' => $tmpfile); + # this may get reset when requesting batch mode + ($rformat,$ioformat) = $self->request_format(); + if( $self->verbose > 0 ) { + open(ERR, "<$tmpfile"); + while(<ERR>) { $self->debug($_);} + } + $stream = new Bio::Biblio::IO('-format' => $ioformat, + '-file' => $tmpfile); + } elsif( $self->retrieval_type =~ /io_string/i ) { + my ($resp) = $self->_request($request); + my $content = $resp->content_ref; + $self->debug( "content is $$content\n"); + if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) { + $self->throw("WebDBSeqI Error - check query sequences!\n"); + } + ($rformat,$ioformat) = $self->request_format(); + $self->postprocess_data('type'=> 'string', + 'location' => $content); + $stream = new Bio::Biblio::IO('-format' => $ioformat, +# '-data' => "<tag>". $$content. "</tag>"); + '-data' => $$content + ); + } else { + $self->throw("retrieval type " . $self->retrieval_type . + " unsupported\n"); + } + return $stream; +} + + +=head2 postprocess_data + + Title : postprocess_data + Usage : $self->postprocess_data ( 'type' => 'string', + 'location' => \$datastr); + Function: process downloaded data before loading into a Bio::SeqIO + Returns : void + Args : hash with two keys - 'type' can be 'string' or 'file' + - 'location' either file location or string + reference containing data + +=cut + +# the default method, works for genbank/genpept, other classes should +# override it with their own method. + +sub postprocess_data { + my ($self, %args) = @_; + my $data; + my $type = uc $args{'type'}; + my $location = $args{'location'}; + if( !defined $type || $type eq '' || !defined $location) { + return; + } elsif( $type eq 'STRING' ) { + $data = $$location; + } elsif ( $type eq 'FILE' ) { + open(TMP, $location) or $self->throw("could not open file $location"); + my @in = <TMP>; + close TMP; + $data = join("", @in); + } + + $data = "<tag>". $data. "</tag>"; + + if( $type eq 'FILE' ) { + open(TMP, ">$location") or $self->throw("could overwrite file $location"); + print TMP $data; + close TMP; + } elsif ( $type eq 'STRING' ) { + ${$args{'location'}} = $data; + } + + $self->debug("format is ". $self->request_format(). " data is $data\n"); + +} + + + + +=head2 VERSION and Revision + + Usage : print $Bio::DB::Biblio::biofetch::VERSION; + print $Bio::DB::Biblio::biofetch::Revision; + +=cut + +=head2 Defaults + + Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE; + +=cut + +1; +__END__