diff variant_effect_predictor/Bio/DB/DBFetch.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/DBFetch.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,370 @@
+#
+# $Id: DBFetch.pm,v 1.8 2002/12/22 22:02:13 lstein Exp $
+#
+# BioPerl module for Bio::DB::DBFetch
+#
+# Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
+
+=head1 SYNOPSIS
+
+  #do not use this module directly
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of entries from databases using the
+dbfetch script at EBI:
+L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
+
+In order to make changes transparent we have host type (currently only
+ebi) and location (defaults to ebi) separated out.  This allows later
+additions of more servers in different geographical locations.
+
+This is a superclass which is called by instantiable subclasses with
+correct parameters.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                         - General discussion
+  http://bio.perl.org/MailList.html             - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::DBFetch;
+use strict;
+use vars qw(@ISA $MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
+	    $DEFAULTSERVERTYPE);
+
+$MODVERSION = '0.1';
+use HTTP::Request::Common;
+use Bio::DB::WebDBSeqI;
+
+@ISA = qw(Bio::DB::WebDBSeqI);
+
+# the new way to make modules a little more lightweight
+
+BEGIN { 	
+    # global vars
+    $DEFAULTSERVERTYPE = 'dbfetch';
+    $DEFAULTLOCATION = 'ebi';
+}
+
+
+=head1 Routines from Bio::DB::WebDBSeqI
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: returns a HTTP::Request object
+ Returns :
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+    my ($self, @qualifiers) = @_;
+    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
+					    @qualifiers);
+
+    $self->throw("Must specify a value for UIDs to fetch")
+	unless defined $uids;
+    my $tmp;
+    my $format_string = '';
+    $format ||= $self->default_format;
+    ($format, $tmp) = $self->request_format($format);
+    $format_string = "&format=$format" if $format ne $self->default_format;
+    my $url = $self->location_url();
+    my $uid;
+    if( ref($uids) =~ /ARRAY/i ) {
+	$uid = join (',', @$uids);
+	$self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
+	    if scalar @$uids >50;
+    } else {
+	$uid = $uids;
+    }
+
+    return GET $url. $format_string. '&id='. $uid;
+}
+
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string
+                                           reference containing data
+
+=cut
+
+# remove occasional blank lines at top of web output
+sub postprocess_data {
+  my ($self, %args) = @_;
+  if ($args{type} eq 'string') {
+    ${$args{location}} =~ s/^\s+//;  # get rid of leading whitespace
+  }
+  elsif ($args{type} eq 'file') {
+    open F,$args{location} or $self->throw("Cannot open $args{location}: $!");
+    my @data = <F>;
+    for (@data) {
+      last unless /^\s+$/;
+      shift @data;
+    }
+    open F,">$args{location}" or $self->throw("Cannot write to $args{location}: $!");
+    print F @data;
+    close F;
+  }
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    my ($self) = @_;
+    return $self->{'_default_format'};
+}
+
+=head1 Bio::DB::DBFetch specific routines
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $seq = $db->get_Stream_by_id($ref);
+  Function: Retrieves Seq objects from the server 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            than get_Stream_by_[id/acc]().
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+NOTE: for backward compatibility, this method is also called
+get_Stream_by_batch.
+
+=cut
+
+sub get_Stream_by_id {
+    my ($self, $ids) = @_;
+    return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : version number (as a string)
+ Throws  : "version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version {
+   my ($self,$seqid) = @_;
+   my $seqio = $self->get_Stream_by_acc([$seqid]);
+   $self->throw("version does not exist") if( !defined $seqio );
+   return $seqio->next_seq();
+}
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally not
+           be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+	$value = lc $value;
+	$self->{'_format'} = $value;
+	return ($value, $value);
+    }
+    $value = $self->{'_format'};
+    if( $value and defined $self->formatmap->{$value} ) {
+	return ($value, $self->formatmap->{$value});
+    } else {
+	# Try to fall back to a default.
+	return ($self->default_format, $self->default_format );
+    }
+}
+
+
+=head2 servertype
+
+ Title   : servertype
+ Usage   : my $servertype = $self->servertype
+	    $self->servertype($servertype);
+ Function: Get/Set server type
+ Returns : string
+ Args    : server type string [optional]
+
+=cut
+
+sub servertype {
+    my ($self, $servertype) = @_;
+    if( defined $servertype && $servertype ne '') {		
+	 $self->throw("You gave an invalid server type ($servertype)".
+			  " - available types are ".
+			  keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
+	 $self->{'_servertype'} = $servertype;
+    }
+    $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
+    return $self->{'_servertype'};
+}
+
+=head2 hostlocation
+
+ Title   : hostlocation
+ Usage   : my $location = $self->hostlocation()
+          $self->hostlocation($location)
+ Function: Set/Get Hostlocation
+ Returns : string representing hostlocation
+ Args    : string specifying hostlocation [optional]
+
+=cut
+
+sub hostlocation {
+    my ($self, $location ) = @_;
+    $location = lc $location;
+    my $servertype = $self->servertype;
+    $self->throw("Must have a valid servertype defined not $servertype")
+	unless defined $servertype; 
+    my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
+    if( defined $location && $location ne '' ) {
+	if( ! $hosts{$location} ) {
+	    $self->throw("Must specify a known host, not $location,".
+			 " possible values (".
+			 join(",", sort keys %hosts ). ")");
+	}
+	$self->{'_hostlocation'} = $location;
+    }
+    $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
+    return $self->{'_hostlocation'};
+}
+
+=head2 location_url
+
+ Title   : location
+ Usage   : my $url = $self->location_url()
+ Function: Get host url
+ Returns : string representing url
+ Args    : none
+
+=cut
+
+sub location_url {
+    my ($self) = @_;
+    my $servertype = $self->servertype();
+    my $location = $self->hostlocation();
+    if( ! defined $location || !defined $servertype )  {	
+	$self->throw("must have a valid hostlocation and servertype set before calling location_url");
+    }
+    return sprintf($self->hosts->{$servertype}->{'baseurl'},
+		   $self->hosts->{$servertype}->{'hosts'}->{$location});
+}		
+
+=head1 Bio::DB::DBFetch routines
+
+These methods allow subclasses to pass parameters.
+
+=head2 hosts
+
+ Title   : hosts
+ Usage   : 
+ Function: get/set for host hash 
+ Returns : 
+ Args    : optional hash
+
+=cut
+
+sub hosts {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	$self->{'_hosts'} = $value;
+    }
+    unless (exists $self->{'_hosts'}) {
+	return ('');
+    } else {
+	return $self->{'_hosts'};
+    }
+}		
+
+=head2 formatmap
+
+ Title   : formatmap
+ Usage   : 
+ Function: get/set for format hash
+ Returns : 
+ Args    : optional hash
+
+=cut
+
+sub formatmap {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	$self->{'_formatmap'} = $value;
+    }
+    unless (exists $self->{'_formatmap'}) {
+	return ('');
+    } else {
+	return $self->{'_formatmap'};
+    }
+}		
+
+
+1;
+__END__