diff variant_effect_predictor/Bio/DB/GDB.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/GDB.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,315 @@
+# $Id: GDB.pm,v 1.12 2002/12/01 00:05:19 jason Exp $
+#
+# BioPerl module for Bio::DB::GenBank
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+
+=head1 NAME
+
+Bio::DB::GDB - Database object interface to GDB HTTP query
+
+=head1 SYNOPSIS
+
+    $gdb = new Bio::DB::GDB;
+
+    $info = $gdb->get_info(-type=>'marker',
+			     -id=>'D1S243'); # Marker name
+
+   print "genbank id is ", $info->{'gdbid'},
+    "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}), 
+    "\nproduct length is ", $info->{'length'}, "\n";
+
+=head1 DESCRIPTION
+
+This class allows connections to the Genome Database (GDB) and queries
+to retrieve any database objects. See http://www.gdb.org/ or any
+mirror for details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track the bugs and their resolution.
+Bug reports can be submitted via email or the
+web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::GDB;
+use strict;
+use Bio::Root::Root;
+use LWP::UserAgent;
+use HTTP::Request::Common;
+use HTML::Parser;
+
+use vars qw(@ISA $BASEADDRESS %PARAMSTRING $MODVERSION);
+
+@ISA = qw(Bio::Root::Root);
+
+$MODVERSION = '0.01';
+$BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment';
+%PARAMSTRING = ( 
+		 gene   => { '!action' => 'query' }, 
+		 marker => { '!action' => 'query' },
+		 );
+
+# the new way to make modules a little more lightweight
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    
+    my $ua = new LWP::UserAgent;
+    $ua->agent(ref($self) ."/$MODVERSION");
+    $self->ua($ua);    
+
+    return $self;
+}
+
+=head2 ua
+
+ Title   : ua
+ Usage   : my $ua = $self->ua or 
+           $self->ua($ua)
+ Function: Get/Set a LWP::UserAgent for use
+ Returns : reference to LWP::UserAgent Object
+ Args    : $ua - must be a LWP::UserAgent
+
+=cut
+
+sub ua {
+    my ($self, $ua) = @_;
+    if( defined $ua && $ua->isa("LWP::UserAgent") ) {
+	$self->{_ua} = $ua;
+    }
+    return $self->{_ua};
+}
+
+# helper method to get specific options
+
+=head2 get_params
+    
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to query
+            for mode ('marker', 'gene')
+ Returns : a key,value pair hash
+ Args    : 'marker' or 'gene' mode for retrieval
+
+=cut
+
+sub get_params {
+    my ($self, $mode) = @_;
+    return %{$PARAMSTRING{$mode}};
+}
+
+=head2 get_info
+
+ Title   : get_info
+ Usage   : my $info = $self->get_info(-type => 'marker',
+				      -id   => 'D1S234'); 
+ Function: Returns key,value pairs specific
+ Returns : a key,value pair hash
+ Args    : -type => 'marker' or 'gene' mode for retrieval
+           -id   => unique id to query for
+
+=cut
+
+sub get_info {
+    my ($self, @args) = @_;
+    my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args);
+    if( !defined $type ) {
+	$self->throw("Must specify a type you are querying for");
+    } elsif( !defined $id ) {
+	$self->throw("Must specify a id to query for");
+    }
+    my %params = $self->get_params($type);
+
+    $params{'displayName'} = $id;
+
+    if( $type eq 'marker' ) {
+	# do more specific stuff?
+    } elsif( $type eq 'gene' ) {
+	# do more specific stuff?
+    }
+    my $url = $self->get_request(%params);    
+    
+    my ($resp) = $self->_request($url);
+    if( ! defined $resp || ! ref($resp) ) {
+	$self->warn("Did not get any data for url ". $url->uri);
+	return undef;
+    }
+    my $content = $resp->content;	
+    if( $content =~ /ERROR/ || length($resp->content) == 0 ) {
+	$self->warn("Error getting for url " . $url->uri . "!\n");
+	return undef;
+    }
+    my (@primers, $length, $markerurl, $realname);
+    my $state = 0;
+    my $title = 0;
+    my $p;
+    $p = new HTML::Parser( api_version => 3,
+			   start_h => [ sub { 
+			       return if( $title == 2 || $state == 3);
+			       my($tag,$attr,$text) = @_;
+			       return if( !defined $tag);
+			       if( $tag eq 'table' ) {
+				   $state = 1;
+			       } elsif( $tag eq 'title' ) {
+				   $title = 1;
+			       } elsif( $state == 2 && 
+					$tag eq 'a' &&
+					$attr->{'href'} ) {
+				   $state = 3; 
+				   if( $text =~ m(href="?(http://.+)"?\s*>) ) { 
+				       $markerurl = $1;
+				   }
+			       } 
+			   }, "tagname, attr, text" ],
+			   end_h   => [ sub { 
+			       return if ($title == 2 || $state == 3);
+			       my ( $tag ) = @_;
+			       $title = 0 if( $tag eq 'title' );
+			   }, "tagname" ],
+			   text_h  => [ sub { 
+			       return if( $title == 2 || $state == 3);
+			       my($text) = @_;
+			       if( $title && $text =~ /Amplimer/ ) {
+				   $markerurl = 'this';
+				   $title = 2;
+			       }
+			       $state = 2 if( $state == 1 && $text =~ /Amplimer/);
+			   }, "text" ],
+			   marked_sections =>1);
+    $p->parse($content) or die "Can't open: $!";        
+    if( ! defined $markerurl ) {
+	@primers = ('notfound','notfound', '?');
+    } elsif( $markerurl eq 'this' ) {
+
+    }
+    else { 
+	my $resp = $self->_request(GET $markerurl);
+        return undef if ( !defined $resp );
+	$content = $resp->content();
+    }
+    $state = 0;
+    $realname = 'unknown';
+    my $lasttag = '';
+    $p = HTML::Parser->new(api_version => 3,			      
+			   start_h => [ sub { my ($tag) = @_;
+					      $tag = lc $tag;
+					      $lasttag = $tag;
+					      if( $state == 3 && $tag eq 'dd' ) {
+						  $state = 4;
+					      }
+					  } , 'tagname'],			   
+			   text_h  => [ sub { 
+			       my($text) = @_; 
+			       if( $text =~ /Primer Sequence/ ) {
+				   $state =1;
+			       } elsif( $state == 1 ) {
+				   foreach my $l ( split(/\n+/,$text) ) {
+				       $l =~ s/\s+(\S+)/$1/;
+				       my ($name,$primer) = split(/\s+/,$l);
+				       next if( !defined $name);
+				       push @primers, $primer;
+				       $state = 2;
+				   }
+			       } elsif( $state == 2 && 
+					($text =~ /Seq Min Len/i ||
+					 $text =~ /Seq Max Len/i) ) {
+				   $state = 3;
+			       }  elsif ( $state == 4 ) {
+				   my ($len) = ( $text =~ /(\d+\.\d+)/
+);
+				   $length = $len;
+				   $length *= 1000 if( $len < 1 );
+				   $state = 0;
+			       } elsif( $lasttag eq 'dd' && 
+					$text =~ /(GDB:\d+)/i ) {
+				   $realname = $1;
+			       }
+			   }  , "text" ],
+			   marked_sections =>1,
+			   );
+    $p->parse($content) || die "Can't open: $!";
+
+    return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers };
+}
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: HTTP::Request
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+    my ($self, %params) = @_;
+    if( ! %params ) {
+	$self->throw("must provide parameters with which to query");
+    }
+    my $url = $BASEADDRESS;    
+    my $querystr = '?' . join("&", map { "$_=$params{$_}" } keys %params);
+    return GET $url . $querystr;
+}
+
+# private methods
+sub _request {
+
+    my ($self, $url,$tmpfile) = @_;
+    my ($resp);
+    if( defined $tmpfile && $tmpfile ne '' ) { 
+	$resp =  $self->ua->request($url, $tmpfile);
+    } else { $resp =  $self->ua->request($url); } 
+
+    if( $resp->is_error  ) {
+	$self->warn($resp->as_string() . "\nError getting for url " .
+		     $url->uri . "!\n");
+	return undef;
+    }
+    return $resp;
+}
+
+sub _gdb_search_tag_start {
+
+}
+
+1;
+__END__