Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/DB/GDB.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/DB/GDB.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,315 @@ +# $Id: GDB.pm,v 1.12 2002/12/01 00:05:19 jason Exp $ +# +# BioPerl module for Bio::DB::GenBank +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code +# + +=head1 NAME + +Bio::DB::GDB - Database object interface to GDB HTTP query + +=head1 SYNOPSIS + + $gdb = new Bio::DB::GDB; + + $info = $gdb->get_info(-type=>'marker', + -id=>'D1S243'); # Marker name + + print "genbank id is ", $info->{'gdbid'}, + "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}), + "\nproduct length is ", $info->{'length'}, "\n"; + +=head1 DESCRIPTION + +This class allows connections to the Genome Database (GDB) and queries +to retrieve any database objects. See http://www.gdb.org/ or any +mirror for details. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the +evolution of this and other Bioperl modules. Send +your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation +is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to +help us keep track the bugs and their resolution. +Bug reports can be submitted via email or the +web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +=head1 APPENDIX + +The rest of the documentation details each of the +object methods. Internal methods are usually +preceded with a _ + +=cut + +# Let the code begin... + +package Bio::DB::GDB; +use strict; +use Bio::Root::Root; +use LWP::UserAgent; +use HTTP::Request::Common; +use HTML::Parser; + +use vars qw(@ISA $BASEADDRESS %PARAMSTRING $MODVERSION); + +@ISA = qw(Bio::Root::Root); + +$MODVERSION = '0.01'; +$BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment'; +%PARAMSTRING = ( + gene => { '!action' => 'query' }, + marker => { '!action' => 'query' }, + ); + +# the new way to make modules a little more lightweight +sub new { + my($class,@args) = @_; + my $self = $class->SUPER::new(@args); + + my $ua = new LWP::UserAgent; + $ua->agent(ref($self) ."/$MODVERSION"); + $self->ua($ua); + + return $self; +} + +=head2 ua + + Title : ua + Usage : my $ua = $self->ua or + $self->ua($ua) + Function: Get/Set a LWP::UserAgent for use + Returns : reference to LWP::UserAgent Object + Args : $ua - must be a LWP::UserAgent + +=cut + +sub ua { + my ($self, $ua) = @_; + if( defined $ua && $ua->isa("LWP::UserAgent") ) { + $self->{_ua} = $ua; + } + return $self->{_ua}; +} + +# helper method to get specific options + +=head2 get_params + + Title : get_params + Usage : my %params = $self->get_params($mode) + Function: Returns key,value pairs to be passed to query + for mode ('marker', 'gene') + Returns : a key,value pair hash + Args : 'marker' or 'gene' mode for retrieval + +=cut + +sub get_params { + my ($self, $mode) = @_; + return %{$PARAMSTRING{$mode}}; +} + +=head2 get_info + + Title : get_info + Usage : my $info = $self->get_info(-type => 'marker', + -id => 'D1S234'); + Function: Returns key,value pairs specific + Returns : a key,value pair hash + Args : -type => 'marker' or 'gene' mode for retrieval + -id => unique id to query for + +=cut + +sub get_info { + my ($self, @args) = @_; + my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args); + if( !defined $type ) { + $self->throw("Must specify a type you are querying for"); + } elsif( !defined $id ) { + $self->throw("Must specify a id to query for"); + } + my %params = $self->get_params($type); + + $params{'displayName'} = $id; + + if( $type eq 'marker' ) { + # do more specific stuff? + } elsif( $type eq 'gene' ) { + # do more specific stuff? + } + my $url = $self->get_request(%params); + + my ($resp) = $self->_request($url); + if( ! defined $resp || ! ref($resp) ) { + $self->warn("Did not get any data for url ". $url->uri); + return undef; + } + my $content = $resp->content; + if( $content =~ /ERROR/ || length($resp->content) == 0 ) { + $self->warn("Error getting for url " . $url->uri . "!\n"); + return undef; + } + my (@primers, $length, $markerurl, $realname); + my $state = 0; + my $title = 0; + my $p; + $p = new HTML::Parser( api_version => 3, + start_h => [ sub { + return if( $title == 2 || $state == 3); + my($tag,$attr,$text) = @_; + return if( !defined $tag); + if( $tag eq 'table' ) { + $state = 1; + } elsif( $tag eq 'title' ) { + $title = 1; + } elsif( $state == 2 && + $tag eq 'a' && + $attr->{'href'} ) { + $state = 3; + if( $text =~ m(href="?(http://.+)"?\s*>) ) { + $markerurl = $1; + } + } + }, "tagname, attr, text" ], + end_h => [ sub { + return if ($title == 2 || $state == 3); + my ( $tag ) = @_; + $title = 0 if( $tag eq 'title' ); + }, "tagname" ], + text_h => [ sub { + return if( $title == 2 || $state == 3); + my($text) = @_; + if( $title && $text =~ /Amplimer/ ) { + $markerurl = 'this'; + $title = 2; + } + $state = 2 if( $state == 1 && $text =~ /Amplimer/); + }, "text" ], + marked_sections =>1); + $p->parse($content) or die "Can't open: $!"; + if( ! defined $markerurl ) { + @primers = ('notfound','notfound', '?'); + } elsif( $markerurl eq 'this' ) { + + } + else { + my $resp = $self->_request(GET $markerurl); + return undef if ( !defined $resp ); + $content = $resp->content(); + } + $state = 0; + $realname = 'unknown'; + my $lasttag = ''; + $p = HTML::Parser->new(api_version => 3, + start_h => [ sub { my ($tag) = @_; + $tag = lc $tag; + $lasttag = $tag; + if( $state == 3 && $tag eq 'dd' ) { + $state = 4; + } + } , 'tagname'], + text_h => [ sub { + my($text) = @_; + if( $text =~ /Primer Sequence/ ) { + $state =1; + } elsif( $state == 1 ) { + foreach my $l ( split(/\n+/,$text) ) { + $l =~ s/\s+(\S+)/$1/; + my ($name,$primer) = split(/\s+/,$l); + next if( !defined $name); + push @primers, $primer; + $state = 2; + } + } elsif( $state == 2 && + ($text =~ /Seq Min Len/i || + $text =~ /Seq Max Len/i) ) { + $state = 3; + } elsif ( $state == 4 ) { + my ($len) = ( $text =~ /(\d+\.\d+)/ +); + $length = $len; + $length *= 1000 if( $len < 1 ); + $state = 0; + } elsif( $lasttag eq 'dd' && + $text =~ /(GDB:\d+)/i ) { + $realname = $1; + } + } , "text" ], + marked_sections =>1, + ); + $p->parse($content) || die "Can't open: $!"; + + return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers }; +} + +=head2 get_request + + Title : get_request + Usage : my $url = $self->get_request + Function: HTTP::Request + Returns : + Args : %qualifiers = a hash of qualifiers (ids, format, etc) + +=cut + +sub get_request { + my ($self, %params) = @_; + if( ! %params ) { + $self->throw("must provide parameters with which to query"); + } + my $url = $BASEADDRESS; + my $querystr = '?' . join("&", map { "$_=$params{$_}" } keys %params); + return GET $url . $querystr; +} + +# private methods +sub _request { + + my ($self, $url,$tmpfile) = @_; + my ($resp); + if( defined $tmpfile && $tmpfile ne '' ) { + $resp = $self->ua->request($url, $tmpfile); + } else { $resp = $self->ua->request($url); } + + if( $resp->is_error ) { + $self->warn($resp->as_string() . "\nError getting for url " . + $url->uri . "!\n"); + return undef; + } + return $resp; +} + +sub _gdb_search_tag_start { + +} + +1; +__END__