Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/GDB.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: GDB.pm,v 1.12 2002/12/01 00:05:19 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::DB::GenBank | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 # | |
| 13 | |
| 14 =head1 NAME | |
| 15 | |
| 16 Bio::DB::GDB - Database object interface to GDB HTTP query | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 $gdb = new Bio::DB::GDB; | |
| 21 | |
| 22 $info = $gdb->get_info(-type=>'marker', | |
| 23 -id=>'D1S243'); # Marker name | |
| 24 | |
| 25 print "genbank id is ", $info->{'gdbid'}, | |
| 26 "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}), | |
| 27 "\nproduct length is ", $info->{'length'}, "\n"; | |
| 28 | |
| 29 =head1 DESCRIPTION | |
| 30 | |
| 31 This class allows connections to the Genome Database (GDB) and queries | |
| 32 to retrieve any database objects. See http://www.gdb.org/ or any | |
| 33 mirror for details. | |
| 34 | |
| 35 =head1 FEEDBACK | |
| 36 | |
| 37 =head2 Mailing Lists | |
| 38 | |
| 39 User feedback is an integral part of the | |
| 40 evolution of this and other Bioperl modules. Send | |
| 41 your comments and suggestions preferably to one | |
| 42 of the Bioperl mailing lists. Your participation | |
| 43 is much appreciated. | |
| 44 | |
| 45 bioperl-l@bioperl.org - General discussion | |
| 46 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 47 | |
| 48 =head2 Reporting Bugs | |
| 49 | |
| 50 Report bugs to the Bioperl bug tracking system to | |
| 51 help us keep track the bugs and their resolution. | |
| 52 Bug reports can be submitted via email or the | |
| 53 web: | |
| 54 | |
| 55 bioperl-bugs@bio.perl.org | |
| 56 http://bugzilla.bioperl.org/ | |
| 57 | |
| 58 =head1 AUTHOR - Jason Stajich | |
| 59 | |
| 60 Email jason@bioperl.org | |
| 61 | |
| 62 =head1 APPENDIX | |
| 63 | |
| 64 The rest of the documentation details each of the | |
| 65 object methods. Internal methods are usually | |
| 66 preceded with a _ | |
| 67 | |
| 68 =cut | |
| 69 | |
| 70 # Let the code begin... | |
| 71 | |
| 72 package Bio::DB::GDB; | |
| 73 use strict; | |
| 74 use Bio::Root::Root; | |
| 75 use LWP::UserAgent; | |
| 76 use HTTP::Request::Common; | |
| 77 use HTML::Parser; | |
| 78 | |
| 79 use vars qw(@ISA $BASEADDRESS %PARAMSTRING $MODVERSION); | |
| 80 | |
| 81 @ISA = qw(Bio::Root::Root); | |
| 82 | |
| 83 $MODVERSION = '0.01'; | |
| 84 $BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment'; | |
| 85 %PARAMSTRING = ( | |
| 86 gene => { '!action' => 'query' }, | |
| 87 marker => { '!action' => 'query' }, | |
| 88 ); | |
| 89 | |
| 90 # the new way to make modules a little more lightweight | |
| 91 sub new { | |
| 92 my($class,@args) = @_; | |
| 93 my $self = $class->SUPER::new(@args); | |
| 94 | |
| 95 my $ua = new LWP::UserAgent; | |
| 96 $ua->agent(ref($self) ."/$MODVERSION"); | |
| 97 $self->ua($ua); | |
| 98 | |
| 99 return $self; | |
| 100 } | |
| 101 | |
| 102 =head2 ua | |
| 103 | |
| 104 Title : ua | |
| 105 Usage : my $ua = $self->ua or | |
| 106 $self->ua($ua) | |
| 107 Function: Get/Set a LWP::UserAgent for use | |
| 108 Returns : reference to LWP::UserAgent Object | |
| 109 Args : $ua - must be a LWP::UserAgent | |
| 110 | |
| 111 =cut | |
| 112 | |
| 113 sub ua { | |
| 114 my ($self, $ua) = @_; | |
| 115 if( defined $ua && $ua->isa("LWP::UserAgent") ) { | |
| 116 $self->{_ua} = $ua; | |
| 117 } | |
| 118 return $self->{_ua}; | |
| 119 } | |
| 120 | |
| 121 # helper method to get specific options | |
| 122 | |
| 123 =head2 get_params | |
| 124 | |
| 125 Title : get_params | |
| 126 Usage : my %params = $self->get_params($mode) | |
| 127 Function: Returns key,value pairs to be passed to query | |
| 128 for mode ('marker', 'gene') | |
| 129 Returns : a key,value pair hash | |
| 130 Args : 'marker' or 'gene' mode for retrieval | |
| 131 | |
| 132 =cut | |
| 133 | |
| 134 sub get_params { | |
| 135 my ($self, $mode) = @_; | |
| 136 return %{$PARAMSTRING{$mode}}; | |
| 137 } | |
| 138 | |
| 139 =head2 get_info | |
| 140 | |
| 141 Title : get_info | |
| 142 Usage : my $info = $self->get_info(-type => 'marker', | |
| 143 -id => 'D1S234'); | |
| 144 Function: Returns key,value pairs specific | |
| 145 Returns : a key,value pair hash | |
| 146 Args : -type => 'marker' or 'gene' mode for retrieval | |
| 147 -id => unique id to query for | |
| 148 | |
| 149 =cut | |
| 150 | |
| 151 sub get_info { | |
| 152 my ($self, @args) = @_; | |
| 153 my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args); | |
| 154 if( !defined $type ) { | |
| 155 $self->throw("Must specify a type you are querying for"); | |
| 156 } elsif( !defined $id ) { | |
| 157 $self->throw("Must specify a id to query for"); | |
| 158 } | |
| 159 my %params = $self->get_params($type); | |
| 160 | |
| 161 $params{'displayName'} = $id; | |
| 162 | |
| 163 if( $type eq 'marker' ) { | |
| 164 # do more specific stuff? | |
| 165 } elsif( $type eq 'gene' ) { | |
| 166 # do more specific stuff? | |
| 167 } | |
| 168 my $url = $self->get_request(%params); | |
| 169 | |
| 170 my ($resp) = $self->_request($url); | |
| 171 if( ! defined $resp || ! ref($resp) ) { | |
| 172 $self->warn("Did not get any data for url ". $url->uri); | |
| 173 return undef; | |
| 174 } | |
| 175 my $content = $resp->content; | |
| 176 if( $content =~ /ERROR/ || length($resp->content) == 0 ) { | |
| 177 $self->warn("Error getting for url " . $url->uri . "!\n"); | |
| 178 return undef; | |
| 179 } | |
| 180 my (@primers, $length, $markerurl, $realname); | |
| 181 my $state = 0; | |
| 182 my $title = 0; | |
| 183 my $p; | |
| 184 $p = new HTML::Parser( api_version => 3, | |
| 185 start_h => [ sub { | |
| 186 return if( $title == 2 || $state == 3); | |
| 187 my($tag,$attr,$text) = @_; | |
| 188 return if( !defined $tag); | |
| 189 if( $tag eq 'table' ) { | |
| 190 $state = 1; | |
| 191 } elsif( $tag eq 'title' ) { | |
| 192 $title = 1; | |
| 193 } elsif( $state == 2 && | |
| 194 $tag eq 'a' && | |
| 195 $attr->{'href'} ) { | |
| 196 $state = 3; | |
| 197 if( $text =~ m(href="?(http://.+)"?\s*>) ) { | |
| 198 $markerurl = $1; | |
| 199 } | |
| 200 } | |
| 201 }, "tagname, attr, text" ], | |
| 202 end_h => [ sub { | |
| 203 return if ($title == 2 || $state == 3); | |
| 204 my ( $tag ) = @_; | |
| 205 $title = 0 if( $tag eq 'title' ); | |
| 206 }, "tagname" ], | |
| 207 text_h => [ sub { | |
| 208 return if( $title == 2 || $state == 3); | |
| 209 my($text) = @_; | |
| 210 if( $title && $text =~ /Amplimer/ ) { | |
| 211 $markerurl = 'this'; | |
| 212 $title = 2; | |
| 213 } | |
| 214 $state = 2 if( $state == 1 && $text =~ /Amplimer/); | |
| 215 }, "text" ], | |
| 216 marked_sections =>1); | |
| 217 $p->parse($content) or die "Can't open: $!"; | |
| 218 if( ! defined $markerurl ) { | |
| 219 @primers = ('notfound','notfound', '?'); | |
| 220 } elsif( $markerurl eq 'this' ) { | |
| 221 | |
| 222 } | |
| 223 else { | |
| 224 my $resp = $self->_request(GET $markerurl); | |
| 225 return undef if ( !defined $resp ); | |
| 226 $content = $resp->content(); | |
| 227 } | |
| 228 $state = 0; | |
| 229 $realname = 'unknown'; | |
| 230 my $lasttag = ''; | |
| 231 $p = HTML::Parser->new(api_version => 3, | |
| 232 start_h => [ sub { my ($tag) = @_; | |
| 233 $tag = lc $tag; | |
| 234 $lasttag = $tag; | |
| 235 if( $state == 3 && $tag eq 'dd' ) { | |
| 236 $state = 4; | |
| 237 } | |
| 238 } , 'tagname'], | |
| 239 text_h => [ sub { | |
| 240 my($text) = @_; | |
| 241 if( $text =~ /Primer Sequence/ ) { | |
| 242 $state =1; | |
| 243 } elsif( $state == 1 ) { | |
| 244 foreach my $l ( split(/\n+/,$text) ) { | |
| 245 $l =~ s/\s+(\S+)/$1/; | |
| 246 my ($name,$primer) = split(/\s+/,$l); | |
| 247 next if( !defined $name); | |
| 248 push @primers, $primer; | |
| 249 $state = 2; | |
| 250 } | |
| 251 } elsif( $state == 2 && | |
| 252 ($text =~ /Seq Min Len/i || | |
| 253 $text =~ /Seq Max Len/i) ) { | |
| 254 $state = 3; | |
| 255 } elsif ( $state == 4 ) { | |
| 256 my ($len) = ( $text =~ /(\d+\.\d+)/ | |
| 257 ); | |
| 258 $length = $len; | |
| 259 $length *= 1000 if( $len < 1 ); | |
| 260 $state = 0; | |
| 261 } elsif( $lasttag eq 'dd' && | |
| 262 $text =~ /(GDB:\d+)/i ) { | |
| 263 $realname = $1; | |
| 264 } | |
| 265 } , "text" ], | |
| 266 marked_sections =>1, | |
| 267 ); | |
| 268 $p->parse($content) || die "Can't open: $!"; | |
| 269 | |
| 270 return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers }; | |
| 271 } | |
| 272 | |
| 273 =head2 get_request | |
| 274 | |
| 275 Title : get_request | |
| 276 Usage : my $url = $self->get_request | |
| 277 Function: HTTP::Request | |
| 278 Returns : | |
| 279 Args : %qualifiers = a hash of qualifiers (ids, format, etc) | |
| 280 | |
| 281 =cut | |
| 282 | |
| 283 sub get_request { | |
| 284 my ($self, %params) = @_; | |
| 285 if( ! %params ) { | |
| 286 $self->throw("must provide parameters with which to query"); | |
| 287 } | |
| 288 my $url = $BASEADDRESS; | |
| 289 my $querystr = '?' . join("&", map { "$_=$params{$_}" } keys %params); | |
| 290 return GET $url . $querystr; | |
| 291 } | |
| 292 | |
| 293 # private methods | |
| 294 sub _request { | |
| 295 | |
| 296 my ($self, $url,$tmpfile) = @_; | |
| 297 my ($resp); | |
| 298 if( defined $tmpfile && $tmpfile ne '' ) { | |
| 299 $resp = $self->ua->request($url, $tmpfile); | |
| 300 } else { $resp = $self->ua->request($url); } | |
| 301 | |
| 302 if( $resp->is_error ) { | |
| 303 $self->warn($resp->as_string() . "\nError getting for url " . | |
| 304 $url->uri . "!\n"); | |
| 305 return undef; | |
| 306 } | |
| 307 return $resp; | |
| 308 } | |
| 309 | |
| 310 sub _gdb_search_tag_start { | |
| 311 | |
| 312 } | |
| 313 | |
| 314 1; | |
| 315 __END__ |
