0
|
1 # $Id: GDB.pm,v 1.12 2002/12/01 00:05:19 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::DB::GenBank
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12 #
|
|
13
|
|
14 =head1 NAME
|
|
15
|
|
16 Bio::DB::GDB - Database object interface to GDB HTTP query
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 $gdb = new Bio::DB::GDB;
|
|
21
|
|
22 $info = $gdb->get_info(-type=>'marker',
|
|
23 -id=>'D1S243'); # Marker name
|
|
24
|
|
25 print "genbank id is ", $info->{'gdbid'},
|
|
26 "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}),
|
|
27 "\nproduct length is ", $info->{'length'}, "\n";
|
|
28
|
|
29 =head1 DESCRIPTION
|
|
30
|
|
31 This class allows connections to the Genome Database (GDB) and queries
|
|
32 to retrieve any database objects. See http://www.gdb.org/ or any
|
|
33 mirror for details.
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the
|
|
40 evolution of this and other Bioperl modules. Send
|
|
41 your comments and suggestions preferably to one
|
|
42 of the Bioperl mailing lists. Your participation
|
|
43 is much appreciated.
|
|
44
|
|
45 bioperl-l@bioperl.org - General discussion
|
|
46 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
47
|
|
48 =head2 Reporting Bugs
|
|
49
|
|
50 Report bugs to the Bioperl bug tracking system to
|
|
51 help us keep track the bugs and their resolution.
|
|
52 Bug reports can be submitted via email or the
|
|
53 web:
|
|
54
|
|
55 bioperl-bugs@bio.perl.org
|
|
56 http://bugzilla.bioperl.org/
|
|
57
|
|
58 =head1 AUTHOR - Jason Stajich
|
|
59
|
|
60 Email jason@bioperl.org
|
|
61
|
|
62 =head1 APPENDIX
|
|
63
|
|
64 The rest of the documentation details each of the
|
|
65 object methods. Internal methods are usually
|
|
66 preceded with a _
|
|
67
|
|
68 =cut
|
|
69
|
|
70 # Let the code begin...
|
|
71
|
|
72 package Bio::DB::GDB;
|
|
73 use strict;
|
|
74 use Bio::Root::Root;
|
|
75 use LWP::UserAgent;
|
|
76 use HTTP::Request::Common;
|
|
77 use HTML::Parser;
|
|
78
|
|
79 use vars qw(@ISA $BASEADDRESS %PARAMSTRING $MODVERSION);
|
|
80
|
|
81 @ISA = qw(Bio::Root::Root);
|
|
82
|
|
83 $MODVERSION = '0.01';
|
|
84 $BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment';
|
|
85 %PARAMSTRING = (
|
|
86 gene => { '!action' => 'query' },
|
|
87 marker => { '!action' => 'query' },
|
|
88 );
|
|
89
|
|
90 # the new way to make modules a little more lightweight
|
|
91 sub new {
|
|
92 my($class,@args) = @_;
|
|
93 my $self = $class->SUPER::new(@args);
|
|
94
|
|
95 my $ua = new LWP::UserAgent;
|
|
96 $ua->agent(ref($self) ."/$MODVERSION");
|
|
97 $self->ua($ua);
|
|
98
|
|
99 return $self;
|
|
100 }
|
|
101
|
|
102 =head2 ua
|
|
103
|
|
104 Title : ua
|
|
105 Usage : my $ua = $self->ua or
|
|
106 $self->ua($ua)
|
|
107 Function: Get/Set a LWP::UserAgent for use
|
|
108 Returns : reference to LWP::UserAgent Object
|
|
109 Args : $ua - must be a LWP::UserAgent
|
|
110
|
|
111 =cut
|
|
112
|
|
113 sub ua {
|
|
114 my ($self, $ua) = @_;
|
|
115 if( defined $ua && $ua->isa("LWP::UserAgent") ) {
|
|
116 $self->{_ua} = $ua;
|
|
117 }
|
|
118 return $self->{_ua};
|
|
119 }
|
|
120
|
|
121 # helper method to get specific options
|
|
122
|
|
123 =head2 get_params
|
|
124
|
|
125 Title : get_params
|
|
126 Usage : my %params = $self->get_params($mode)
|
|
127 Function: Returns key,value pairs to be passed to query
|
|
128 for mode ('marker', 'gene')
|
|
129 Returns : a key,value pair hash
|
|
130 Args : 'marker' or 'gene' mode for retrieval
|
|
131
|
|
132 =cut
|
|
133
|
|
134 sub get_params {
|
|
135 my ($self, $mode) = @_;
|
|
136 return %{$PARAMSTRING{$mode}};
|
|
137 }
|
|
138
|
|
139 =head2 get_info
|
|
140
|
|
141 Title : get_info
|
|
142 Usage : my $info = $self->get_info(-type => 'marker',
|
|
143 -id => 'D1S234');
|
|
144 Function: Returns key,value pairs specific
|
|
145 Returns : a key,value pair hash
|
|
146 Args : -type => 'marker' or 'gene' mode for retrieval
|
|
147 -id => unique id to query for
|
|
148
|
|
149 =cut
|
|
150
|
|
151 sub get_info {
|
|
152 my ($self, @args) = @_;
|
|
153 my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args);
|
|
154 if( !defined $type ) {
|
|
155 $self->throw("Must specify a type you are querying for");
|
|
156 } elsif( !defined $id ) {
|
|
157 $self->throw("Must specify a id to query for");
|
|
158 }
|
|
159 my %params = $self->get_params($type);
|
|
160
|
|
161 $params{'displayName'} = $id;
|
|
162
|
|
163 if( $type eq 'marker' ) {
|
|
164 # do more specific stuff?
|
|
165 } elsif( $type eq 'gene' ) {
|
|
166 # do more specific stuff?
|
|
167 }
|
|
168 my $url = $self->get_request(%params);
|
|
169
|
|
170 my ($resp) = $self->_request($url);
|
|
171 if( ! defined $resp || ! ref($resp) ) {
|
|
172 $self->warn("Did not get any data for url ". $url->uri);
|
|
173 return undef;
|
|
174 }
|
|
175 my $content = $resp->content;
|
|
176 if( $content =~ /ERROR/ || length($resp->content) == 0 ) {
|
|
177 $self->warn("Error getting for url " . $url->uri . "!\n");
|
|
178 return undef;
|
|
179 }
|
|
180 my (@primers, $length, $markerurl, $realname);
|
|
181 my $state = 0;
|
|
182 my $title = 0;
|
|
183 my $p;
|
|
184 $p = new HTML::Parser( api_version => 3,
|
|
185 start_h => [ sub {
|
|
186 return if( $title == 2 || $state == 3);
|
|
187 my($tag,$attr,$text) = @_;
|
|
188 return if( !defined $tag);
|
|
189 if( $tag eq 'table' ) {
|
|
190 $state = 1;
|
|
191 } elsif( $tag eq 'title' ) {
|
|
192 $title = 1;
|
|
193 } elsif( $state == 2 &&
|
|
194 $tag eq 'a' &&
|
|
195 $attr->{'href'} ) {
|
|
196 $state = 3;
|
|
197 if( $text =~ m(href="?(http://.+)"?\s*>) ) {
|
|
198 $markerurl = $1;
|
|
199 }
|
|
200 }
|
|
201 }, "tagname, attr, text" ],
|
|
202 end_h => [ sub {
|
|
203 return if ($title == 2 || $state == 3);
|
|
204 my ( $tag ) = @_;
|
|
205 $title = 0 if( $tag eq 'title' );
|
|
206 }, "tagname" ],
|
|
207 text_h => [ sub {
|
|
208 return if( $title == 2 || $state == 3);
|
|
209 my($text) = @_;
|
|
210 if( $title && $text =~ /Amplimer/ ) {
|
|
211 $markerurl = 'this';
|
|
212 $title = 2;
|
|
213 }
|
|
214 $state = 2 if( $state == 1 && $text =~ /Amplimer/);
|
|
215 }, "text" ],
|
|
216 marked_sections =>1);
|
|
217 $p->parse($content) or die "Can't open: $!";
|
|
218 if( ! defined $markerurl ) {
|
|
219 @primers = ('notfound','notfound', '?');
|
|
220 } elsif( $markerurl eq 'this' ) {
|
|
221
|
|
222 }
|
|
223 else {
|
|
224 my $resp = $self->_request(GET $markerurl);
|
|
225 return undef if ( !defined $resp );
|
|
226 $content = $resp->content();
|
|
227 }
|
|
228 $state = 0;
|
|
229 $realname = 'unknown';
|
|
230 my $lasttag = '';
|
|
231 $p = HTML::Parser->new(api_version => 3,
|
|
232 start_h => [ sub { my ($tag) = @_;
|
|
233 $tag = lc $tag;
|
|
234 $lasttag = $tag;
|
|
235 if( $state == 3 && $tag eq 'dd' ) {
|
|
236 $state = 4;
|
|
237 }
|
|
238 } , 'tagname'],
|
|
239 text_h => [ sub {
|
|
240 my($text) = @_;
|
|
241 if( $text =~ /Primer Sequence/ ) {
|
|
242 $state =1;
|
|
243 } elsif( $state == 1 ) {
|
|
244 foreach my $l ( split(/\n+/,$text) ) {
|
|
245 $l =~ s/\s+(\S+)/$1/;
|
|
246 my ($name,$primer) = split(/\s+/,$l);
|
|
247 next if( !defined $name);
|
|
248 push @primers, $primer;
|
|
249 $state = 2;
|
|
250 }
|
|
251 } elsif( $state == 2 &&
|
|
252 ($text =~ /Seq Min Len/i ||
|
|
253 $text =~ /Seq Max Len/i) ) {
|
|
254 $state = 3;
|
|
255 } elsif ( $state == 4 ) {
|
|
256 my ($len) = ( $text =~ /(\d+\.\d+)/
|
|
257 );
|
|
258 $length = $len;
|
|
259 $length *= 1000 if( $len < 1 );
|
|
260 $state = 0;
|
|
261 } elsif( $lasttag eq 'dd' &&
|
|
262 $text =~ /(GDB:\d+)/i ) {
|
|
263 $realname = $1;
|
|
264 }
|
|
265 } , "text" ],
|
|
266 marked_sections =>1,
|
|
267 );
|
|
268 $p->parse($content) || die "Can't open: $!";
|
|
269
|
|
270 return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers };
|
|
271 }
|
|
272
|
|
273 =head2 get_request
|
|
274
|
|
275 Title : get_request
|
|
276 Usage : my $url = $self->get_request
|
|
277 Function: HTTP::Request
|
|
278 Returns :
|
|
279 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
|
|
280
|
|
281 =cut
|
|
282
|
|
283 sub get_request {
|
|
284 my ($self, %params) = @_;
|
|
285 if( ! %params ) {
|
|
286 $self->throw("must provide parameters with which to query");
|
|
287 }
|
|
288 my $url = $BASEADDRESS;
|
|
289 my $querystr = '?' . join("&", map { "$_=$params{$_}" } keys %params);
|
|
290 return GET $url . $querystr;
|
|
291 }
|
|
292
|
|
293 # private methods
|
|
294 sub _request {
|
|
295
|
|
296 my ($self, $url,$tmpfile) = @_;
|
|
297 my ($resp);
|
|
298 if( defined $tmpfile && $tmpfile ne '' ) {
|
|
299 $resp = $self->ua->request($url, $tmpfile);
|
|
300 } else { $resp = $self->ua->request($url); }
|
|
301
|
|
302 if( $resp->is_error ) {
|
|
303 $self->warn($resp->as_string() . "\nError getting for url " .
|
|
304 $url->uri . "!\n");
|
|
305 return undef;
|
|
306 }
|
|
307 return $resp;
|
|
308 }
|
|
309
|
|
310 sub _gdb_search_tag_start {
|
|
311
|
|
312 }
|
|
313
|
|
314 1;
|
|
315 __END__
|