comparison variant_effect_predictor/Bio/DB/GDB.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: GDB.pm,v 1.12 2002/12/01 00:05:19 jason Exp $
2 #
3 # BioPerl module for Bio::DB::GenBank
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12 #
13
14 =head1 NAME
15
16 Bio::DB::GDB - Database object interface to GDB HTTP query
17
18 =head1 SYNOPSIS
19
20 $gdb = new Bio::DB::GDB;
21
22 $info = $gdb->get_info(-type=>'marker',
23 -id=>'D1S243'); # Marker name
24
25 print "genbank id is ", $info->{'gdbid'},
26 "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}),
27 "\nproduct length is ", $info->{'length'}, "\n";
28
29 =head1 DESCRIPTION
30
31 This class allows connections to the Genome Database (GDB) and queries
32 to retrieve any database objects. See http://www.gdb.org/ or any
33 mirror for details.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the
40 evolution of this and other Bioperl modules. Send
41 your comments and suggestions preferably to one
42 of the Bioperl mailing lists. Your participation
43 is much appreciated.
44
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/MailList.shtml - About the mailing lists
47
48 =head2 Reporting Bugs
49
50 Report bugs to the Bioperl bug tracking system to
51 help us keep track the bugs and their resolution.
52 Bug reports can be submitted via email or the
53 web:
54
55 bioperl-bugs@bio.perl.org
56 http://bugzilla.bioperl.org/
57
58 =head1 AUTHOR - Jason Stajich
59
60 Email jason@bioperl.org
61
62 =head1 APPENDIX
63
64 The rest of the documentation details each of the
65 object methods. Internal methods are usually
66 preceded with a _
67
68 =cut
69
70 # Let the code begin...
71
72 package Bio::DB::GDB;
73 use strict;
74 use Bio::Root::Root;
75 use LWP::UserAgent;
76 use HTTP::Request::Common;
77 use HTML::Parser;
78
79 use vars qw(@ISA $BASEADDRESS %PARAMSTRING $MODVERSION);
80
81 @ISA = qw(Bio::Root::Root);
82
83 $MODVERSION = '0.01';
84 $BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment';
85 %PARAMSTRING = (
86 gene => { '!action' => 'query' },
87 marker => { '!action' => 'query' },
88 );
89
90 # the new way to make modules a little more lightweight
91 sub new {
92 my($class,@args) = @_;
93 my $self = $class->SUPER::new(@args);
94
95 my $ua = new LWP::UserAgent;
96 $ua->agent(ref($self) ."/$MODVERSION");
97 $self->ua($ua);
98
99 return $self;
100 }
101
102 =head2 ua
103
104 Title : ua
105 Usage : my $ua = $self->ua or
106 $self->ua($ua)
107 Function: Get/Set a LWP::UserAgent for use
108 Returns : reference to LWP::UserAgent Object
109 Args : $ua - must be a LWP::UserAgent
110
111 =cut
112
113 sub ua {
114 my ($self, $ua) = @_;
115 if( defined $ua && $ua->isa("LWP::UserAgent") ) {
116 $self->{_ua} = $ua;
117 }
118 return $self->{_ua};
119 }
120
121 # helper method to get specific options
122
123 =head2 get_params
124
125 Title : get_params
126 Usage : my %params = $self->get_params($mode)
127 Function: Returns key,value pairs to be passed to query
128 for mode ('marker', 'gene')
129 Returns : a key,value pair hash
130 Args : 'marker' or 'gene' mode for retrieval
131
132 =cut
133
134 sub get_params {
135 my ($self, $mode) = @_;
136 return %{$PARAMSTRING{$mode}};
137 }
138
139 =head2 get_info
140
141 Title : get_info
142 Usage : my $info = $self->get_info(-type => 'marker',
143 -id => 'D1S234');
144 Function: Returns key,value pairs specific
145 Returns : a key,value pair hash
146 Args : -type => 'marker' or 'gene' mode for retrieval
147 -id => unique id to query for
148
149 =cut
150
151 sub get_info {
152 my ($self, @args) = @_;
153 my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args);
154 if( !defined $type ) {
155 $self->throw("Must specify a type you are querying for");
156 } elsif( !defined $id ) {
157 $self->throw("Must specify a id to query for");
158 }
159 my %params = $self->get_params($type);
160
161 $params{'displayName'} = $id;
162
163 if( $type eq 'marker' ) {
164 # do more specific stuff?
165 } elsif( $type eq 'gene' ) {
166 # do more specific stuff?
167 }
168 my $url = $self->get_request(%params);
169
170 my ($resp) = $self->_request($url);
171 if( ! defined $resp || ! ref($resp) ) {
172 $self->warn("Did not get any data for url ". $url->uri);
173 return undef;
174 }
175 my $content = $resp->content;
176 if( $content =~ /ERROR/ || length($resp->content) == 0 ) {
177 $self->warn("Error getting for url " . $url->uri . "!\n");
178 return undef;
179 }
180 my (@primers, $length, $markerurl, $realname);
181 my $state = 0;
182 my $title = 0;
183 my $p;
184 $p = new HTML::Parser( api_version => 3,
185 start_h => [ sub {
186 return if( $title == 2 || $state == 3);
187 my($tag,$attr,$text) = @_;
188 return if( !defined $tag);
189 if( $tag eq 'table' ) {
190 $state = 1;
191 } elsif( $tag eq 'title' ) {
192 $title = 1;
193 } elsif( $state == 2 &&
194 $tag eq 'a' &&
195 $attr->{'href'} ) {
196 $state = 3;
197 if( $text =~ m(href="?(http://.+)"?\s*>) ) {
198 $markerurl = $1;
199 }
200 }
201 }, "tagname, attr, text" ],
202 end_h => [ sub {
203 return if ($title == 2 || $state == 3);
204 my ( $tag ) = @_;
205 $title = 0 if( $tag eq 'title' );
206 }, "tagname" ],
207 text_h => [ sub {
208 return if( $title == 2 || $state == 3);
209 my($text) = @_;
210 if( $title && $text =~ /Amplimer/ ) {
211 $markerurl = 'this';
212 $title = 2;
213 }
214 $state = 2 if( $state == 1 && $text =~ /Amplimer/);
215 }, "text" ],
216 marked_sections =>1);
217 $p->parse($content) or die "Can't open: $!";
218 if( ! defined $markerurl ) {
219 @primers = ('notfound','notfound', '?');
220 } elsif( $markerurl eq 'this' ) {
221
222 }
223 else {
224 my $resp = $self->_request(GET $markerurl);
225 return undef if ( !defined $resp );
226 $content = $resp->content();
227 }
228 $state = 0;
229 $realname = 'unknown';
230 my $lasttag = '';
231 $p = HTML::Parser->new(api_version => 3,
232 start_h => [ sub { my ($tag) = @_;
233 $tag = lc $tag;
234 $lasttag = $tag;
235 if( $state == 3 && $tag eq 'dd' ) {
236 $state = 4;
237 }
238 } , 'tagname'],
239 text_h => [ sub {
240 my($text) = @_;
241 if( $text =~ /Primer Sequence/ ) {
242 $state =1;
243 } elsif( $state == 1 ) {
244 foreach my $l ( split(/\n+/,$text) ) {
245 $l =~ s/\s+(\S+)/$1/;
246 my ($name,$primer) = split(/\s+/,$l);
247 next if( !defined $name);
248 push @primers, $primer;
249 $state = 2;
250 }
251 } elsif( $state == 2 &&
252 ($text =~ /Seq Min Len/i ||
253 $text =~ /Seq Max Len/i) ) {
254 $state = 3;
255 } elsif ( $state == 4 ) {
256 my ($len) = ( $text =~ /(\d+\.\d+)/
257 );
258 $length = $len;
259 $length *= 1000 if( $len < 1 );
260 $state = 0;
261 } elsif( $lasttag eq 'dd' &&
262 $text =~ /(GDB:\d+)/i ) {
263 $realname = $1;
264 }
265 } , "text" ],
266 marked_sections =>1,
267 );
268 $p->parse($content) || die "Can't open: $!";
269
270 return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers };
271 }
272
273 =head2 get_request
274
275 Title : get_request
276 Usage : my $url = $self->get_request
277 Function: HTTP::Request
278 Returns :
279 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
280
281 =cut
282
283 sub get_request {
284 my ($self, %params) = @_;
285 if( ! %params ) {
286 $self->throw("must provide parameters with which to query");
287 }
288 my $url = $BASEADDRESS;
289 my $querystr = '?' . join("&", map { "$_=$params{$_}" } keys %params);
290 return GET $url . $querystr;
291 }
292
293 # private methods
294 sub _request {
295
296 my ($self, $url,$tmpfile) = @_;
297 my ($resp);
298 if( defined $tmpfile && $tmpfile ne '' ) {
299 $resp = $self->ua->request($url, $tmpfile);
300 } else { $resp = $self->ua->request($url); }
301
302 if( $resp->is_error ) {
303 $self->warn($resp->as_string() . "\nError getting for url " .
304 $url->uri . "!\n");
305 return undef;
306 }
307 return $resp;
308 }
309
310 sub _gdb_search_tag_start {
311
312 }
313
314 1;
315 __END__