Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/DBFetch.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # | |
| 2 # $Id: DBFetch.pm,v 1.8 2002/12/22 22:02:13 lstein Exp $ | |
| 3 # | |
| 4 # BioPerl module for Bio::DB::DBFetch | |
| 5 # | |
| 6 # Cared for by Heikki Lehvaslaiho <Heikki@ebi.ac.uk> | |
| 7 # | |
| 8 # Copyright Heikki Lehvaslaiho | |
| 9 # | |
| 10 # You may distribute this module under the same terms as perl itself | |
| 11 | |
| 12 # POD documentation - main docs before the code | |
| 13 | |
| 14 =head1 NAME | |
| 15 | |
| 16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 #do not use this module directly | |
| 21 | |
| 22 =head1 DESCRIPTION | |
| 23 | |
| 24 Allows the dynamic retrieval of entries from databases using the | |
| 25 dbfetch script at EBI: | |
| 26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>. | |
| 27 | |
| 28 In order to make changes transparent we have host type (currently only | |
| 29 ebi) and location (defaults to ebi) separated out. This allows later | |
| 30 additions of more servers in different geographical locations. | |
| 31 | |
| 32 This is a superclass which is called by instantiable subclasses with | |
| 33 correct parameters. | |
| 34 | |
| 35 =head1 FEEDBACK | |
| 36 | |
| 37 =head2 Mailing Lists | |
| 38 | |
| 39 User feedback is an integral part of the evolution of this and other | |
| 40 Bioperl modules. Send your comments and suggestions preferably to one | |
| 41 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 42 | |
| 43 bioperl-l@bioperl.org - General discussion | |
| 44 http://bio.perl.org/MailList.html - About the mailing lists | |
| 45 | |
| 46 =head2 Reporting Bugs | |
| 47 | |
| 48 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 49 the bugs and their resolution. Bug reports can be submitted via email | |
| 50 or the web: | |
| 51 | |
| 52 bioperl-bugs@bio.perl.org | |
| 53 http://bugzilla.bioperl.org/ | |
| 54 | |
| 55 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 56 | |
| 57 Email Heikki Lehvaslaiho E<lt>Heikki@ebi.ac.ukE<gt> | |
| 58 | |
| 59 =head1 APPENDIX | |
| 60 | |
| 61 The rest of the documentation details each of the object | |
| 62 methods. Internal methods are usually preceded with a _ | |
| 63 | |
| 64 =cut | |
| 65 | |
| 66 # Let the code begin... | |
| 67 | |
| 68 package Bio::DB::DBFetch; | |
| 69 use strict; | |
| 70 use vars qw(@ISA $MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION | |
| 71 $DEFAULTSERVERTYPE); | |
| 72 | |
| 73 $MODVERSION = '0.1'; | |
| 74 use HTTP::Request::Common; | |
| 75 use Bio::DB::WebDBSeqI; | |
| 76 | |
| 77 @ISA = qw(Bio::DB::WebDBSeqI); | |
| 78 | |
| 79 # the new way to make modules a little more lightweight | |
| 80 | |
| 81 BEGIN { | |
| 82 # global vars | |
| 83 $DEFAULTSERVERTYPE = 'dbfetch'; | |
| 84 $DEFAULTLOCATION = 'ebi'; | |
| 85 } | |
| 86 | |
| 87 | |
| 88 =head1 Routines from Bio::DB::WebDBSeqI | |
| 89 | |
| 90 =head2 get_request | |
| 91 | |
| 92 Title : get_request | |
| 93 Usage : my $url = $self->get_request | |
| 94 Function: returns a HTTP::Request object | |
| 95 Returns : | |
| 96 Args : %qualifiers = a hash of qualifiers (ids, format, etc) | |
| 97 | |
| 98 =cut | |
| 99 | |
| 100 sub get_request { | |
| 101 my ($self, @qualifiers) = @_; | |
| 102 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], | |
| 103 @qualifiers); | |
| 104 | |
| 105 $self->throw("Must specify a value for UIDs to fetch") | |
| 106 unless defined $uids; | |
| 107 my $tmp; | |
| 108 my $format_string = ''; | |
| 109 $format ||= $self->default_format; | |
| 110 ($format, $tmp) = $self->request_format($format); | |
| 111 $format_string = "&format=$format" if $format ne $self->default_format; | |
| 112 my $url = $self->location_url(); | |
| 113 my $uid; | |
| 114 if( ref($uids) =~ /ARRAY/i ) { | |
| 115 $uid = join (',', @$uids); | |
| 116 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.') | |
| 117 if scalar @$uids >50; | |
| 118 } else { | |
| 119 $uid = $uids; | |
| 120 } | |
| 121 | |
| 122 return GET $url. $format_string. '&id='. $uid; | |
| 123 } | |
| 124 | |
| 125 | |
| 126 =head2 postprocess_data | |
| 127 | |
| 128 Title : postprocess_data | |
| 129 Usage : $self->postprocess_data ( 'type' => 'string', | |
| 130 'location' => \$datastr); | |
| 131 Function: process downloaded data before loading into a Bio::SeqIO | |
| 132 Returns : void | |
| 133 Args : hash with two keys - 'type' can be 'string' or 'file' | |
| 134 - 'location' either file location or string | |
| 135 reference containing data | |
| 136 | |
| 137 =cut | |
| 138 | |
| 139 # remove occasional blank lines at top of web output | |
| 140 sub postprocess_data { | |
| 141 my ($self, %args) = @_; | |
| 142 if ($args{type} eq 'string') { | |
| 143 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace | |
| 144 } | |
| 145 elsif ($args{type} eq 'file') { | |
| 146 open F,$args{location} or $self->throw("Cannot open $args{location}: $!"); | |
| 147 my @data = <F>; | |
| 148 for (@data) { | |
| 149 last unless /^\s+$/; | |
| 150 shift @data; | |
| 151 } | |
| 152 open F,">$args{location}" or $self->throw("Cannot write to $args{location}: $!"); | |
| 153 print F @data; | |
| 154 close F; | |
| 155 } | |
| 156 } | |
| 157 | |
| 158 =head2 default_format | |
| 159 | |
| 160 Title : default_format | |
| 161 Usage : my $format = $self->default_format | |
| 162 Function: Returns default sequence format for this module | |
| 163 Returns : string | |
| 164 Args : none | |
| 165 | |
| 166 =cut | |
| 167 | |
| 168 sub default_format { | |
| 169 my ($self) = @_; | |
| 170 return $self->{'_default_format'}; | |
| 171 } | |
| 172 | |
| 173 =head1 Bio::DB::DBFetch specific routines | |
| 174 | |
| 175 =head2 get_Stream_by_id | |
| 176 | |
| 177 Title : get_Stream_by_id | |
| 178 Usage : $seq = $db->get_Stream_by_id($ref); | |
| 179 Function: Retrieves Seq objects from the server 'en masse', rather than one | |
| 180 at a time. For large numbers of sequences, this is far superior | |
| 181 than get_Stream_by_[id/acc](). | |
| 182 Example : | |
| 183 Returns : a Bio::SeqIO stream object | |
| 184 Args : $ref : either an array reference, a filename, or a filehandle | |
| 185 from which to get the list of unique ids/accession numbers. | |
| 186 | |
| 187 NOTE: for backward compatibility, this method is also called | |
| 188 get_Stream_by_batch. | |
| 189 | |
| 190 =cut | |
| 191 | |
| 192 sub get_Stream_by_id { | |
| 193 my ($self, $ids) = @_; | |
| 194 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch'); | |
| 195 } | |
| 196 | |
| 197 =head2 get_Seq_by_version | |
| 198 | |
| 199 Title : get_Seq_by_version | |
| 200 Usage : $seq = $db->get_Seq_by_version('X77802.1'); | |
| 201 Function: Gets a Bio::Seq object by accession number | |
| 202 Returns : A Bio::Seq object | |
| 203 Args : version number (as a string) | |
| 204 Throws : "version does not exist" exception | |
| 205 | |
| 206 =cut | |
| 207 | |
| 208 sub get_Seq_by_version { | |
| 209 my ($self,$seqid) = @_; | |
| 210 my $seqio = $self->get_Stream_by_acc([$seqid]); | |
| 211 $self->throw("version does not exist") if( !defined $seqio ); | |
| 212 return $seqio->next_seq(); | |
| 213 } | |
| 214 | |
| 215 =head2 request_format | |
| 216 | |
| 217 Title : request_format | |
| 218 Usage : my ($req_format, $ioformat) = $self->request_format; | |
| 219 $self->request_format("genbank"); | |
| 220 $self->request_format("fasta"); | |
| 221 Function: Get/Set sequence format retrieval. The get-form will normally not | |
| 222 be used outside of this and derived modules. | |
| 223 Returns : Array of two strings, the first representing the format for | |
| 224 retrieval, and the second specifying the corresponding SeqIO format. | |
| 225 Args : $format = sequence format | |
| 226 | |
| 227 =cut | |
| 228 | |
| 229 sub request_format { | |
| 230 my ($self, $value) = @_; | |
| 231 if( defined $value ) { | |
| 232 $value = lc $value; | |
| 233 $self->{'_format'} = $value; | |
| 234 return ($value, $value); | |
| 235 } | |
| 236 $value = $self->{'_format'}; | |
| 237 if( $value and defined $self->formatmap->{$value} ) { | |
| 238 return ($value, $self->formatmap->{$value}); | |
| 239 } else { | |
| 240 # Try to fall back to a default. | |
| 241 return ($self->default_format, $self->default_format ); | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 | |
| 246 =head2 servertype | |
| 247 | |
| 248 Title : servertype | |
| 249 Usage : my $servertype = $self->servertype | |
| 250 $self->servertype($servertype); | |
| 251 Function: Get/Set server type | |
| 252 Returns : string | |
| 253 Args : server type string [optional] | |
| 254 | |
| 255 =cut | |
| 256 | |
| 257 sub servertype { | |
| 258 my ($self, $servertype) = @_; | |
| 259 if( defined $servertype && $servertype ne '') { | |
| 260 $self->throw("You gave an invalid server type ($servertype)". | |
| 261 " - available types are ". | |
| 262 keys %{$self->hosts}) unless( $self->hosts->{$servertype} ); | |
| 263 $self->{'_servertype'} = $servertype; | |
| 264 } | |
| 265 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'}; | |
| 266 return $self->{'_servertype'}; | |
| 267 } | |
| 268 | |
| 269 =head2 hostlocation | |
| 270 | |
| 271 Title : hostlocation | |
| 272 Usage : my $location = $self->hostlocation() | |
| 273 $self->hostlocation($location) | |
| 274 Function: Set/Get Hostlocation | |
| 275 Returns : string representing hostlocation | |
| 276 Args : string specifying hostlocation [optional] | |
| 277 | |
| 278 =cut | |
| 279 | |
| 280 sub hostlocation { | |
| 281 my ($self, $location ) = @_; | |
| 282 $location = lc $location; | |
| 283 my $servertype = $self->servertype; | |
| 284 $self->throw("Must have a valid servertype defined not $servertype") | |
| 285 unless defined $servertype; | |
| 286 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}}; | |
| 287 if( defined $location && $location ne '' ) { | |
| 288 if( ! $hosts{$location} ) { | |
| 289 $self->throw("Must specify a known host, not $location,". | |
| 290 " possible values (". | |
| 291 join(",", sort keys %hosts ). ")"); | |
| 292 } | |
| 293 $self->{'_hostlocation'} = $location; | |
| 294 } | |
| 295 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'}; | |
| 296 return $self->{'_hostlocation'}; | |
| 297 } | |
| 298 | |
| 299 =head2 location_url | |
| 300 | |
| 301 Title : location | |
| 302 Usage : my $url = $self->location_url() | |
| 303 Function: Get host url | |
| 304 Returns : string representing url | |
| 305 Args : none | |
| 306 | |
| 307 =cut | |
| 308 | |
| 309 sub location_url { | |
| 310 my ($self) = @_; | |
| 311 my $servertype = $self->servertype(); | |
| 312 my $location = $self->hostlocation(); | |
| 313 if( ! defined $location || !defined $servertype ) { | |
| 314 $self->throw("must have a valid hostlocation and servertype set before calling location_url"); | |
| 315 } | |
| 316 return sprintf($self->hosts->{$servertype}->{'baseurl'}, | |
| 317 $self->hosts->{$servertype}->{'hosts'}->{$location}); | |
| 318 } | |
| 319 | |
| 320 =head1 Bio::DB::DBFetch routines | |
| 321 | |
| 322 These methods allow subclasses to pass parameters. | |
| 323 | |
| 324 =head2 hosts | |
| 325 | |
| 326 Title : hosts | |
| 327 Usage : | |
| 328 Function: get/set for host hash | |
| 329 Returns : | |
| 330 Args : optional hash | |
| 331 | |
| 332 =cut | |
| 333 | |
| 334 sub hosts { | |
| 335 my ($self, $value) = @_; | |
| 336 if (defined $value) { | |
| 337 $self->{'_hosts'} = $value; | |
| 338 } | |
| 339 unless (exists $self->{'_hosts'}) { | |
| 340 return (''); | |
| 341 } else { | |
| 342 return $self->{'_hosts'}; | |
| 343 } | |
| 344 } | |
| 345 | |
| 346 =head2 formatmap | |
| 347 | |
| 348 Title : formatmap | |
| 349 Usage : | |
| 350 Function: get/set for format hash | |
| 351 Returns : | |
| 352 Args : optional hash | |
| 353 | |
| 354 =cut | |
| 355 | |
| 356 sub formatmap { | |
| 357 my ($self, $value) = @_; | |
| 358 if (defined $value) { | |
| 359 $self->{'_formatmap'} = $value; | |
| 360 } | |
| 361 unless (exists $self->{'_formatmap'}) { | |
| 362 return (''); | |
| 363 } else { | |
| 364 return $self->{'_formatmap'}; | |
| 365 } | |
| 366 } | |
| 367 | |
| 368 | |
| 369 1; | |
| 370 __END__ |
