Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/URI.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Utils::URI | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/; | |
28 # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in | |
29 | |
30 my $db_uri = parse_uri('mysql://user@host:3157/db'); | |
31 my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t'); | |
32 | |
33 is_uri('mysql://user@host'); # returns 1 | |
34 is_uri('file:///my/path'); # returns 1 | |
35 is_uri('/my/path'); # returns 0 | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 This object is a generic URI parser which is primarily used in the | |
40 parsing of database URIs into a more managable data structure. We also provide | |
41 the resulting URI object | |
42 | |
43 =head1 DEPENDENCIES | |
44 | |
45 L<URI::Escape> is an optional dependency but if available the code will attempt | |
46 to perform URI encoding/decoding on parameters. If you do not want this | |
47 functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE> | |
48 to false; | |
49 | |
50 =head1 METHODS | |
51 | |
52 =cut | |
53 | |
54 package Bio::EnsEMBL::Utils::URI; | |
55 | |
56 use strict; | |
57 use warnings; | |
58 | |
59 use Scalar::Util qw/looks_like_number/; | |
60 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
61 use File::Spec; | |
62 | |
63 our $URI_ESCAPE; | |
64 $URI_ESCAPE = 0; | |
65 eval { | |
66 require URI::Escape; | |
67 URI::Escape->import(); | |
68 $URI_ESCAPE = 1; | |
69 }; | |
70 | |
71 use base qw/Exporter/; | |
72 our @EXPORT_OK; | |
73 our %EXPORT_TAGS; | |
74 @EXPORT_OK = qw/parse_uri is_uri/; | |
75 %EXPORT_TAGS = ( all => [@EXPORT_OK] ); | |
76 | |
77 ####URI Parsing | |
78 | |
79 =head2 is_uri | |
80 | |
81 Arg[1] : Scalar; URI to parse | |
82 Example : is_uri('mysql://user:pass@host:415/db'); | |
83 Description : Looks for the existence of a URI scheme to decide if this | |
84 is a classical URI. Whilst non-scheme based URIs can still be | |
85 interprited it is useful to use when you need to know if | |
86 you are going to work with a URI or not | |
87 Returntype : Boolean | |
88 Caller : General | |
89 Status : Beta | |
90 | |
91 =cut | |
92 | |
93 sub is_uri { | |
94 my ($uri) = @_; | |
95 return 0 if ! $uri; | |
96 my $SCHEME = qr{ ([^:]*) :// }xms; | |
97 return ($uri =~ $SCHEME) ? 1 : 0; | |
98 } | |
99 | |
100 =head2 parse_uri | |
101 | |
102 Arg[1] : Scalar; URI to parse | |
103 Example : my $uri = parse_uri('mysql://user:pass@host:415/db'); | |
104 Description : A URL parser which attempts to convert many different types | |
105 of URL into a common data structure. | |
106 Returntype : Bio::EnsEMBL::Utils::URI | |
107 Caller : General | |
108 Status : Beta | |
109 | |
110 =cut | |
111 | |
112 sub parse_uri { | |
113 my ($url) = @_; | |
114 | |
115 my $SCHEME = qr{ ([^:]*) :// }xms; | |
116 my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms; | |
117 my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms; | |
118 my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms; | |
119 my $PARAMS = qr{ \? (.+)}xms; | |
120 | |
121 my $p; | |
122 | |
123 if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) { | |
124 my $scheme = $1; | |
125 $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme; | |
126 $p = Bio::EnsEMBL::Utils::URI->new($scheme); | |
127 my ($locator, $params) = ($2, $3); | |
128 | |
129 if($scheme eq 'file') { | |
130 $p->path($locator); | |
131 } | |
132 elsif($scheme eq 'sqlite') { | |
133 $p->path($locator); | |
134 } | |
135 else { | |
136 if($locator =~ s/^$USER//) { | |
137 $p->user($1); | |
138 $p->pass($2); | |
139 } | |
140 if($locator =~ s/^$HOST//) { | |
141 $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1); | |
142 $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2); | |
143 } | |
144 | |
145 if($p->is_db_scheme() || $scheme eq q{}) { | |
146 if($locator =~ $DB) { | |
147 $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1; | |
148 $p->db_params()->{table} = ($URI_ESCAPE) ? uri_unescape($2) : $2; | |
149 } | |
150 } | |
151 else { | |
152 $p->path($locator); | |
153 } | |
154 } | |
155 | |
156 if(defined $params) { | |
157 my @kv_pairs = split(/;|&/, $params); | |
158 foreach my $kv_string (@kv_pairs) { | |
159 my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string); | |
160 $p->add_param($key, $value); | |
161 } | |
162 } | |
163 } | |
164 | |
165 return $p; | |
166 } | |
167 | |
168 ####URI Object | |
169 | |
170 =pod | |
171 | |
172 =head2 new() | |
173 | |
174 Arg[1] : String; scheme the URI will confrom to | |
175 Description : New object call | |
176 Returntype : Bio::EnsEMBL::Utils::URIParser::URI | |
177 Exceptions : Thrown if scheme is undefined. | |
178 Status : Stable | |
179 | |
180 =cut | |
181 | |
182 sub new { | |
183 my ($class, $scheme) = @_; | |
184 $class = ref($class) || $class; | |
185 throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme; | |
186 | |
187 my $self = bless ({ | |
188 params => {}, | |
189 param_keys => [], | |
190 db_params => {}, | |
191 scheme => $scheme, | |
192 }, $class); | |
193 | |
194 return $self; | |
195 } | |
196 | |
197 =head2 db_schemes() | |
198 | |
199 Description: Returns a hash of scheme names known to be databases | |
200 Returntype : HashRef | |
201 Exceptions : None | |
202 Status : Stable | |
203 | |
204 =cut | |
205 | |
206 sub db_schemes { | |
207 my ($self) = @_; | |
208 return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/}; | |
209 } | |
210 | |
211 | |
212 =head2 is_db_scheme() | |
213 | |
214 Description: Returns true if the code believes the scheme to be a Database | |
215 Returntype : Boolean | |
216 Exceptions : None | |
217 Status : Stable | |
218 | |
219 =cut | |
220 | |
221 sub is_db_scheme { | |
222 my ($self) = @_; | |
223 return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0; | |
224 } | |
225 | |
226 =head2 scheme() | |
227 | |
228 Description : Getter for the scheme attribute | |
229 Returntype : String | |
230 Exceptions : None | |
231 Status : Stable | |
232 | |
233 =cut | |
234 | |
235 sub scheme { | |
236 my ($self) = @_; | |
237 return $self->{scheme}; | |
238 } | |
239 | |
240 =head2 path() | |
241 | |
242 Arg[1] : Setter argument | |
243 Description : Getter/setter for the path attribute | |
244 Returntype : String | |
245 Exceptions : None | |
246 Status : Stable | |
247 | |
248 =cut | |
249 | |
250 sub path { | |
251 my ($self, $path) = @_; | |
252 $self->{path} = $path if defined $path; | |
253 return $self->{path}; | |
254 } | |
255 | |
256 =head2 user() | |
257 | |
258 Arg[1] : Setter argument | |
259 Description : Getter/setter for the user attribute | |
260 Returntype : String | |
261 Exceptions : None | |
262 Status : Stable | |
263 | |
264 =cut | |
265 | |
266 sub user { | |
267 my ($self, $user) = @_; | |
268 $self->{user} = $user if defined $user; | |
269 return $self->{user}; | |
270 } | |
271 | |
272 =head2 pass() | |
273 | |
274 Arg[1] : Setter argument | |
275 Description : Getter/setter for the password attribute | |
276 Returntype : String | |
277 Exceptions : None | |
278 Status : Stable | |
279 | |
280 =cut | |
281 | |
282 sub pass { | |
283 my ($self, $pass) = @_; | |
284 $self->{pass} = $pass if defined $pass; | |
285 return $self->{pass}; | |
286 } | |
287 | |
288 =head2 host() | |
289 | |
290 Arg[1] : Setter argument | |
291 Description : Getter/setter for the host attribute | |
292 Returntype : String | |
293 Exceptions : None | |
294 Status : Stable | |
295 | |
296 =cut | |
297 | |
298 sub host { | |
299 my ($self, $host) = @_; | |
300 $self->{host} = $host if defined $host; | |
301 return $self->{host}; | |
302 } | |
303 | |
304 =head2 port() | |
305 | |
306 Arg[1] : Setter argument | |
307 Description : Getter/setter for the port attribute | |
308 Returntype : Integer | |
309 Exceptions : If port is not a number, less than 1 or not a whole integer | |
310 Status : Stable | |
311 | |
312 =cut | |
313 | |
314 sub port { | |
315 my ($self, $port) = @_; | |
316 if(defined $port) { | |
317 if(! looks_like_number($port) || $port < 1 || int($port) != $port) { | |
318 throw "Port $port is not a number, less than 1 or not a whole integer"; | |
319 } | |
320 $self->{port} = $port if defined $port; | |
321 } | |
322 return $self->{port}; | |
323 } | |
324 | |
325 =head2 param_keys() | |
326 | |
327 Description : Getter for the paramater map keys in the order they were first | |
328 seen. Keys should only appear once in this array | |
329 Returntype : ArrayRef | |
330 Exceptions : None | |
331 Status : Stable | |
332 | |
333 =cut | |
334 | |
335 sub param_keys { | |
336 my ($self) = @_; | |
337 return [@{$self->{param_keys}}]; | |
338 } | |
339 | |
340 =head2 param_exists_ci() | |
341 | |
342 Arg[1] : String; Key | |
343 Description : Performs a case-insensitive search for the given key | |
344 Returntype : Boolean; returns true if your given key was seen | |
345 Exceptions : None | |
346 Status : Stable | |
347 | |
348 =cut | |
349 | |
350 sub param_exists_ci { | |
351 my ($self, $key) = @_; | |
352 my %keys = map { uc($_) => 1 } @{$self->param_keys()}; | |
353 return ($keys{uc($key)}) ? 1 : 0; | |
354 } | |
355 | |
356 =head2 add_param() | |
357 | |
358 Arg[1] : String; key | |
359 Arg[1] : Scalar; value | |
360 Description : Add a key/value to the params map. Multiple inserts of the same | |
361 key is allowed | |
362 Returntype : None | |
363 Exceptions : None | |
364 Status : Stable | |
365 | |
366 =cut | |
367 | |
368 sub add_param { | |
369 my ($self, $key, $value) = @_; | |
370 if(!exists $self->{params}->{$key}) { | |
371 $self->{params}->{$key} = []; | |
372 push(@{$self->{param_keys}}, $key); | |
373 } | |
374 push(@{$self->{params}->{$key}}, $value); | |
375 return; | |
376 } | |
377 | |
378 =head2 get_params() | |
379 | |
380 Arg[1] : String; key | |
381 Description : Returns the values which were found to be linked to the given | |
382 key. Arrays are returned because one key can have many | |
383 values in a URI | |
384 Returntype : ArrayRef[Scalar] | |
385 Exceptions : None | |
386 Status : Stable | |
387 | |
388 =cut | |
389 | |
390 sub get_params { | |
391 my ($self, $key) = @_; | |
392 return [] if ! exists $self->{params}->{$key}; | |
393 return [@{$self->{params}->{$key}}]; | |
394 } | |
395 | |
396 =head2 db_params() | |
397 | |
398 Description : Storage of parameters used only for database URIs since | |
399 they require | |
400 Returntype : HashRef; Database name is keyed under C<dbname> and the | |
401 table is keyed under C<table> | |
402 Exceptions : None | |
403 Status : Stable | |
404 | |
405 =cut | |
406 | |
407 sub db_params { | |
408 my ($self) = @_; | |
409 return $self->{db_params}; | |
410 } | |
411 | |
412 =head2 generate_dbsql_params() | |
413 | |
414 Arg[1] : boolean $no_table alows you to avoid pushing -TABLE as an | |
415 output value | |
416 Description : Generates a Hash of Ensembl compatible parameters to be used | |
417 to construct a DB object. We combine those parameters | |
418 which are deemed to be part of the C<db_params()> method | |
419 under C<-DBNAME> and C<-TABLE>. We also search for a number | |
420 of optional parameters which are lowercased equivalents | |
421 of the construction parameters available from a | |
422 L<Bio::EnsEMBL::DBSQL::DBAdaptor>, | |
423 L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose> | |
424 being supported. | |
425 | |
426 We also convert the scheme type into the driver attribute | |
427 | |
428 Returntype : Hash (not a reference). Output can be put into a C<DBConnection> | |
429 constructor. | |
430 Exceptions : None | |
431 Status : Stable | |
432 | |
433 =cut | |
434 | |
435 sub generate_dbsql_params { | |
436 my ($self, $no_table) = @_; | |
437 my %db_params; | |
438 | |
439 $db_params{-DRIVER} = $self->scheme(); | |
440 $db_params{-HOST} = $self->host() if $self->host(); | |
441 $db_params{-PORT} = $self->port() if $self->port(); | |
442 $db_params{-USER} = $self->user() if $self->user(); | |
443 $db_params{-PASS} = $self->pass() if $self->pass(); | |
444 | |
445 my $dbname; | |
446 my $table; | |
447 if($self->scheme() eq 'sqlite') { | |
448 ($dbname, $table) = $self->_decode_sqlite(); | |
449 } | |
450 else { | |
451 $dbname = $self->db_params()->{dbname}; | |
452 $table = $self->db_params()->{table}; | |
453 } | |
454 | |
455 $db_params{-DBNAME} = $dbname if $dbname; | |
456 $db_params{-TABLE} = $table if ! $no_table && $table; | |
457 | |
458 foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) { | |
459 if($self->param_exists_ci($boolean_param)) { | |
460 $db_params{q{-}.uc($boolean_param)} = 1; | |
461 } | |
462 } | |
463 foreach my $value_param (qw/species group species_id wait_timeout/) { | |
464 if($self->param_exists_ci($value_param)) { | |
465 $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0]; | |
466 } | |
467 } | |
468 | |
469 return %db_params; | |
470 } | |
471 | |
472 =head2 _decode_sqlite | |
473 | |
474 Description : Performs path gymnastics to decode into a number of possible | |
475 options. The issue with SQLite is that the normal URI scheme | |
476 looks like sqlite:///my/path.sqlite/table but how do we know | |
477 that the DB name is C</my/path.sqlite> and the table is | |
478 C<table>? | |
479 | |
480 The code takes a path, looks for the full path & if it cannot | |
481 be found looks for the file a directory back. In the above | |
482 example it would have looked for C</my/path.sqlite/table>, | |
483 found it to be non-existant, looked for C</my/path.sqlite> | |
484 and found it. | |
485 | |
486 If the path splitting procdure resulted in just 1 file after | |
487 the first existence check e.g. C<sqlite://db.sqlite> it assumes | |
488 that should be the name. If no file can be found we default to | |
489 the full length path. | |
490 Caller : internal | |
491 | |
492 =cut | |
493 | |
494 sub _decode_sqlite { | |
495 my ($self) = @_; | |
496 my $dbname; | |
497 my $table; | |
498 my $path = $self->path(); | |
499 if(-f $path) { | |
500 $dbname = $path; | |
501 } | |
502 else { | |
503 my ($volume, $directories, $file) = File::Spec->splitpath($path); | |
504 my @splitdirs = File::Spec->splitdir($directories); | |
505 if(@splitdirs == 1) { | |
506 $dbname = $path; | |
507 } | |
508 else { | |
509 my $new_file = pop(@splitdirs); | |
510 $new_file ||= q{}; | |
511 my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file); | |
512 if($new_path ne File::Spec->rootdir() && -f $new_path) { | |
513 $dbname = $new_path; | |
514 $table = $file; | |
515 } | |
516 else { | |
517 $dbname = $path; | |
518 } | |
519 } | |
520 } | |
521 | |
522 $self->db_params()->{dbname} = $dbname if $dbname; | |
523 $self->db_params()->{table} = $table if $table; | |
524 | |
525 return ($dbname, $table); | |
526 } | |
527 | |
528 =head2 generate_uri() | |
529 | |
530 Description : Generates a URI string from the paramaters in this object | |
531 Returntype : String | |
532 Exceptions : None | |
533 Status : Stable | |
534 | |
535 =cut | |
536 | |
537 sub generate_uri { | |
538 my ($self) = @_; | |
539 my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme()); | |
540 my $user_credentials = q{}; | |
541 my $host_credentials = q{}; | |
542 my $location = q{}; | |
543 | |
544 if($self->user() || $self->pass()) { | |
545 my $user = $self->user(); | |
546 my $pass = $self->pass(); | |
547 if($URI_ESCAPE) { | |
548 $user = uri_escape($user) if $user; | |
549 $pass = uri_escape($pass) if $pass; | |
550 } | |
551 $user_credentials = sprintf('%s%s@', | |
552 ( $user ? $user : q{} ), | |
553 ( $pass ? q{:}.$pass : q{} ) | |
554 ); | |
555 } | |
556 | |
557 if($self->host() || $self->port()) { | |
558 my $host = $self->host(); | |
559 my $port = $self->port(); | |
560 if($URI_ESCAPE) { | |
561 $host = uri_escape($host) if $host; | |
562 $port = uri_escape($port) if $port; | |
563 } | |
564 $host_credentials = sprintf('%s%s', | |
565 ( $host ? $host : q{} ), | |
566 ( $port ? q{:}.$port : q{} ) | |
567 ); | |
568 } | |
569 | |
570 if($self->is_db_scheme() || $self->scheme() eq '') { | |
571 if($self->scheme() eq 'sqlite') { | |
572 if(! $self->path()) { | |
573 my $tmp_loc = $self->db_params()->{dbname}; | |
574 throw "There is no dbname available" unless $tmp_loc; | |
575 $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table}; | |
576 $self->path($tmp_loc); | |
577 } | |
578 $location = $self->path(); | |
579 } | |
580 else { | |
581 my $dbname = $self->db_params()->{dbname}; | |
582 my $table = $self->db_params()->{table}; | |
583 if($dbname || $table) { | |
584 if($URI_ESCAPE) { | |
585 $dbname = uri_escape($dbname) if $dbname; | |
586 $table = uri_escape($table) if $table; | |
587 } | |
588 $location = sprintf('/%s%s', | |
589 ($dbname ? $dbname : q{}), | |
590 ($table ? q{/}.$table : q{}) | |
591 ); | |
592 } | |
593 } | |
594 } | |
595 else { | |
596 $location = $self->path() if $self->path(); | |
597 } | |
598 | |
599 my $param_string = q{}; | |
600 if(@{$self->param_keys()}) { | |
601 $param_string = q{?}; | |
602 my @params; | |
603 foreach my $key (@{$self->param_keys}) { | |
604 my $values_array = $self->get_params($key); | |
605 foreach my $value (@{$values_array}) { | |
606 my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key; | |
607 my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value; | |
608 push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key); | |
609 } | |
610 } | |
611 $param_string .= join(q{;}, @params); | |
612 } | |
613 | |
614 return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string); | |
615 } | |
616 | |
617 1; |