0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Utils::URI
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/;
|
|
28 # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in
|
|
29
|
|
30 my $db_uri = parse_uri('mysql://user@host:3157/db');
|
|
31 my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t');
|
|
32
|
|
33 is_uri('mysql://user@host'); # returns 1
|
|
34 is_uri('file:///my/path'); # returns 1
|
|
35 is_uri('/my/path'); # returns 0
|
|
36
|
|
37 =head1 DESCRIPTION
|
|
38
|
|
39 This object is a generic URI parser which is primarily used in the
|
|
40 parsing of database URIs into a more managable data structure. We also provide
|
|
41 the resulting URI object
|
|
42
|
|
43 =head1 DEPENDENCIES
|
|
44
|
|
45 L<URI::Escape> is an optional dependency but if available the code will attempt
|
|
46 to perform URI encoding/decoding on parameters. If you do not want this
|
|
47 functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE>
|
|
48 to false;
|
|
49
|
|
50 =head1 METHODS
|
|
51
|
|
52 =cut
|
|
53
|
|
54 package Bio::EnsEMBL::Utils::URI;
|
|
55
|
|
56 use strict;
|
|
57 use warnings;
|
|
58
|
|
59 use Scalar::Util qw/looks_like_number/;
|
|
60 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
61 use File::Spec;
|
|
62
|
|
63 our $URI_ESCAPE;
|
|
64 $URI_ESCAPE = 0;
|
|
65 eval {
|
|
66 require URI::Escape;
|
|
67 URI::Escape->import();
|
|
68 $URI_ESCAPE = 1;
|
|
69 };
|
|
70
|
|
71 use base qw/Exporter/;
|
|
72 our @EXPORT_OK;
|
|
73 our %EXPORT_TAGS;
|
|
74 @EXPORT_OK = qw/parse_uri is_uri/;
|
|
75 %EXPORT_TAGS = ( all => [@EXPORT_OK] );
|
|
76
|
|
77 ####URI Parsing
|
|
78
|
|
79 =head2 is_uri
|
|
80
|
|
81 Arg[1] : Scalar; URI to parse
|
|
82 Example : is_uri('mysql://user:pass@host:415/db');
|
|
83 Description : Looks for the existence of a URI scheme to decide if this
|
|
84 is a classical URI. Whilst non-scheme based URIs can still be
|
|
85 interprited it is useful to use when you need to know if
|
|
86 you are going to work with a URI or not
|
|
87 Returntype : Boolean
|
|
88 Caller : General
|
|
89 Status : Beta
|
|
90
|
|
91 =cut
|
|
92
|
|
93 sub is_uri {
|
|
94 my ($uri) = @_;
|
|
95 return 0 if ! $uri;
|
|
96 my $SCHEME = qr{ ([^:]*) :// }xms;
|
|
97 return ($uri =~ $SCHEME) ? 1 : 0;
|
|
98 }
|
|
99
|
|
100 =head2 parse_uri
|
|
101
|
|
102 Arg[1] : Scalar; URI to parse
|
|
103 Example : my $uri = parse_uri('mysql://user:pass@host:415/db');
|
|
104 Description : A URL parser which attempts to convert many different types
|
|
105 of URL into a common data structure.
|
|
106 Returntype : Bio::EnsEMBL::Utils::URI
|
|
107 Caller : General
|
|
108 Status : Beta
|
|
109
|
|
110 =cut
|
|
111
|
|
112 sub parse_uri {
|
|
113 my ($url) = @_;
|
|
114
|
|
115 my $SCHEME = qr{ ([^:]*) :// }xms;
|
|
116 my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms;
|
|
117 my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms;
|
|
118 my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms;
|
|
119 my $PARAMS = qr{ \? (.+)}xms;
|
|
120
|
|
121 my $p;
|
|
122
|
|
123 if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) {
|
|
124 my $scheme = $1;
|
|
125 $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme;
|
|
126 $p = Bio::EnsEMBL::Utils::URI->new($scheme);
|
|
127 my ($locator, $params) = ($2, $3);
|
|
128
|
|
129 if($scheme eq 'file') {
|
|
130 $p->path($locator);
|
|
131 }
|
|
132 elsif($scheme eq 'sqlite') {
|
|
133 $p->path($locator);
|
|
134 }
|
|
135 else {
|
|
136 if($locator =~ s/^$USER//) {
|
|
137 $p->user($1);
|
|
138 $p->pass($2);
|
|
139 }
|
|
140 if($locator =~ s/^$HOST//) {
|
|
141 $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1);
|
|
142 $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2);
|
|
143 }
|
|
144
|
|
145 if($p->is_db_scheme() || $scheme eq q{}) {
|
|
146 if($locator =~ $DB) {
|
|
147 $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1;
|
|
148 $p->db_params()->{table} = ($URI_ESCAPE) ? uri_unescape($2) : $2;
|
|
149 }
|
|
150 }
|
|
151 else {
|
|
152 $p->path($locator);
|
|
153 }
|
|
154 }
|
|
155
|
|
156 if(defined $params) {
|
|
157 my @kv_pairs = split(/;|&/, $params);
|
|
158 foreach my $kv_string (@kv_pairs) {
|
|
159 my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string);
|
|
160 $p->add_param($key, $value);
|
|
161 }
|
|
162 }
|
|
163 }
|
|
164
|
|
165 return $p;
|
|
166 }
|
|
167
|
|
168 ####URI Object
|
|
169
|
|
170 =pod
|
|
171
|
|
172 =head2 new()
|
|
173
|
|
174 Arg[1] : String; scheme the URI will confrom to
|
|
175 Description : New object call
|
|
176 Returntype : Bio::EnsEMBL::Utils::URIParser::URI
|
|
177 Exceptions : Thrown if scheme is undefined.
|
|
178 Status : Stable
|
|
179
|
|
180 =cut
|
|
181
|
|
182 sub new {
|
|
183 my ($class, $scheme) = @_;
|
|
184 $class = ref($class) || $class;
|
|
185 throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme;
|
|
186
|
|
187 my $self = bless ({
|
|
188 params => {},
|
|
189 param_keys => [],
|
|
190 db_params => {},
|
|
191 scheme => $scheme,
|
|
192 }, $class);
|
|
193
|
|
194 return $self;
|
|
195 }
|
|
196
|
|
197 =head2 db_schemes()
|
|
198
|
|
199 Description: Returns a hash of scheme names known to be databases
|
|
200 Returntype : HashRef
|
|
201 Exceptions : None
|
|
202 Status : Stable
|
|
203
|
|
204 =cut
|
|
205
|
|
206 sub db_schemes {
|
|
207 my ($self) = @_;
|
|
208 return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/};
|
|
209 }
|
|
210
|
|
211
|
|
212 =head2 is_db_scheme()
|
|
213
|
|
214 Description: Returns true if the code believes the scheme to be a Database
|
|
215 Returntype : Boolean
|
|
216 Exceptions : None
|
|
217 Status : Stable
|
|
218
|
|
219 =cut
|
|
220
|
|
221 sub is_db_scheme {
|
|
222 my ($self) = @_;
|
|
223 return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0;
|
|
224 }
|
|
225
|
|
226 =head2 scheme()
|
|
227
|
|
228 Description : Getter for the scheme attribute
|
|
229 Returntype : String
|
|
230 Exceptions : None
|
|
231 Status : Stable
|
|
232
|
|
233 =cut
|
|
234
|
|
235 sub scheme {
|
|
236 my ($self) = @_;
|
|
237 return $self->{scheme};
|
|
238 }
|
|
239
|
|
240 =head2 path()
|
|
241
|
|
242 Arg[1] : Setter argument
|
|
243 Description : Getter/setter for the path attribute
|
|
244 Returntype : String
|
|
245 Exceptions : None
|
|
246 Status : Stable
|
|
247
|
|
248 =cut
|
|
249
|
|
250 sub path {
|
|
251 my ($self, $path) = @_;
|
|
252 $self->{path} = $path if defined $path;
|
|
253 return $self->{path};
|
|
254 }
|
|
255
|
|
256 =head2 user()
|
|
257
|
|
258 Arg[1] : Setter argument
|
|
259 Description : Getter/setter for the user attribute
|
|
260 Returntype : String
|
|
261 Exceptions : None
|
|
262 Status : Stable
|
|
263
|
|
264 =cut
|
|
265
|
|
266 sub user {
|
|
267 my ($self, $user) = @_;
|
|
268 $self->{user} = $user if defined $user;
|
|
269 return $self->{user};
|
|
270 }
|
|
271
|
|
272 =head2 pass()
|
|
273
|
|
274 Arg[1] : Setter argument
|
|
275 Description : Getter/setter for the password attribute
|
|
276 Returntype : String
|
|
277 Exceptions : None
|
|
278 Status : Stable
|
|
279
|
|
280 =cut
|
|
281
|
|
282 sub pass {
|
|
283 my ($self, $pass) = @_;
|
|
284 $self->{pass} = $pass if defined $pass;
|
|
285 return $self->{pass};
|
|
286 }
|
|
287
|
|
288 =head2 host()
|
|
289
|
|
290 Arg[1] : Setter argument
|
|
291 Description : Getter/setter for the host attribute
|
|
292 Returntype : String
|
|
293 Exceptions : None
|
|
294 Status : Stable
|
|
295
|
|
296 =cut
|
|
297
|
|
298 sub host {
|
|
299 my ($self, $host) = @_;
|
|
300 $self->{host} = $host if defined $host;
|
|
301 return $self->{host};
|
|
302 }
|
|
303
|
|
304 =head2 port()
|
|
305
|
|
306 Arg[1] : Setter argument
|
|
307 Description : Getter/setter for the port attribute
|
|
308 Returntype : Integer
|
|
309 Exceptions : If port is not a number, less than 1 or not a whole integer
|
|
310 Status : Stable
|
|
311
|
|
312 =cut
|
|
313
|
|
314 sub port {
|
|
315 my ($self, $port) = @_;
|
|
316 if(defined $port) {
|
|
317 if(! looks_like_number($port) || $port < 1 || int($port) != $port) {
|
|
318 throw "Port $port is not a number, less than 1 or not a whole integer";
|
|
319 }
|
|
320 $self->{port} = $port if defined $port;
|
|
321 }
|
|
322 return $self->{port};
|
|
323 }
|
|
324
|
|
325 =head2 param_keys()
|
|
326
|
|
327 Description : Getter for the paramater map keys in the order they were first
|
|
328 seen. Keys should only appear once in this array
|
|
329 Returntype : ArrayRef
|
|
330 Exceptions : None
|
|
331 Status : Stable
|
|
332
|
|
333 =cut
|
|
334
|
|
335 sub param_keys {
|
|
336 my ($self) = @_;
|
|
337 return [@{$self->{param_keys}}];
|
|
338 }
|
|
339
|
|
340 =head2 param_exists_ci()
|
|
341
|
|
342 Arg[1] : String; Key
|
|
343 Description : Performs a case-insensitive search for the given key
|
|
344 Returntype : Boolean; returns true if your given key was seen
|
|
345 Exceptions : None
|
|
346 Status : Stable
|
|
347
|
|
348 =cut
|
|
349
|
|
350 sub param_exists_ci {
|
|
351 my ($self, $key) = @_;
|
|
352 my %keys = map { uc($_) => 1 } @{$self->param_keys()};
|
|
353 return ($keys{uc($key)}) ? 1 : 0;
|
|
354 }
|
|
355
|
|
356 =head2 add_param()
|
|
357
|
|
358 Arg[1] : String; key
|
|
359 Arg[1] : Scalar; value
|
|
360 Description : Add a key/value to the params map. Multiple inserts of the same
|
|
361 key is allowed
|
|
362 Returntype : None
|
|
363 Exceptions : None
|
|
364 Status : Stable
|
|
365
|
|
366 =cut
|
|
367
|
|
368 sub add_param {
|
|
369 my ($self, $key, $value) = @_;
|
|
370 if(!exists $self->{params}->{$key}) {
|
|
371 $self->{params}->{$key} = [];
|
|
372 push(@{$self->{param_keys}}, $key);
|
|
373 }
|
|
374 push(@{$self->{params}->{$key}}, $value);
|
|
375 return;
|
|
376 }
|
|
377
|
|
378 =head2 get_params()
|
|
379
|
|
380 Arg[1] : String; key
|
|
381 Description : Returns the values which were found to be linked to the given
|
|
382 key. Arrays are returned because one key can have many
|
|
383 values in a URI
|
|
384 Returntype : ArrayRef[Scalar]
|
|
385 Exceptions : None
|
|
386 Status : Stable
|
|
387
|
|
388 =cut
|
|
389
|
|
390 sub get_params {
|
|
391 my ($self, $key) = @_;
|
|
392 return [] if ! exists $self->{params}->{$key};
|
|
393 return [@{$self->{params}->{$key}}];
|
|
394 }
|
|
395
|
|
396 =head2 db_params()
|
|
397
|
|
398 Description : Storage of parameters used only for database URIs since
|
|
399 they require
|
|
400 Returntype : HashRef; Database name is keyed under C<dbname> and the
|
|
401 table is keyed under C<table>
|
|
402 Exceptions : None
|
|
403 Status : Stable
|
|
404
|
|
405 =cut
|
|
406
|
|
407 sub db_params {
|
|
408 my ($self) = @_;
|
|
409 return $self->{db_params};
|
|
410 }
|
|
411
|
|
412 =head2 generate_dbsql_params()
|
|
413
|
|
414 Arg[1] : boolean $no_table alows you to avoid pushing -TABLE as an
|
|
415 output value
|
|
416 Description : Generates a Hash of Ensembl compatible parameters to be used
|
|
417 to construct a DB object. We combine those parameters
|
|
418 which are deemed to be part of the C<db_params()> method
|
|
419 under C<-DBNAME> and C<-TABLE>. We also search for a number
|
|
420 of optional parameters which are lowercased equivalents
|
|
421 of the construction parameters available from a
|
|
422 L<Bio::EnsEMBL::DBSQL::DBAdaptor>,
|
|
423 L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose>
|
|
424 being supported.
|
|
425
|
|
426 We also convert the scheme type into the driver attribute
|
|
427
|
|
428 Returntype : Hash (not a reference). Output can be put into a C<DBConnection>
|
|
429 constructor.
|
|
430 Exceptions : None
|
|
431 Status : Stable
|
|
432
|
|
433 =cut
|
|
434
|
|
435 sub generate_dbsql_params {
|
|
436 my ($self, $no_table) = @_;
|
|
437 my %db_params;
|
|
438
|
|
439 $db_params{-DRIVER} = $self->scheme();
|
|
440 $db_params{-HOST} = $self->host() if $self->host();
|
|
441 $db_params{-PORT} = $self->port() if $self->port();
|
|
442 $db_params{-USER} = $self->user() if $self->user();
|
|
443 $db_params{-PASS} = $self->pass() if $self->pass();
|
|
444
|
|
445 my $dbname;
|
|
446 my $table;
|
|
447 if($self->scheme() eq 'sqlite') {
|
|
448 ($dbname, $table) = $self->_decode_sqlite();
|
|
449 }
|
|
450 else {
|
|
451 $dbname = $self->db_params()->{dbname};
|
|
452 $table = $self->db_params()->{table};
|
|
453 }
|
|
454
|
|
455 $db_params{-DBNAME} = $dbname if $dbname;
|
|
456 $db_params{-TABLE} = $table if ! $no_table && $table;
|
|
457
|
|
458 foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) {
|
|
459 if($self->param_exists_ci($boolean_param)) {
|
|
460 $db_params{q{-}.uc($boolean_param)} = 1;
|
|
461 }
|
|
462 }
|
|
463 foreach my $value_param (qw/species group species_id wait_timeout/) {
|
|
464 if($self->param_exists_ci($value_param)) {
|
|
465 $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0];
|
|
466 }
|
|
467 }
|
|
468
|
|
469 return %db_params;
|
|
470 }
|
|
471
|
|
472 =head2 _decode_sqlite
|
|
473
|
|
474 Description : Performs path gymnastics to decode into a number of possible
|
|
475 options. The issue with SQLite is that the normal URI scheme
|
|
476 looks like sqlite:///my/path.sqlite/table but how do we know
|
|
477 that the DB name is C</my/path.sqlite> and the table is
|
|
478 C<table>?
|
|
479
|
|
480 The code takes a path, looks for the full path & if it cannot
|
|
481 be found looks for the file a directory back. In the above
|
|
482 example it would have looked for C</my/path.sqlite/table>,
|
|
483 found it to be non-existant, looked for C</my/path.sqlite>
|
|
484 and found it.
|
|
485
|
|
486 If the path splitting procdure resulted in just 1 file after
|
|
487 the first existence check e.g. C<sqlite://db.sqlite> it assumes
|
|
488 that should be the name. If no file can be found we default to
|
|
489 the full length path.
|
|
490 Caller : internal
|
|
491
|
|
492 =cut
|
|
493
|
|
494 sub _decode_sqlite {
|
|
495 my ($self) = @_;
|
|
496 my $dbname;
|
|
497 my $table;
|
|
498 my $path = $self->path();
|
|
499 if(-f $path) {
|
|
500 $dbname = $path;
|
|
501 }
|
|
502 else {
|
|
503 my ($volume, $directories, $file) = File::Spec->splitpath($path);
|
|
504 my @splitdirs = File::Spec->splitdir($directories);
|
|
505 if(@splitdirs == 1) {
|
|
506 $dbname = $path;
|
|
507 }
|
|
508 else {
|
|
509 my $new_file = pop(@splitdirs);
|
|
510 $new_file ||= q{};
|
|
511 my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file);
|
|
512 if($new_path ne File::Spec->rootdir() && -f $new_path) {
|
|
513 $dbname = $new_path;
|
|
514 $table = $file;
|
|
515 }
|
|
516 else {
|
|
517 $dbname = $path;
|
|
518 }
|
|
519 }
|
|
520 }
|
|
521
|
|
522 $self->db_params()->{dbname} = $dbname if $dbname;
|
|
523 $self->db_params()->{table} = $table if $table;
|
|
524
|
|
525 return ($dbname, $table);
|
|
526 }
|
|
527
|
|
528 =head2 generate_uri()
|
|
529
|
|
530 Description : Generates a URI string from the paramaters in this object
|
|
531 Returntype : String
|
|
532 Exceptions : None
|
|
533 Status : Stable
|
|
534
|
|
535 =cut
|
|
536
|
|
537 sub generate_uri {
|
|
538 my ($self) = @_;
|
|
539 my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme());
|
|
540 my $user_credentials = q{};
|
|
541 my $host_credentials = q{};
|
|
542 my $location = q{};
|
|
543
|
|
544 if($self->user() || $self->pass()) {
|
|
545 my $user = $self->user();
|
|
546 my $pass = $self->pass();
|
|
547 if($URI_ESCAPE) {
|
|
548 $user = uri_escape($user) if $user;
|
|
549 $pass = uri_escape($pass) if $pass;
|
|
550 }
|
|
551 $user_credentials = sprintf('%s%s@',
|
|
552 ( $user ? $user : q{} ),
|
|
553 ( $pass ? q{:}.$pass : q{} )
|
|
554 );
|
|
555 }
|
|
556
|
|
557 if($self->host() || $self->port()) {
|
|
558 my $host = $self->host();
|
|
559 my $port = $self->port();
|
|
560 if($URI_ESCAPE) {
|
|
561 $host = uri_escape($host) if $host;
|
|
562 $port = uri_escape($port) if $port;
|
|
563 }
|
|
564 $host_credentials = sprintf('%s%s',
|
|
565 ( $host ? $host : q{} ),
|
|
566 ( $port ? q{:}.$port : q{} )
|
|
567 );
|
|
568 }
|
|
569
|
|
570 if($self->is_db_scheme() || $self->scheme() eq '') {
|
|
571 if($self->scheme() eq 'sqlite') {
|
|
572 if(! $self->path()) {
|
|
573 my $tmp_loc = $self->db_params()->{dbname};
|
|
574 throw "There is no dbname available" unless $tmp_loc;
|
|
575 $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table};
|
|
576 $self->path($tmp_loc);
|
|
577 }
|
|
578 $location = $self->path();
|
|
579 }
|
|
580 else {
|
|
581 my $dbname = $self->db_params()->{dbname};
|
|
582 my $table = $self->db_params()->{table};
|
|
583 if($dbname || $table) {
|
|
584 if($URI_ESCAPE) {
|
|
585 $dbname = uri_escape($dbname) if $dbname;
|
|
586 $table = uri_escape($table) if $table;
|
|
587 }
|
|
588 $location = sprintf('/%s%s',
|
|
589 ($dbname ? $dbname : q{}),
|
|
590 ($table ? q{/}.$table : q{})
|
|
591 );
|
|
592 }
|
|
593 }
|
|
594 }
|
|
595 else {
|
|
596 $location = $self->path() if $self->path();
|
|
597 }
|
|
598
|
|
599 my $param_string = q{};
|
|
600 if(@{$self->param_keys()}) {
|
|
601 $param_string = q{?};
|
|
602 my @params;
|
|
603 foreach my $key (@{$self->param_keys}) {
|
|
604 my $values_array = $self->get_params($key);
|
|
605 foreach my $value (@{$values_array}) {
|
|
606 my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key;
|
|
607 my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value;
|
|
608 push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key);
|
|
609 }
|
|
610 }
|
|
611 $param_string .= join(q{;}, @params);
|
|
612 }
|
|
613
|
|
614 return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string);
|
|
615 }
|
|
616
|
|
617 1;
|