comparison variant_effect_predictor/Bio/EnsEMBL/Utils/URI.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Utils::URI
24
25 =head1 SYNOPSIS
26
27 use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/;
28 # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in
29
30 my $db_uri = parse_uri('mysql://user@host:3157/db');
31 my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t');
32
33 is_uri('mysql://user@host'); # returns 1
34 is_uri('file:///my/path'); # returns 1
35 is_uri('/my/path'); # returns 0
36
37 =head1 DESCRIPTION
38
39 This object is a generic URI parser which is primarily used in the
40 parsing of database URIs into a more managable data structure. We also provide
41 the resulting URI object
42
43 =head1 DEPENDENCIES
44
45 L<URI::Escape> is an optional dependency but if available the code will attempt
46 to perform URI encoding/decoding on parameters. If you do not want this
47 functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE>
48 to false;
49
50 =head1 METHODS
51
52 =cut
53
54 package Bio::EnsEMBL::Utils::URI;
55
56 use strict;
57 use warnings;
58
59 use Scalar::Util qw/looks_like_number/;
60 use Bio::EnsEMBL::Utils::Exception qw(throw);
61 use File::Spec;
62
63 our $URI_ESCAPE;
64 $URI_ESCAPE = 0;
65 eval {
66 require URI::Escape;
67 URI::Escape->import();
68 $URI_ESCAPE = 1;
69 };
70
71 use base qw/Exporter/;
72 our @EXPORT_OK;
73 our %EXPORT_TAGS;
74 @EXPORT_OK = qw/parse_uri is_uri/;
75 %EXPORT_TAGS = ( all => [@EXPORT_OK] );
76
77 ####URI Parsing
78
79 =head2 is_uri
80
81 Arg[1] : Scalar; URI to parse
82 Example : is_uri('mysql://user:pass@host:415/db');
83 Description : Looks for the existence of a URI scheme to decide if this
84 is a classical URI. Whilst non-scheme based URIs can still be
85 interprited it is useful to use when you need to know if
86 you are going to work with a URI or not
87 Returntype : Boolean
88 Caller : General
89 Status : Beta
90
91 =cut
92
93 sub is_uri {
94 my ($uri) = @_;
95 return 0 if ! $uri;
96 my $SCHEME = qr{ ([^:]*) :// }xms;
97 return ($uri =~ $SCHEME) ? 1 : 0;
98 }
99
100 =head2 parse_uri
101
102 Arg[1] : Scalar; URI to parse
103 Example : my $uri = parse_uri('mysql://user:pass@host:415/db');
104 Description : A URL parser which attempts to convert many different types
105 of URL into a common data structure.
106 Returntype : Bio::EnsEMBL::Utils::URI
107 Caller : General
108 Status : Beta
109
110 =cut
111
112 sub parse_uri {
113 my ($url) = @_;
114
115 my $SCHEME = qr{ ([^:]*) :// }xms;
116 my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms;
117 my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms;
118 my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms;
119 my $PARAMS = qr{ \? (.+)}xms;
120
121 my $p;
122
123 if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) {
124 my $scheme = $1;
125 $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme;
126 $p = Bio::EnsEMBL::Utils::URI->new($scheme);
127 my ($locator, $params) = ($2, $3);
128
129 if($scheme eq 'file') {
130 $p->path($locator);
131 }
132 elsif($scheme eq 'sqlite') {
133 $p->path($locator);
134 }
135 else {
136 if($locator =~ s/^$USER//) {
137 $p->user($1);
138 $p->pass($2);
139 }
140 if($locator =~ s/^$HOST//) {
141 $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1);
142 $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2);
143 }
144
145 if($p->is_db_scheme() || $scheme eq q{}) {
146 if($locator =~ $DB) {
147 $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1;
148 $p->db_params()->{table} = ($URI_ESCAPE) ? uri_unescape($2) : $2;
149 }
150 }
151 else {
152 $p->path($locator);
153 }
154 }
155
156 if(defined $params) {
157 my @kv_pairs = split(/;|&/, $params);
158 foreach my $kv_string (@kv_pairs) {
159 my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string);
160 $p->add_param($key, $value);
161 }
162 }
163 }
164
165 return $p;
166 }
167
168 ####URI Object
169
170 =pod
171
172 =head2 new()
173
174 Arg[1] : String; scheme the URI will confrom to
175 Description : New object call
176 Returntype : Bio::EnsEMBL::Utils::URIParser::URI
177 Exceptions : Thrown if scheme is undefined.
178 Status : Stable
179
180 =cut
181
182 sub new {
183 my ($class, $scheme) = @_;
184 $class = ref($class) || $class;
185 throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme;
186
187 my $self = bless ({
188 params => {},
189 param_keys => [],
190 db_params => {},
191 scheme => $scheme,
192 }, $class);
193
194 return $self;
195 }
196
197 =head2 db_schemes()
198
199 Description: Returns a hash of scheme names known to be databases
200 Returntype : HashRef
201 Exceptions : None
202 Status : Stable
203
204 =cut
205
206 sub db_schemes {
207 my ($self) = @_;
208 return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/};
209 }
210
211
212 =head2 is_db_scheme()
213
214 Description: Returns true if the code believes the scheme to be a Database
215 Returntype : Boolean
216 Exceptions : None
217 Status : Stable
218
219 =cut
220
221 sub is_db_scheme {
222 my ($self) = @_;
223 return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0;
224 }
225
226 =head2 scheme()
227
228 Description : Getter for the scheme attribute
229 Returntype : String
230 Exceptions : None
231 Status : Stable
232
233 =cut
234
235 sub scheme {
236 my ($self) = @_;
237 return $self->{scheme};
238 }
239
240 =head2 path()
241
242 Arg[1] : Setter argument
243 Description : Getter/setter for the path attribute
244 Returntype : String
245 Exceptions : None
246 Status : Stable
247
248 =cut
249
250 sub path {
251 my ($self, $path) = @_;
252 $self->{path} = $path if defined $path;
253 return $self->{path};
254 }
255
256 =head2 user()
257
258 Arg[1] : Setter argument
259 Description : Getter/setter for the user attribute
260 Returntype : String
261 Exceptions : None
262 Status : Stable
263
264 =cut
265
266 sub user {
267 my ($self, $user) = @_;
268 $self->{user} = $user if defined $user;
269 return $self->{user};
270 }
271
272 =head2 pass()
273
274 Arg[1] : Setter argument
275 Description : Getter/setter for the password attribute
276 Returntype : String
277 Exceptions : None
278 Status : Stable
279
280 =cut
281
282 sub pass {
283 my ($self, $pass) = @_;
284 $self->{pass} = $pass if defined $pass;
285 return $self->{pass};
286 }
287
288 =head2 host()
289
290 Arg[1] : Setter argument
291 Description : Getter/setter for the host attribute
292 Returntype : String
293 Exceptions : None
294 Status : Stable
295
296 =cut
297
298 sub host {
299 my ($self, $host) = @_;
300 $self->{host} = $host if defined $host;
301 return $self->{host};
302 }
303
304 =head2 port()
305
306 Arg[1] : Setter argument
307 Description : Getter/setter for the port attribute
308 Returntype : Integer
309 Exceptions : If port is not a number, less than 1 or not a whole integer
310 Status : Stable
311
312 =cut
313
314 sub port {
315 my ($self, $port) = @_;
316 if(defined $port) {
317 if(! looks_like_number($port) || $port < 1 || int($port) != $port) {
318 throw "Port $port is not a number, less than 1 or not a whole integer";
319 }
320 $self->{port} = $port if defined $port;
321 }
322 return $self->{port};
323 }
324
325 =head2 param_keys()
326
327 Description : Getter for the paramater map keys in the order they were first
328 seen. Keys should only appear once in this array
329 Returntype : ArrayRef
330 Exceptions : None
331 Status : Stable
332
333 =cut
334
335 sub param_keys {
336 my ($self) = @_;
337 return [@{$self->{param_keys}}];
338 }
339
340 =head2 param_exists_ci()
341
342 Arg[1] : String; Key
343 Description : Performs a case-insensitive search for the given key
344 Returntype : Boolean; returns true if your given key was seen
345 Exceptions : None
346 Status : Stable
347
348 =cut
349
350 sub param_exists_ci {
351 my ($self, $key) = @_;
352 my %keys = map { uc($_) => 1 } @{$self->param_keys()};
353 return ($keys{uc($key)}) ? 1 : 0;
354 }
355
356 =head2 add_param()
357
358 Arg[1] : String; key
359 Arg[1] : Scalar; value
360 Description : Add a key/value to the params map. Multiple inserts of the same
361 key is allowed
362 Returntype : None
363 Exceptions : None
364 Status : Stable
365
366 =cut
367
368 sub add_param {
369 my ($self, $key, $value) = @_;
370 if(!exists $self->{params}->{$key}) {
371 $self->{params}->{$key} = [];
372 push(@{$self->{param_keys}}, $key);
373 }
374 push(@{$self->{params}->{$key}}, $value);
375 return;
376 }
377
378 =head2 get_params()
379
380 Arg[1] : String; key
381 Description : Returns the values which were found to be linked to the given
382 key. Arrays are returned because one key can have many
383 values in a URI
384 Returntype : ArrayRef[Scalar]
385 Exceptions : None
386 Status : Stable
387
388 =cut
389
390 sub get_params {
391 my ($self, $key) = @_;
392 return [] if ! exists $self->{params}->{$key};
393 return [@{$self->{params}->{$key}}];
394 }
395
396 =head2 db_params()
397
398 Description : Storage of parameters used only for database URIs since
399 they require
400 Returntype : HashRef; Database name is keyed under C<dbname> and the
401 table is keyed under C<table>
402 Exceptions : None
403 Status : Stable
404
405 =cut
406
407 sub db_params {
408 my ($self) = @_;
409 return $self->{db_params};
410 }
411
412 =head2 generate_dbsql_params()
413
414 Arg[1] : boolean $no_table alows you to avoid pushing -TABLE as an
415 output value
416 Description : Generates a Hash of Ensembl compatible parameters to be used
417 to construct a DB object. We combine those parameters
418 which are deemed to be part of the C<db_params()> method
419 under C<-DBNAME> and C<-TABLE>. We also search for a number
420 of optional parameters which are lowercased equivalents
421 of the construction parameters available from a
422 L<Bio::EnsEMBL::DBSQL::DBAdaptor>,
423 L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose>
424 being supported.
425
426 We also convert the scheme type into the driver attribute
427
428 Returntype : Hash (not a reference). Output can be put into a C<DBConnection>
429 constructor.
430 Exceptions : None
431 Status : Stable
432
433 =cut
434
435 sub generate_dbsql_params {
436 my ($self, $no_table) = @_;
437 my %db_params;
438
439 $db_params{-DRIVER} = $self->scheme();
440 $db_params{-HOST} = $self->host() if $self->host();
441 $db_params{-PORT} = $self->port() if $self->port();
442 $db_params{-USER} = $self->user() if $self->user();
443 $db_params{-PASS} = $self->pass() if $self->pass();
444
445 my $dbname;
446 my $table;
447 if($self->scheme() eq 'sqlite') {
448 ($dbname, $table) = $self->_decode_sqlite();
449 }
450 else {
451 $dbname = $self->db_params()->{dbname};
452 $table = $self->db_params()->{table};
453 }
454
455 $db_params{-DBNAME} = $dbname if $dbname;
456 $db_params{-TABLE} = $table if ! $no_table && $table;
457
458 foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) {
459 if($self->param_exists_ci($boolean_param)) {
460 $db_params{q{-}.uc($boolean_param)} = 1;
461 }
462 }
463 foreach my $value_param (qw/species group species_id wait_timeout/) {
464 if($self->param_exists_ci($value_param)) {
465 $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0];
466 }
467 }
468
469 return %db_params;
470 }
471
472 =head2 _decode_sqlite
473
474 Description : Performs path gymnastics to decode into a number of possible
475 options. The issue with SQLite is that the normal URI scheme
476 looks like sqlite:///my/path.sqlite/table but how do we know
477 that the DB name is C</my/path.sqlite> and the table is
478 C<table>?
479
480 The code takes a path, looks for the full path & if it cannot
481 be found looks for the file a directory back. In the above
482 example it would have looked for C</my/path.sqlite/table>,
483 found it to be non-existant, looked for C</my/path.sqlite>
484 and found it.
485
486 If the path splitting procdure resulted in just 1 file after
487 the first existence check e.g. C<sqlite://db.sqlite> it assumes
488 that should be the name. If no file can be found we default to
489 the full length path.
490 Caller : internal
491
492 =cut
493
494 sub _decode_sqlite {
495 my ($self) = @_;
496 my $dbname;
497 my $table;
498 my $path = $self->path();
499 if(-f $path) {
500 $dbname = $path;
501 }
502 else {
503 my ($volume, $directories, $file) = File::Spec->splitpath($path);
504 my @splitdirs = File::Spec->splitdir($directories);
505 if(@splitdirs == 1) {
506 $dbname = $path;
507 }
508 else {
509 my $new_file = pop(@splitdirs);
510 $new_file ||= q{};
511 my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file);
512 if($new_path ne File::Spec->rootdir() && -f $new_path) {
513 $dbname = $new_path;
514 $table = $file;
515 }
516 else {
517 $dbname = $path;
518 }
519 }
520 }
521
522 $self->db_params()->{dbname} = $dbname if $dbname;
523 $self->db_params()->{table} = $table if $table;
524
525 return ($dbname, $table);
526 }
527
528 =head2 generate_uri()
529
530 Description : Generates a URI string from the paramaters in this object
531 Returntype : String
532 Exceptions : None
533 Status : Stable
534
535 =cut
536
537 sub generate_uri {
538 my ($self) = @_;
539 my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme());
540 my $user_credentials = q{};
541 my $host_credentials = q{};
542 my $location = q{};
543
544 if($self->user() || $self->pass()) {
545 my $user = $self->user();
546 my $pass = $self->pass();
547 if($URI_ESCAPE) {
548 $user = uri_escape($user) if $user;
549 $pass = uri_escape($pass) if $pass;
550 }
551 $user_credentials = sprintf('%s%s@',
552 ( $user ? $user : q{} ),
553 ( $pass ? q{:}.$pass : q{} )
554 );
555 }
556
557 if($self->host() || $self->port()) {
558 my $host = $self->host();
559 my $port = $self->port();
560 if($URI_ESCAPE) {
561 $host = uri_escape($host) if $host;
562 $port = uri_escape($port) if $port;
563 }
564 $host_credentials = sprintf('%s%s',
565 ( $host ? $host : q{} ),
566 ( $port ? q{:}.$port : q{} )
567 );
568 }
569
570 if($self->is_db_scheme() || $self->scheme() eq '') {
571 if($self->scheme() eq 'sqlite') {
572 if(! $self->path()) {
573 my $tmp_loc = $self->db_params()->{dbname};
574 throw "There is no dbname available" unless $tmp_loc;
575 $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table};
576 $self->path($tmp_loc);
577 }
578 $location = $self->path();
579 }
580 else {
581 my $dbname = $self->db_params()->{dbname};
582 my $table = $self->db_params()->{table};
583 if($dbname || $table) {
584 if($URI_ESCAPE) {
585 $dbname = uri_escape($dbname) if $dbname;
586 $table = uri_escape($table) if $table;
587 }
588 $location = sprintf('/%s%s',
589 ($dbname ? $dbname : q{}),
590 ($table ? q{/}.$table : q{})
591 );
592 }
593 }
594 }
595 else {
596 $location = $self->path() if $self->path();
597 }
598
599 my $param_string = q{};
600 if(@{$self->param_keys()}) {
601 $param_string = q{?};
602 my @params;
603 foreach my $key (@{$self->param_keys}) {
604 my $values_array = $self->get_params($key);
605 foreach my $value (@{$values_array}) {
606 my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key;
607 my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value;
608 push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key);
609 }
610 }
611 $param_string .= join(q{;}, @params);
612 }
613
614 return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string);
615 }
616
617 1;