Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Utils/URI.pm @ 3:d30fa12e4cc5 default tip
Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author | devteam <devteam@galaxyproject.org> |
---|---|
date | Mon, 13 Jan 2014 10:38:30 -0500 |
parents | 1f6dce3d34e0 |
children |
line wrap: on
line source
=head1 LICENSE Copyright (c) 1999-2012 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <dev@ensembl.org>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =cut =head1 NAME Bio::EnsEMBL::Utils::URI =head1 SYNOPSIS use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/; # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in my $db_uri = parse_uri('mysql://user@host:3157/db'); my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t'); is_uri('mysql://user@host'); # returns 1 is_uri('file:///my/path'); # returns 1 is_uri('/my/path'); # returns 0 =head1 DESCRIPTION This object is a generic URI parser which is primarily used in the parsing of database URIs into a more managable data structure. We also provide the resulting URI object =head1 DEPENDENCIES L<URI::Escape> is an optional dependency but if available the code will attempt to perform URI encoding/decoding on parameters. If you do not want this functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE> to false; =head1 METHODS =cut package Bio::EnsEMBL::Utils::URI; use strict; use warnings; use Scalar::Util qw/looks_like_number/; use Bio::EnsEMBL::Utils::Exception qw(throw); use File::Spec; our $URI_ESCAPE; $URI_ESCAPE = 0; eval { require URI::Escape; URI::Escape->import(); $URI_ESCAPE = 1; }; use base qw/Exporter/; our @EXPORT_OK; our %EXPORT_TAGS; @EXPORT_OK = qw/parse_uri is_uri/; %EXPORT_TAGS = ( all => [@EXPORT_OK] ); ####URI Parsing =head2 is_uri Arg[1] : Scalar; URI to parse Example : is_uri('mysql://user:pass@host:415/db'); Description : Looks for the existence of a URI scheme to decide if this is a classical URI. Whilst non-scheme based URIs can still be interprited it is useful to use when you need to know if you are going to work with a URI or not Returntype : Boolean Caller : General Status : Beta =cut sub is_uri { my ($uri) = @_; return 0 if ! $uri; my $SCHEME = qr{ ([^:]*) :// }xms; return ($uri =~ $SCHEME) ? 1 : 0; } =head2 parse_uri Arg[1] : Scalar; URI to parse Example : my $uri = parse_uri('mysql://user:pass@host:415/db'); Description : A URL parser which attempts to convert many different types of URL into a common data structure. Returntype : Bio::EnsEMBL::Utils::URI Caller : General Status : Beta =cut sub parse_uri { my ($url) = @_; my $SCHEME = qr{ ([^:]*) :// }xms; my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms; my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms; my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms; my $PARAMS = qr{ \? (.+)}xms; my $p; if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) { my $scheme = $1; $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme; $p = Bio::EnsEMBL::Utils::URI->new($scheme); my ($locator, $params) = ($2, $3); if($scheme eq 'file') { $p->path($locator); } elsif($scheme eq 'sqlite') { $p->path($locator); } else { if($locator =~ s/^$USER//) { $p->user($1); $p->pass($2); } if($locator =~ s/^$HOST//) { $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1); $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2); } if($p->is_db_scheme() || $scheme eq q{}) { if($locator =~ $DB) { $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1; $p->db_params()->{table} = ($URI_ESCAPE) ? uri_unescape($2) : $2; } } else { $p->path($locator); } } if(defined $params) { my @kv_pairs = split(/;|&/, $params); foreach my $kv_string (@kv_pairs) { my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string); $p->add_param($key, $value); } } } return $p; } ####URI Object =pod =head2 new() Arg[1] : String; scheme the URI will confrom to Description : New object call Returntype : Bio::EnsEMBL::Utils::URIParser::URI Exceptions : Thrown if scheme is undefined. Status : Stable =cut sub new { my ($class, $scheme) = @_; $class = ref($class) || $class; throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme; my $self = bless ({ params => {}, param_keys => [], db_params => {}, scheme => $scheme, }, $class); return $self; } =head2 db_schemes() Description: Returns a hash of scheme names known to be databases Returntype : HashRef Exceptions : None Status : Stable =cut sub db_schemes { my ($self) = @_; return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/}; } =head2 is_db_scheme() Description: Returns true if the code believes the scheme to be a Database Returntype : Boolean Exceptions : None Status : Stable =cut sub is_db_scheme { my ($self) = @_; return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0; } =head2 scheme() Description : Getter for the scheme attribute Returntype : String Exceptions : None Status : Stable =cut sub scheme { my ($self) = @_; return $self->{scheme}; } =head2 path() Arg[1] : Setter argument Description : Getter/setter for the path attribute Returntype : String Exceptions : None Status : Stable =cut sub path { my ($self, $path) = @_; $self->{path} = $path if defined $path; return $self->{path}; } =head2 user() Arg[1] : Setter argument Description : Getter/setter for the user attribute Returntype : String Exceptions : None Status : Stable =cut sub user { my ($self, $user) = @_; $self->{user} = $user if defined $user; return $self->{user}; } =head2 pass() Arg[1] : Setter argument Description : Getter/setter for the password attribute Returntype : String Exceptions : None Status : Stable =cut sub pass { my ($self, $pass) = @_; $self->{pass} = $pass if defined $pass; return $self->{pass}; } =head2 host() Arg[1] : Setter argument Description : Getter/setter for the host attribute Returntype : String Exceptions : None Status : Stable =cut sub host { my ($self, $host) = @_; $self->{host} = $host if defined $host; return $self->{host}; } =head2 port() Arg[1] : Setter argument Description : Getter/setter for the port attribute Returntype : Integer Exceptions : If port is not a number, less than 1 or not a whole integer Status : Stable =cut sub port { my ($self, $port) = @_; if(defined $port) { if(! looks_like_number($port) || $port < 1 || int($port) != $port) { throw "Port $port is not a number, less than 1 or not a whole integer"; } $self->{port} = $port if defined $port; } return $self->{port}; } =head2 param_keys() Description : Getter for the paramater map keys in the order they were first seen. Keys should only appear once in this array Returntype : ArrayRef Exceptions : None Status : Stable =cut sub param_keys { my ($self) = @_; return [@{$self->{param_keys}}]; } =head2 param_exists_ci() Arg[1] : String; Key Description : Performs a case-insensitive search for the given key Returntype : Boolean; returns true if your given key was seen Exceptions : None Status : Stable =cut sub param_exists_ci { my ($self, $key) = @_; my %keys = map { uc($_) => 1 } @{$self->param_keys()}; return ($keys{uc($key)}) ? 1 : 0; } =head2 add_param() Arg[1] : String; key Arg[1] : Scalar; value Description : Add a key/value to the params map. Multiple inserts of the same key is allowed Returntype : None Exceptions : None Status : Stable =cut sub add_param { my ($self, $key, $value) = @_; if(!exists $self->{params}->{$key}) { $self->{params}->{$key} = []; push(@{$self->{param_keys}}, $key); } push(@{$self->{params}->{$key}}, $value); return; } =head2 get_params() Arg[1] : String; key Description : Returns the values which were found to be linked to the given key. Arrays are returned because one key can have many values in a URI Returntype : ArrayRef[Scalar] Exceptions : None Status : Stable =cut sub get_params { my ($self, $key) = @_; return [] if ! exists $self->{params}->{$key}; return [@{$self->{params}->{$key}}]; } =head2 db_params() Description : Storage of parameters used only for database URIs since they require Returntype : HashRef; Database name is keyed under C<dbname> and the table is keyed under C<table> Exceptions : None Status : Stable =cut sub db_params { my ($self) = @_; return $self->{db_params}; } =head2 generate_dbsql_params() Arg[1] : boolean $no_table alows you to avoid pushing -TABLE as an output value Description : Generates a Hash of Ensembl compatible parameters to be used to construct a DB object. We combine those parameters which are deemed to be part of the C<db_params()> method under C<-DBNAME> and C<-TABLE>. We also search for a number of optional parameters which are lowercased equivalents of the construction parameters available from a L<Bio::EnsEMBL::DBSQL::DBAdaptor>, L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose> being supported. We also convert the scheme type into the driver attribute Returntype : Hash (not a reference). Output can be put into a C<DBConnection> constructor. Exceptions : None Status : Stable =cut sub generate_dbsql_params { my ($self, $no_table) = @_; my %db_params; $db_params{-DRIVER} = $self->scheme(); $db_params{-HOST} = $self->host() if $self->host(); $db_params{-PORT} = $self->port() if $self->port(); $db_params{-USER} = $self->user() if $self->user(); $db_params{-PASS} = $self->pass() if $self->pass(); my $dbname; my $table; if($self->scheme() eq 'sqlite') { ($dbname, $table) = $self->_decode_sqlite(); } else { $dbname = $self->db_params()->{dbname}; $table = $self->db_params()->{table}; } $db_params{-DBNAME} = $dbname if $dbname; $db_params{-TABLE} = $table if ! $no_table && $table; foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) { if($self->param_exists_ci($boolean_param)) { $db_params{q{-}.uc($boolean_param)} = 1; } } foreach my $value_param (qw/species group species_id wait_timeout/) { if($self->param_exists_ci($value_param)) { $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0]; } } return %db_params; } =head2 _decode_sqlite Description : Performs path gymnastics to decode into a number of possible options. The issue with SQLite is that the normal URI scheme looks like sqlite:///my/path.sqlite/table but how do we know that the DB name is C</my/path.sqlite> and the table is C<table>? The code takes a path, looks for the full path & if it cannot be found looks for the file a directory back. In the above example it would have looked for C</my/path.sqlite/table>, found it to be non-existant, looked for C</my/path.sqlite> and found it. If the path splitting procdure resulted in just 1 file after the first existence check e.g. C<sqlite://db.sqlite> it assumes that should be the name. If no file can be found we default to the full length path. Caller : internal =cut sub _decode_sqlite { my ($self) = @_; my $dbname; my $table; my $path = $self->path(); if(-f $path) { $dbname = $path; } else { my ($volume, $directories, $file) = File::Spec->splitpath($path); my @splitdirs = File::Spec->splitdir($directories); if(@splitdirs == 1) { $dbname = $path; } else { my $new_file = pop(@splitdirs); $new_file ||= q{}; my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file); if($new_path ne File::Spec->rootdir() && -f $new_path) { $dbname = $new_path; $table = $file; } else { $dbname = $path; } } } $self->db_params()->{dbname} = $dbname if $dbname; $self->db_params()->{table} = $table if $table; return ($dbname, $table); } =head2 generate_uri() Description : Generates a URI string from the paramaters in this object Returntype : String Exceptions : None Status : Stable =cut sub generate_uri { my ($self) = @_; my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme()); my $user_credentials = q{}; my $host_credentials = q{}; my $location = q{}; if($self->user() || $self->pass()) { my $user = $self->user(); my $pass = $self->pass(); if($URI_ESCAPE) { $user = uri_escape($user) if $user; $pass = uri_escape($pass) if $pass; } $user_credentials = sprintf('%s%s@', ( $user ? $user : q{} ), ( $pass ? q{:}.$pass : q{} ) ); } if($self->host() || $self->port()) { my $host = $self->host(); my $port = $self->port(); if($URI_ESCAPE) { $host = uri_escape($host) if $host; $port = uri_escape($port) if $port; } $host_credentials = sprintf('%s%s', ( $host ? $host : q{} ), ( $port ? q{:}.$port : q{} ) ); } if($self->is_db_scheme() || $self->scheme() eq '') { if($self->scheme() eq 'sqlite') { if(! $self->path()) { my $tmp_loc = $self->db_params()->{dbname}; throw "There is no dbname available" unless $tmp_loc; $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table}; $self->path($tmp_loc); } $location = $self->path(); } else { my $dbname = $self->db_params()->{dbname}; my $table = $self->db_params()->{table}; if($dbname || $table) { if($URI_ESCAPE) { $dbname = uri_escape($dbname) if $dbname; $table = uri_escape($table) if $table; } $location = sprintf('/%s%s', ($dbname ? $dbname : q{}), ($table ? q{/}.$table : q{}) ); } } } else { $location = $self->path() if $self->path(); } my $param_string = q{}; if(@{$self->param_keys()}) { $param_string = q{?}; my @params; foreach my $key (@{$self->param_keys}) { my $values_array = $self->get_params($key); foreach my $value (@{$values_array}) { my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key; my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value; push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key); } } $param_string .= join(q{;}, @params); } return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string); } 1;