diff variant_effect_predictor/Bio/EnsEMBL/Utils/URI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Utils/URI.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,617 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::URI
+
+=head1 SYNOPSIS
+
+  use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/;
+  # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in
+
+  my $db_uri = parse_uri('mysql://user@host:3157/db');
+  my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t');
+  
+  is_uri('mysql://user@host'); # returns 1
+  is_uri('file:///my/path'); # returns 1
+  is_uri('/my/path'); # returns 0
+
+=head1 DESCRIPTION
+
+This object is a generic URI parser which is primarily used in the
+parsing of database URIs into a more managable data structure. We also provide
+the resulting URI object
+
+=head1 DEPENDENCIES
+
+L<URI::Escape> is an optional dependency but if available the code will attempt
+to perform URI encoding/decoding on parameters. If you do not want this 
+functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE>
+to false;
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::Utils::URI;
+
+use strict;
+use warnings;
+
+use Scalar::Util qw/looks_like_number/;
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+use File::Spec;
+
+our $URI_ESCAPE;
+$URI_ESCAPE = 0;
+eval {
+  require URI::Escape;
+  URI::Escape->import();
+  $URI_ESCAPE = 1;
+};
+
+use base qw/Exporter/;
+our @EXPORT_OK;
+our %EXPORT_TAGS;
+@EXPORT_OK = qw/parse_uri is_uri/;
+%EXPORT_TAGS = ( all => [@EXPORT_OK] );
+
+####URI Parsing
+
+=head2 is_uri
+
+  Arg[1]      : Scalar; URI to parse
+  Example     : is_uri('mysql://user:pass@host:415/db');
+  Description : Looks for the existence of a URI scheme to decide if this
+                is a classical URI. Whilst non-scheme based URIs can still be
+                interprited it is useful to use when you need to know if
+                you are going to work with a URI or not
+  Returntype  : Boolean
+  Caller      : General
+  Status      : Beta
+
+=cut
+
+sub is_uri {
+  my ($uri) = @_;
+  return 0 if ! $uri;
+  my $SCHEME = qr{ ([^:]*) :// }xms;
+  return ($uri =~ $SCHEME) ? 1 : 0;
+}
+
+=head2 parse_uri
+
+  Arg[1]      : Scalar; URI to parse
+  Example     : my $uri = parse_uri('mysql://user:pass@host:415/db');
+  Description : A URL parser which attempts to convert many different types
+                of URL into a common data structure.
+  Returntype  : Bio::EnsEMBL::Utils::URI
+  Caller      : General
+  Status      : Beta
+
+=cut
+
+sub parse_uri {
+  my ($url) = @_;
+
+  my $SCHEME = qr{ ([^:]*) :// }xms;
+  my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms;
+  my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms;
+  my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms;
+  my $PARAMS = qr{ \? (.+)}xms;
+
+  my $p;
+
+  if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) {
+    my $scheme = $1;
+    $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme;
+    $p = Bio::EnsEMBL::Utils::URI->new($scheme);
+    my ($locator, $params) = ($2, $3);
+
+    if($scheme eq 'file') {
+      $p->path($locator);
+    }
+    elsif($scheme eq 'sqlite') {
+      $p->path($locator);
+    }
+    else {
+      if($locator =~ s/^$USER//) {
+        $p->user($1);
+        $p->pass($2);
+      }
+      if($locator =~ s/^$HOST//) {
+        $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1);
+        $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2);
+      }
+
+      if($p->is_db_scheme() || $scheme eq q{}) {
+        if($locator =~ $DB) {
+          $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1;
+          $p->db_params()->{table}  = ($URI_ESCAPE) ? uri_unescape($2) : $2;
+        }
+      }
+      else {
+        $p->path($locator);
+      }
+    }
+
+    if(defined $params) {
+      my @kv_pairs = split(/;|&/, $params);
+      foreach my $kv_string (@kv_pairs) {
+        my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string);
+        $p->add_param($key, $value);
+      }
+    }
+  }
+
+  return $p;
+}
+
+####URI Object
+
+=pod
+
+=head2 new()
+
+  Arg[1]      : String; scheme the URI will confrom to
+  Description : New object call
+  Returntype  : Bio::EnsEMBL::Utils::URIParser::URI
+  Exceptions  : Thrown if scheme is undefined.
+  Status      : Stable
+
+=cut
+
+sub new {
+  my ($class, $scheme) = @_;
+  $class = ref($class) || $class;
+  throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme;
+
+  my $self = bless ({
+    params => {},
+    param_keys => [],
+    db_params => {},
+    scheme => $scheme,
+  }, $class);
+
+  return $self;
+}
+
+=head2 db_schemes()
+
+  Description: Returns a hash of scheme names known to be databases
+  Returntype : HashRef
+  Exceptions : None
+  Status     : Stable
+
+=cut
+
+sub db_schemes {
+  my ($self) = @_;
+  return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/};
+}
+
+
+=head2 is_db_scheme()
+
+  Description: Returns true if the code believes the scheme to be a Database
+  Returntype : Boolean
+  Exceptions : None
+  Status     : Stable
+
+=cut
+
+sub is_db_scheme {
+  my ($self) = @_;
+  return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0;
+}
+
+=head2 scheme()
+
+  Description : Getter for the scheme attribute
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub scheme {
+  my ($self) = @_;
+  return $self->{scheme};
+}
+
+=head2 path()
+
+  Arg[1]      : Setter argument
+  Description : Getter/setter for the path attribute
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub path {
+  my ($self, $path) = @_;
+  $self->{path} = $path if defined $path;
+  return $self->{path};
+}
+
+=head2 user()
+
+  Arg[1]      : Setter argument
+  Description : Getter/setter for the user attribute
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub user {
+  my ($self, $user) = @_;
+  $self->{user} = $user if defined $user;
+  return $self->{user};
+}
+
+=head2 pass()
+
+  Arg[1]      : Setter argument
+  Description : Getter/setter for the password attribute
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub pass {
+  my ($self, $pass) = @_;
+  $self->{pass} = $pass if defined $pass;
+  return $self->{pass};
+}
+
+=head2 host()
+
+  Arg[1]      : Setter argument
+  Description : Getter/setter for the host attribute
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub host {
+  my ($self, $host) = @_;
+  $self->{host} = $host if defined $host;
+  return $self->{host};
+}
+
+=head2 port()
+
+  Arg[1]      : Setter argument
+  Description : Getter/setter for the port attribute
+  Returntype  : Integer
+  Exceptions  : If port is not a number, less than 1 or not a whole integer
+  Status      : Stable
+
+=cut
+
+sub port {
+  my ($self, $port) = @_;
+  if(defined $port) {
+    if(! looks_like_number($port) || $port < 1 || int($port) != $port) {
+      throw "Port $port is not a number, less than 1 or not a whole integer";
+    }
+    $self->{port} = $port if defined $port;
+  }
+  return $self->{port};
+}
+
+=head2 param_keys()
+
+  Description : Getter for the paramater map keys in the order they were first
+                seen. Keys should only appear once in this array
+  Returntype  : ArrayRef
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub param_keys {
+  my ($self) = @_;
+  return [@{$self->{param_keys}}];
+}
+
+=head2 param_exists_ci()
+
+  Arg[1]      : String; Key
+  Description : Performs a case-insensitive search for the given key
+  Returntype  : Boolean; returns true if your given key was seen
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub param_exists_ci {
+  my ($self, $key) = @_;
+  my %keys = map { uc($_) => 1 } @{$self->param_keys()};
+  return ($keys{uc($key)}) ? 1 : 0;
+}
+
+=head2 add_param()
+
+  Arg[1]      : String; key
+  Arg[1]      : Scalar; value
+  Description : Add a key/value to the params map. Multiple inserts of the same
+                key is allowed
+  Returntype  : None
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub add_param {
+  my ($self, $key, $value) = @_;
+  if(!exists $self->{params}->{$key}) {
+    $self->{params}->{$key} = [];
+    push(@{$self->{param_keys}}, $key);
+  }
+  push(@{$self->{params}->{$key}}, $value);
+  return;
+}
+
+=head2 get_params()
+
+  Arg[1]      : String; key
+  Description : Returns the values which were found to be linked to the given
+                key. Arrays are returned because one key can have many
+                values in a URI
+  Returntype  : ArrayRef[Scalar]
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub get_params {
+  my ($self, $key) = @_;
+  return [] if ! exists $self->{params}->{$key};
+  return [@{$self->{params}->{$key}}];
+}
+
+=head2 db_params()
+
+  Description : Storage of parameters used only for database URIs since
+                they require
+  Returntype  : HashRef; Database name is keyed under C<dbname> and the
+                table is keyed under C<table>
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub db_params {
+  my ($self) = @_;
+  return $self->{db_params};
+}
+
+=head2 generate_dbsql_params()
+
+  Arg[1]      : boolean $no_table alows you to avoid pushing -TABLE as an 
+                output value
+  Description : Generates a Hash of Ensembl compatible parameters to be used
+                to construct a DB object. We combine those parameters
+                which are deemed to be part of the C<db_params()> method
+                under C<-DBNAME> and C<-TABLE>. We also search for a number
+                of optional parameters which are lowercased equivalents
+                of the construction parameters available from a
+                L<Bio::EnsEMBL::DBSQL::DBAdaptor>,
+                L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose>
+                being supported.
+
+                We also convert the scheme type into the driver attribute
+
+  Returntype  : Hash (not a reference). Output can be put into a C<DBConnection>
+                constructor.
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub generate_dbsql_params {
+  my ($self, $no_table) = @_;
+  my %db_params;
+
+  $db_params{-DRIVER} = $self->scheme();
+  $db_params{-HOST}   = $self->host() if $self->host();
+  $db_params{-PORT}   = $self->port() if $self->port();
+  $db_params{-USER}   = $self->user() if $self->user();
+  $db_params{-PASS}   = $self->pass() if $self->pass();
+  
+  my $dbname;
+  my $table;
+  if($self->scheme() eq 'sqlite') {
+    ($dbname, $table) = $self->_decode_sqlite();
+  }
+  else {
+    $dbname = $self->db_params()->{dbname};
+    $table = $self->db_params()->{table};
+  }
+  
+  $db_params{-DBNAME} = $dbname if $dbname;
+  $db_params{-TABLE}  = $table if ! $no_table && $table;
+
+  foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) {
+    if($self->param_exists_ci($boolean_param)) {
+      $db_params{q{-}.uc($boolean_param)} = 1;
+    }
+  }
+  foreach my $value_param (qw/species group species_id wait_timeout/) {
+    if($self->param_exists_ci($value_param)) {
+      $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0];
+    }
+  }
+
+  return %db_params;
+}
+
+=head2 _decode_sqlite
+
+  Description : Performs path gymnastics to decode into a number of possible
+                options. The issue with SQLite is that the normal URI scheme
+                looks like sqlite:///my/path.sqlite/table but how do we know
+                that the DB name is C</my/path.sqlite> and the table is 
+                C<table>?
+                
+                The code takes a path, looks for the full path & if it cannot
+                be found looks for the file a directory back. In the above
+                example it would have looked for C</my/path.sqlite/table>,
+                found it to be non-existant, looked for C</my/path.sqlite>
+                and found it. 
+                
+                If the path splitting procdure resulted in just 1 file after
+                the first existence check e.g. C<sqlite://db.sqlite> it assumes
+                that should be the name. If no file can be found we default to
+                the full length path.
+  Caller      : internal
+
+=cut
+
+sub _decode_sqlite {
+  my ($self) = @_;
+  my $dbname;
+  my $table;
+  my $path = $self->path();
+  if(-f $path) {
+    $dbname = $path;
+  }
+  else {
+    my ($volume, $directories, $file) = File::Spec->splitpath($path);
+    my @splitdirs = File::Spec->splitdir($directories);
+    if(@splitdirs == 1) {
+      $dbname = $path;
+    }
+    else {
+      my $new_file = pop(@splitdirs);
+      $new_file ||= q{};
+      my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file);
+      if($new_path ne File::Spec->rootdir() && -f $new_path) {
+        $dbname = $new_path;
+        $table = $file;
+      }
+      else {
+        $dbname = $path;
+      }
+    }
+  }
+  
+  $self->db_params()->{dbname} = $dbname if $dbname;
+  $self->db_params()->{table} = $table if $table;
+  
+  return ($dbname, $table);
+}
+
+=head2 generate_uri()
+
+  Description : Generates a URI string from the paramaters in this object
+  Returntype  : String
+  Exceptions  : None
+  Status      : Stable
+
+=cut
+
+sub generate_uri {
+  my ($self) = @_;
+  my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme());
+  my $user_credentials = q{};
+  my $host_credentials = q{};
+  my $location = q{};
+
+  if($self->user() || $self->pass()) {
+    my $user = $self->user();
+    my $pass = $self->pass();
+    if($URI_ESCAPE) {
+      $user = uri_escape($user) if $user;
+      $pass = uri_escape($pass) if $pass;
+    }
+    $user_credentials = sprintf('%s%s@',
+      ( $user ? $user : q{} ),
+      ( $pass ? q{:}.$pass : q{} )
+    );
+  }
+
+  if($self->host() || $self->port()) {
+    my $host = $self->host();
+    my $port = $self->port();
+    if($URI_ESCAPE) {
+      $host = uri_escape($host) if $host;
+      $port = uri_escape($port) if $port;
+    }
+    $host_credentials = sprintf('%s%s',
+      ( $host ? $host : q{} ),
+      ( $port ? q{:}.$port : q{} )
+    );
+  }
+
+  if($self->is_db_scheme() || $self->scheme() eq '') {
+    if($self->scheme() eq 'sqlite') {
+      if(! $self->path()) {
+        my $tmp_loc = $self->db_params()->{dbname};
+        throw "There is no dbname available" unless $tmp_loc;
+        $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table};
+        $self->path($tmp_loc);
+      }
+      $location = $self->path();
+    }
+    else {
+      my $dbname = $self->db_params()->{dbname};
+      my $table = $self->db_params()->{table};
+      if($dbname || $table) {
+        if($URI_ESCAPE) {
+          $dbname = uri_escape($dbname) if $dbname;
+          $table = uri_escape($table) if $table;
+        }
+        $location = sprintf('/%s%s',
+          ($dbname ? $dbname : q{}),
+          ($table ? q{/}.$table : q{})
+        );
+      }
+    }
+  }
+  else {
+    $location = $self->path() if $self->path();
+  }
+
+  my $param_string = q{};
+  if(@{$self->param_keys()}) {
+    $param_string = q{?};
+    my @params;
+    foreach my $key (@{$self->param_keys}) {
+      my $values_array = $self->get_params($key);
+      foreach my $value (@{$values_array}) {
+        my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key;
+        my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value;
+        push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key);
+      }
+    }
+    $param_string .= join(q{;}, @params);
+  }
+
+  return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string);
+}
+
+1;