diff variant_effect_predictor/Bio/EnsEMBL/Utils/CliHelper.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Utils/CliHelper.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,290 @@
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::CliHelper
+
+=head1 VERSION
+
+$Revision: 1.6 $
+
+=head1 SYNOPSIS
+
+  use Bio::EnsEMBL::Utils::CliHelper;
+
+  my $cli = Bio::EnsEMBL::Utils::CliHelper->new();
+
+  # get the basic options for connecting to a database server
+  my $optsd = $cli->get_dba_opts();
+
+  # add the print option
+  push(@$optsd,"print|p");
+
+  # process the command line with the supplied options plus a reference to a help subroutine
+  my $opts = $cli->process_args($optsd,\&usage);
+  
+  # use the command line options to get an array of database details
+  for my $db_args (@{$cli->get_dba_args_for_opts($opts)}) {
+    # use the args to create a DBA
+    my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{$db_args});
+    ...
+  }
+  
+  For adding secondary databases, a prefix can be supplied. For instance, to add a second set of
+  db params prefixed with dna (-dnahost -dbport etc.) use the prefix argument with get_dba_opts and 
+  get_dba_args_for_opts:
+  # get the basic options for connecting to a database server
+  my $optsd =
+   [ @{ $cli_helper->get_dba_opts() }, @{ $cli_helper->get_dba_opts('gc') } ];
+  # process the command line with the supplied options plus a help subroutine
+  my $opts = $cli_helper->process_args( $optsd, \&usage );
+  # get the dna details
+  my ($dna_dba_details) =
+    @{ $cli_helper->get_dba_args_for_opts( $opts, 1, 'dna' ) };
+  my $dna_db =
+    Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$dna_dba_details} ) );
+
+=head1 DESCRIPTION
+
+Utilities for a more consistent approach to parsing and handling EnsEMBL script command lines
+
+=head1 METHODS
+
+See subroutines.
+
+=cut
+
+package Bio::EnsEMBL::Utils::CliHelper;
+
+use warnings;
+use strict;
+
+use Carp;
+use Data::Dumper;
+use Getopt::Long qw(:config auto_version no_ignore_case);
+
+use Bio::EnsEMBL::Registry;
+use Bio::EnsEMBL::DBSQL::DBConnection;
+use Bio::EnsEMBL::DBSQL::DBAdaptor;
+
+my $dba_opts =
+  [ { args => [ 'host', 'dbhost', 'h'  ], type => '=s' },
+    { args => [ 'port', 'dbport', 'P'  ], type => ':i' },
+    { args => [ 'user', 'dbuser', 'u'  ], type => '=s' },
+    { args => [ 'pass', 'dbpass', 'p'  ], type => ':s' },
+    { args => [         'dbname', 'D'  ], type => ':s' },
+    { args => [ 'pattern', 'dbpattern' ], type => ':s' },
+    { args => [ 'driver'               ], type => ':s' },
+    { args => [ 'species_id'           ], type => ':i' },
+    { args => [ 'species'              ], type => ':i' },
+  ];
+
+=head2 new()
+
+  Description : Construct a new instance of a CliHelper object
+  Returntype  : Bio::EnsEMBL::Utils:CliHelper
+  Status      : Under development
+
+=cut
+
+sub new {
+	my ( $class, @args ) = @_;
+	my $self = bless( {}, ref($class) || $class );
+	return $self;
+}
+
+=head2 get_dba_opts()
+
+  Arg [1]     : Optional prefix for dbnames e.g. dna
+  Description : Retrieves the standard options for connecting to one or more Ensembl databases
+  Returntype  : Arrayref of option definitions
+  Status      : Under development
+
+=cut
+
+sub get_dba_opts {
+	my ( $self, $prefix ) = @_;
+	$prefix ||= '';
+	my @dba_opts = map {
+		my $opt = join '|', map { $prefix . $_ } @{ $_->{args} };
+		$opt . $_->{type};
+	} @{$dba_opts};
+	return \@dba_opts;
+}
+
+=head2 process_args()
+
+    Arg [1]     : Arrayref of supported command line options (e.g. from get_dba_opts)
+    Arg [2]     : Ref to subroutine to be invoked when -help or -? is supplied
+    Description : Retrieves the standard options for connecting to one or more Ensembl databases
+    Returntype  : Hashref of parsed options
+    Status      : Under development
+
+=cut
+
+sub process_args {
+	my ( $self, $opts_def, $usage_sub ) = @_;
+	my $opts = {};
+	push @{$opts_def}, q/help|?/ => $usage_sub;
+	GetOptions( $opts, @{$opts_def} )
+	  || croak 'Could not parse command line arguments';
+	return $opts;
+}
+
+=head2 get_dba_args_for_opts()
+
+    Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
+    Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
+    Arg [3]     : Optional prefix to use when parsing e.g. dna
+    Description : Uses the parsed command line options to generate an array of DBAdaptor arguments 
+                : (e.g. expands dbpattern, finds all species_ids for multispecies databases)
+                : These can then be passed directly to Bio::EnsEMBL::DBSQL::DBAdaptor->new()
+    Returntype  : Arrayref of DBA argument hash refs 
+    Status      : Under development
+
+=cut
+
+sub get_dba_args_for_opts {
+	my ( $self, $opts, $single_species, $prefix ) = @_;
+	$prefix         ||= '';
+	$single_species ||= 0;
+	my ( $host, $port, $user, $pass, $dbname, $pattern, $driver ) =
+	  map { $prefix . $_ } qw(host port user pass dbname pattern driver);
+	my @db_args;
+	if ( defined $opts->{$host} ) {
+		my $dbc =
+		  Bio::EnsEMBL::DBSQL::DBConnection->new( -USER   => $opts->{$user},
+                                                          -PASS   => $opts->{$pass},
+                                                          -HOST   => $opts->{$host},
+                                                          -PORT   => $opts->{$port},
+                                                          -DRIVER => $opts->{$driver} );
+		my @dbnames;
+		if ( defined $opts->{$dbname} ) {
+			push @dbnames, $opts->{$dbname};
+		} elsif ( defined $opts->{$pattern} ) {
+		   # get a basic DBConnection and use to find out which dbs are involved
+			@dbnames =
+			  grep { m/$opts->{pattern}/smx }
+			  @{ $dbc->sql_helper()->execute_simple(q/SHOW DATABASES/) };
+		} else {
+			print Dumper($opts);
+			croak 'dbname or dbpattern arguments required';
+		}
+		for my $dbname (@dbnames) {
+
+      #Decipher group of DBAdaptor by capturing the name_name(_name?)_core_ code. Otherwise we don't know 
+      my ($group) = $dbname =~ /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_([a-z]+)(?:_\d+)?_\d+/;
+            
+			my $multi = 0;
+			my $species_ids = [ [ 1, undef ] ];
+			if ( !$single_species ) {
+				$species_ids =
+				  $dbc->sql_helper()
+				  ->execute(
+"SELECT species_id,meta_value FROM $dbname.meta WHERE meta_key='species.production_name'"
+				  );
+				if ( scalar( @{$species_ids} ) > 1 ) {
+					$multi = 1;
+				}
+				if ( defined $opts->{species_id} ) {
+					$species_ids =
+					  [ [ $opts->{species_id}, $opts->{species} ] ];
+				}
+			}
+			for my $species_id ( @{$species_ids} ) {
+				my $args = {
+					-HOST            => $opts->{$host},
+					-USER            => $opts->{$user},
+					-PORT            => $opts->{$port},
+					-PASS            => $opts->{$pass},
+					-DBNAME          => $dbname,
+					-DRIVER          => $opts->{$driver},
+					-SPECIES_ID      => $species_id->[0],
+					-SPECIES         => $species_id->[1],
+					-MULTISPECIES_DB => $multi };
+				$args->{-GROUP} = $group if $group;
+				push(@db_args, $args);
+			}
+		}
+	} ## end if ( defined $opts->{$host...})
+        else {
+            croak '(db)host arguments required';
+        }
+	return \@db_args;
+} ## end sub get_dba_args_for_opts
+
+=head2 get_dba_args_for_opts()
+
+    Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
+    Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
+    Arg [3]     : Optional prefix to use when parsing e.g. dna
+    Description : Uses the parsed command line options to generate an array DBAdaptors. 
+                : Note this can overload connections on a server
+    Returntype  : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor
+    Status      : Under development
+
+=cut
+
+sub get_dbas_for_opts {
+	my ( $self, $opts, $single_species, $prefix ) = @_;
+
+# get all the DBA details that we want to work with and create DBAs for each in turn
+	my $dbas;
+	for my $args (
+		  @{ $self->get_dba_args_for_opts( $opts, $single_species, $prefix ) } )
+	{
+		push @{$dbas}, Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$args} );
+	}
+	return $dbas;
+}
+
+=head2 load_registry_for_opts
+
+  Arg [1]    	: Hash of options (e.g. parsed from command line options by process_args()) 
+  Arg [2]     : Optional prefix to use when parsing e.g. dna or master 
+  Description	: Loads a Registry from the given options hash. If a C<registry> 
+                option is given then the code will call C<load_all>. Otherwise
+                we use the database parameters given to call 
+                C<load_registry_from_db()>.
+  Returntype  : Integer of the number of DBAdaptors loaded
+  Status      : Under development
+
+=cut
+
+sub load_registry_for_opts {
+  my ($self, $opts, $prefix) = @_;
+  $prefix ||= q{};
+  if($opts->{registry}) {
+    my $location = $opts->{registry};
+    return Bio::EnsEMBL::Registry->load_all($location);
+  }
+  my ( $host, $port, $user, $pass ) = map { $prefix . $_ } qw(host port user pass);
+  my %args = (
+    -HOST => $opts->{$host},
+    -PORT => $opts->{$port},
+    -USER => $opts->{$user},
+  );
+  $args{-PASS} = $opts->{$pass};
+  return Bio::EnsEMBL::Registry->load_registry_from_db(%args);
+}
+
+1;