comparison variant_effect_predictor/Bio/EnsEMBL/Utils/CliHelper.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1
2 =head1 LICENSE
3
4 Copyright (c) 1999-2012 The European Bioinformatics Institute and
5 Genome Research Limited. All rights reserved.
6
7 This software is distributed under a modified Apache license.
8 For license details, please see
9
10 http://www.ensembl.org/info/about/code_licence.html
11
12 =head1 CONTACT
13
14 Please email comments or questions to the public Ensembl
15 developers list at <dev@ensembl.org>.
16
17 Questions may also be sent to the Ensembl help desk at
18 <helpdesk@ensembl.org>.
19
20 =cut
21
22 =head1 NAME
23
24 Bio::EnsEMBL::Utils::CliHelper
25
26 =head1 VERSION
27
28 $Revision: 1.6 $
29
30 =head1 SYNOPSIS
31
32 use Bio::EnsEMBL::Utils::CliHelper;
33
34 my $cli = Bio::EnsEMBL::Utils::CliHelper->new();
35
36 # get the basic options for connecting to a database server
37 my $optsd = $cli->get_dba_opts();
38
39 # add the print option
40 push(@$optsd,"print|p");
41
42 # process the command line with the supplied options plus a reference to a help subroutine
43 my $opts = $cli->process_args($optsd,\&usage);
44
45 # use the command line options to get an array of database details
46 for my $db_args (@{$cli->get_dba_args_for_opts($opts)}) {
47 # use the args to create a DBA
48 my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(%{$db_args});
49 ...
50 }
51
52 For adding secondary databases, a prefix can be supplied. For instance, to add a second set of
53 db params prefixed with dna (-dnahost -dnaport etc.) use the prefix argument with get_dba_opts and
54 get_dba_args_for_opts:
55 # get the basic options for connecting to a database server
56 my $optsd =
57 [ @{ $cli_helper->get_dba_opts() }, @{ $cli_helper->get_dba_opts('dna') } ];
58 # process the command line with the supplied options plus a help subroutine
59 my $opts = $cli_helper->process_args( $optsd, \&usage );
60 # get the dna details
61 my ($dna_dba_details) =
62 @{ $cli_helper->get_dba_args_for_opts( $opts, 1, 'dna' ) };
63 my $dna_db =
64 Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$dna_dba_details} );
65
66 =head1 DESCRIPTION
67
68 Utilities for a more consistent approach to parsing and handling EnsEMBL script command lines
69
70 =head1 METHODS
71
72 See subroutines.
73
74 =cut
75
76 package Bio::EnsEMBL::Utils::CliHelper;
77
78 use warnings;
79 use strict;
80
81 use Carp;
82 use Data::Dumper;
83 use Getopt::Long qw(:config auto_version no_ignore_case);
84
85 use Bio::EnsEMBL::Registry;
86 use Bio::EnsEMBL::DBSQL::DBConnection;
87 use Bio::EnsEMBL::DBSQL::DBAdaptor;
88
# Table of the standard database connection options.
#
# Each entry holds:
#   args - the option name followed by its aliases; get_dba_opts() adds the
#          caller's prefix to every one of them and joins them with '|'
#   type - the Getopt::Long argument specifier appended to the joined names
#          ('=s' mandatory string, ':s' optional string, ':i' optional integer)
my $dba_opts = [
    { args => [ 'host', 'dbhost', 'h' ], type => '=s' },
    { args => [ 'port', 'dbport', 'P' ], type => ':i' },
    { args => [ 'user', 'dbuser', 'u' ], type => '=s' },
    { args => [ 'pass', 'dbpass', 'p' ], type => ':s' },
    { args => [ 'dbname',  'D' ],         type => ':s' },
    { args => [ 'pattern', 'dbpattern' ], type => ':s' },
    { args => ['driver'],     type => ':s' },
    { args => ['species_id'], type => ':i' },

    # The species value is handed on as the -SPECIES argument, i.e. a name,
    # so it has to be parsed as a string; ':i' turned any name into 0.
    { args => ['species'], type => ':s' },
];
100
=head2 new()

  Description : Construct a new, empty CliHelper instance. May be invoked
                on the class name or on an existing instance; any further
                arguments are ignored.
  Returntype  : Bio::EnsEMBL::Utils::CliHelper
  Status      : Under development

=cut

sub new {
    my ($invocant) = @_;

    # When called on an object, create the new one in that object's class
    my $package = ref($invocant) || $invocant;
    return bless {}, $package;
}
114
=head2 get_dba_opts()

  Arg [1]     : Optional prefix for the option names e.g. dna
  Description : Builds the Getopt::Long option definitions for the standard
                database connection options. The prefix, if any, is added
                to every option name and alias.
  Returntype  : Arrayref of option definition strings
  Status      : Under development

=cut

sub get_dba_opts {
    my ( $self, $prefix ) = @_;
    $prefix ||= '';

    my @definitions;
    for my $entry ( @{$dba_opts} ) {

        # e.g. prefix 'dna' + [host dbhost h] + '=s' -> 'dnahost|dnadbhost|dnah=s'
        my @names = map { $prefix . $_ } @{ $entry->{args} };
        push @definitions, join( '|', @names ) . $entry->{type};
    }
    return \@definitions;
}
133
=head2 process_args()

  Arg [1]     : Arrayref of supported command line options (e.g. from get_dba_opts)
  Arg [2]     : Ref to subroutine to be invoked when -help or -? is supplied
  Description : Parses the command line (@ARGV) with Getopt::Long using the
                supplied option definitions plus a help option. The supplied
                arrayref is left unmodified, so it can be reused.
  Returntype  : Hashref of parsed options
  Exceptions  : Croaks if the command line cannot be parsed
  Status      : Under development

=cut

sub process_args {
    my ( $self, $opts_def, $usage_sub ) = @_;
    my $opts = {};

    # Work on a copy: pushing onto the caller's array would add another
    # help entry to it on every call.
    my @definitions = @{ $opts_def || [] };

    # Attach the handler only when one was supplied; a bare undef in the
    # list would be taken by GetOptions as a further option definition.
    push @definitions, q/help|?/;
    push @definitions, $usage_sub if defined $usage_sub;

    GetOptions( $opts, @definitions )
      || croak 'Could not parse command line arguments';
    return $opts;
}
152
=head2 get_dba_args_for_opts()

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
  Arg [3]     : Optional prefix to use when parsing e.g. dna
  Description : Uses the parsed command line options to generate an array of DBAdaptor arguments
              : (e.g. expands dbpattern, finds all species_ids for multispecies databases)
              : These can then be passed directly to Bio::EnsEMBL::DBSQL::DBAdaptor->new()
              : The prefix is applied to every option that is read, including
              : pattern, species_id and species.
  Returntype  : Arrayref of DBA argument hash refs
  Exceptions  : Croaks if no host is given, or if neither dbname nor pattern is given
  Status      : Under development

=cut

sub get_dba_args_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;
    $prefix         ||= '';
    $single_species ||= 0;

    # get_dba_opts() prefixes every option name, so every key looked up
    # here has to carry the same prefix.
    my ( $host, $port, $user, $pass, $dbname, $pattern, $driver,
         $species_id_key, $species_key )
      = map { $prefix . $_ }
      qw(host port user pass dbname pattern driver species_id species);

    if ( !defined $opts->{$host} ) {
        croak '(db)host arguments required';
    }

    # Server-level connection used to list databases and read meta tables
    my $dbc =
      Bio::EnsEMBL::DBSQL::DBConnection->new( -USER   => $opts->{$user},
                                              -PASS   => $opts->{$pass},
                                              -HOST   => $opts->{$host},
                                              -PORT   => $opts->{$port},
                                              -DRIVER => $opts->{$driver} );

    my @dbnames;
    if ( defined $opts->{$dbname} ) {
        push @dbnames, $opts->{$dbname};
    }
    elsif ( defined $opts->{$pattern} ) {

        # keep only the databases on the server whose name matches the pattern
        my $name_regex = qr/$opts->{$pattern}/smx;
        @dbnames =
          grep { $_ =~ $name_regex }
          @{ $dbc->sql_helper()->execute_simple(q/SHOW DATABASES/) };
    }
    else {
        croak 'dbname or dbpattern arguments required';
    }

    my @db_args;
    for my $name (@dbnames) {

        # Decipher group of DBAdaptor by capturing the name_name(_name?)_core_ code.
        # Otherwise we don't know, and -GROUP is left out of the arguments.
        my ($group) =
          $name =~ /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_([a-z]+)(?:_\d+)?_\d+/;

        my $multi       = 0;
        my $species_ids = [ [ 1, undef ] ];
        if ( !$single_species ) {
            $species_ids =
              $dbc->sql_helper()
              ->execute(
                "SELECT species_id,meta_value FROM ${name}.meta WHERE meta_key='species.production_name'"
              );

            # more than one production name means a multispecies database
            if ( scalar( @{$species_ids} ) > 1 ) {
                $multi = 1;
            }

            # an explicit species_id restricts the result to that one species
            if ( defined $opts->{$species_id_key} ) {
                $species_ids =
                  [ [ $opts->{$species_id_key}, $opts->{$species_key} ] ];
            }
        }

        for my $species_id ( @{$species_ids} ) {
            my $args = {
                -HOST            => $opts->{$host},
                -USER            => $opts->{$user},
                -PORT            => $opts->{$port},
                -PASS            => $opts->{$pass},
                -DBNAME          => $name,
                -DRIVER          => $opts->{$driver},
                -SPECIES_ID      => $species_id->[0],
                -SPECIES         => $species_id->[1],
                -MULTISPECIES_DB => $multi };
            $args->{-GROUP} = $group if $group;
            push( @db_args, $args );
        }
    } ## end for my $name (@dbnames)

    return \@db_args;
} ## end sub get_dba_args_for_opts
234
=head2 get_dbas_for_opts()

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
  Arg [3]     : Optional prefix to use when parsing e.g. dna
  Description : Uses the parsed command line options to generate an array of DBAdaptors.
              : Note this can overload connections on a server
  Returntype  : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor (empty if nothing matched)
  Status      : Under development

=cut

sub get_dbas_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;

    # Start from an empty arrayref so the documented return type also holds
    # when no database details are found.
    my $dbas = [];

    # get all the DBA details that we want to work with and create DBAs for each in turn
    for my $args (
        @{ $self->get_dba_args_for_opts( $opts, $single_species, $prefix ) } )
    {
        push @{$dbas}, Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$args} );
    }
    return $dbas;
}
259
=head2 load_registry_for_opts

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : Optional prefix to use when parsing e.g. dna or master
  Description : Loads a Registry from the given options hash. If a true
                C<registry> option is present, its value is passed to
                C<load_all()>. Otherwise the (prefixed) host, port, user
                and pass options are passed to C<load_registry_from_db()>.
  Returntype  : Whatever the Registry call returns; documented as the
                integer number of DBAdaptors loaded
  Status      : Under development

=cut

sub load_registry_for_opts {
    my ( $self, $opts, $prefix ) = @_;
    $prefix ||= q{};

    # A registry location takes precedence over the connection options;
    # note that the registry key is never prefixed.
    my $registry_location = $opts->{registry};
    return Bio::EnsEMBL::Registry->load_all($registry_location)
      if $registry_location;

    my %key_for = map { $_ => $prefix . $_ } qw(host port user pass);
    my %connection = (
        -HOST => $opts->{ $key_for{host} },
        -PORT => $opts->{ $key_for{port} },
        -USER => $opts->{ $key_for{user} },
        -PASS => $opts->{ $key_for{pass} },
    );
    return Bio::EnsEMBL::Registry->load_registry_from_db(%connection);
}
289
290 1;