Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/CliHelper.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 | |
2 =head1 LICENSE | |
3 | |
4 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
5 Genome Research Limited. All rights reserved. | |
6 | |
7 This software is distributed under a modified Apache license. | |
8 For license details, please see | |
9 | |
10 http://www.ensembl.org/info/about/code_licence.html | |
11 | |
12 =head1 CONTACT | |
13 | |
14 Please email comments or questions to the public Ensembl | |
15 developers list at <dev@ensembl.org>. | |
16 | |
17 Questions may also be sent to the Ensembl help desk at | |
18 <helpdesk@ensembl.org>. | |
19 | |
20 =cut | |
21 | |
22 =head1 NAME | |
23 | |
24 Bio::EnsEMBL::Utils::CliHelper | |
25 | |
26 =head1 VERSION | |
27 | |
28 $Revision: 1.6 $ | |
29 | |
30 =head1 SYNOPSIS | |
31 | |
32 use Bio::EnsEMBL::Utils::CliHelper; | |
33 | |
34 my $cli = Bio::EnsEMBL::Utils::CliHelper->new(); | |
35 | |
36 # get the basic options for connecting to a database server | |
37 my $optsd = $cli->get_dba_opts(); | |
38 | |
39 # add the print option | |
40 push(@$optsd,"print|p"); | |
41 | |
42 # process the command line with the supplied options plus a reference to a help subroutine | |
43 my $opts = $cli->process_args($optsd,\&usage); | |
44 | |
45 # use the command line options to get an array of database details | |
46 for my $db_args (@{$cli->get_dba_args_for_opts($opts)}) { | |
47 # use the args to create a DBA | |
48 my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{$db_args}); | |
49 ... | |
50 } | |
51 | |
52 For adding secondary databases, a prefix can be supplied. For instance, to add a second set of | |
53 db params prefixed with dna (-dnahost -dnaport etc.) use the prefix argument with get_dba_opts and | |
54 get_dba_args_for_opts: | |
55 # get the basic options for connecting to a database server | |
56 my $optsd = | |
57 [ @{ $cli_helper->get_dba_opts() }, @{ $cli_helper->get_dba_opts('dna') } ]; | |
58 # process the command line with the supplied options plus a help subroutine | |
59 my $opts = $cli_helper->process_args( $optsd, \&usage ); | |
60 # get the dna details | |
61 my ($dna_dba_details) = | |
62 @{ $cli_helper->get_dba_args_for_opts( $opts, 1, 'dna' ) }; | |
63 my $dna_db = | |
64 Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$dna_dba_details} ); | |
65 | |
66 =head1 DESCRIPTION | |
67 | |
68 Utilities for a more consistent approach to parsing and handling EnsEMBL script command lines | |
69 | |
70 =head1 METHODS | |
71 | |
72 See subroutines. | |
73 | |
74 =cut | |
75 | |
76 package Bio::EnsEMBL::Utils::CliHelper; | |
77 | |
78 use warnings; | |
79 use strict; | |
80 | |
81 use Carp; | |
82 use Data::Dumper; | |
83 use Getopt::Long qw(:config auto_version no_ignore_case); | |
84 | |
85 use Bio::EnsEMBL::Registry; | |
86 use Bio::EnsEMBL::DBSQL::DBConnection; | |
87 use Bio::EnsEMBL::DBSQL::DBAdaptor; | |
88 | |
# Definitions of the standard database connection options. Each entry lists
# the option name followed by its aliases (args) and the Getopt::Long type
# suffix (type) that is appended when the option spec is built.
my $dba_opts = [
    { args => [ 'host', 'dbhost', 'h' ], type => '=s' },
    { args => [ 'port', 'dbport', 'P' ], type => ':i' },
    { args => [ 'user', 'dbuser', 'u' ], type => '=s' },
    { args => [ 'pass', 'dbpass', 'p' ], type => ':s' },
    { args => [ 'dbname',  'D' ],         type => ':s' },
    { args => [ 'pattern', 'dbpattern' ], type => ':s' },
    { args => ['driver'],                 type => ':s' },
    { args => ['species_id'],             type => ':i' },

    # species is a name, not a number: it is used interchangeably with the
    # species.production_name meta values when DBAdaptor arguments are built,
    # so it has to accept a string.
    { args => ['species'], type => ':s' },
];
100 | |
=head2 new()

  Description : Construct a new instance of a CliHelper object. May be
                invoked on the class or on an existing instance; any
                further arguments are ignored.
  Returntype  : Bio::EnsEMBL::Utils::CliHelper
  Status      : Under development

=cut

sub new {
    my $invocant = shift;

    # Support both Class->new() and $object->new()
    my $package = ref($invocant) || $invocant;
    return bless {}, $package;
}
114 | |
=head2 get_dba_opts()

  Arg [1]     : Optional prefix for the option names e.g. dna
  Description : Retrieves the standard options for connecting to one or more
                Ensembl databases. Every option name and alias is prepended
                with the prefix, the names are joined with '|' and the
                option's type suffix is appended.
  Returntype  : Arrayref of option definitions
  Status      : Under development

=cut

sub get_dba_opts {
    my ( $self, $prefix ) = @_;
    $prefix ||= '';

    my @specs;
    for my $definition ( @{$dba_opts} ) {
        my @names = map { $prefix . $_ } @{ $definition->{args} };
        push @specs, join( '|', @names ) . $definition->{type};
    }
    return \@specs;
}
133 | |
=head2 process_args()

  Arg [1]     : Arrayref of supported command line options (e.g. from
                get_dba_opts). The array is not modified.
  Arg [2]     : Ref to subroutine to be invoked when -help or -? is supplied.
                If omitted, the help flag is simply stored in the returned
                hash under the key 'help'.
  Description : Parses the command line (@ARGV) with Getopt::Long using the
                supplied option definitions plus a help|? option.
  Returntype  : Hashref of parsed options
  Exceptions  : Croaks if the command line cannot be parsed
  Status      : Under development

=cut

sub process_args {
    my ( $self, $opts_def, $usage_sub ) = @_;
    my $opts = {};

    # Work on a copy so that the caller's definitions are left untouched and
    # repeated calls do not accumulate duplicate help options
    my @spec = @{ $opts_def || [] };

    # A spec followed by a reference uses that reference as its destination;
    # without a handler the flag is stored in $opts like any other option
    if ( defined $usage_sub ) {
        push @spec, q/help|?/ => $usage_sub;
    }
    else {
        push @spec, q/help|?/;
    }

    GetOptions( $opts, @spec )
      || croak 'Could not parse command line arguments';
    return $opts;
}
152 | |
=head2 get_dba_args_for_opts()

  Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
  Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
  Arg [3]     : Optional prefix to use when parsing e.g. dna. The prefix is
                applied to every connection option, including the pattern.
                For species_id/species the prefixed options are used when a
                prefixed species_id is present, otherwise the unprefixed
                ones are used.
  Description : Uses the parsed command line options to generate an array of DBAdaptor arguments
              : (e.g. expands dbpattern, finds all species_ids for multispecies databases)
              : These can then be passed directly to Bio::EnsEMBL::DBSQL::DBAdaptor->new()
  Returntype  : Arrayref of DBA argument hash refs
  Exceptions  : Croaks if no host is given, or if neither a dbname nor a
                pattern is given
  Status      : Under development

=cut

sub get_dba_args_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;
    $prefix         ||= '';
    $single_species ||= 0;

    my ( $host, $port, $user, $pass, $dbname, $pattern, $driver ) =
      map { $prefix . $_ } qw(host port user pass dbname pattern driver);

    if ( !defined $opts->{$host} ) {
        croak '(db)host arguments required';
    }

    # Server-level connection, used to list databases and read meta tables
    my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
        -USER   => $opts->{$user},
        -PASS   => $opts->{$pass},
        -HOST   => $opts->{$host},
        -PORT   => $opts->{$port},
        -DRIVER => $opts->{$driver}
    );

    my @dbnames;
    if ( defined $opts->{$dbname} ) {
        push @dbnames, $opts->{$dbname};
    }
    elsif ( defined $opts->{$pattern} ) {

        # Use the prefixed pattern key and compile the expression only once
        my $name_re = qr/$opts->{$pattern}/smx;
        @dbnames =
          grep { $_ =~ $name_re }
          @{ $dbc->sql_helper()->execute_simple(q/SHOW DATABASES/) };
    }
    else {
        # The options are deliberately not dumped here: they include the
        # database password
        croak 'dbname or dbpattern arguments required';
    }

    # Prefer the prefixed species options; fall back to the unprefixed ones
    # so that existing callers relying on them keep working
    my $species_prefix =
      defined $opts->{ $prefix . 'species_id' } ? $prefix : '';
    my $species_id_opt   = $opts->{ $species_prefix . 'species_id' };
    my $species_name_opt = $opts->{ $species_prefix . 'species' };

    my @db_args;
    for my $db (@dbnames) {

        # Decipher group of DBAdaptor by capturing the
        # name_name(_name?)_core_ code. Otherwise we don't know
        my ($group) =
          $db =~ /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_([a-z]+)(?:_\d+)?_\d+/;

        my $multi       = 0;
        my $species_ids = [ [ 1, undef ] ];
        if ( !$single_species ) {
            $species_ids = $dbc->sql_helper()->execute(
"SELECT species_id,meta_value FROM $db.meta WHERE meta_key='species.production_name'"
            );

            # More than one production name means a multispecies database
            if ( scalar( @{$species_ids} ) > 1 ) {
                $multi = 1;
            }

            # An explicitly requested species replaces the discovered list
            if ( defined $species_id_opt ) {
                $species_ids = [ [ $species_id_opt, $species_name_opt ] ];
            }
        }

        for my $species_id ( @{$species_ids} ) {
            my $args = {
                -HOST            => $opts->{$host},
                -USER            => $opts->{$user},
                -PORT            => $opts->{$port},
                -PASS            => $opts->{$pass},
                -DBNAME          => $db,
                -DRIVER          => $opts->{$driver},
                -SPECIES_ID      => $species_id->[0],
                -SPECIES         => $species_id->[1],
                -MULTISPECIES_DB => $multi
            };
            $args->{-GROUP} = $group if $group;
            push @db_args, $args;
        }
    }
    return \@db_args;
} ## end sub get_dba_args_for_opts
234 | |
=head2 get_dbas_for_opts()

  Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
  Arg [2]     : If set to 1, the databases are assumed to have a single species only. Default is 0.
  Arg [3]     : Optional prefix to use when parsing e.g. dna
  Description : Uses the parsed command line options to generate an array of DBAdaptors,
              : one per argument set returned by get_dba_args_for_opts().
              : Note this can overload connections on a server
  Returntype  : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor (empty if no
                databases were found)
  Status      : Under development

=cut

sub get_dbas_for_opts {
    my ( $self, $opts, $single_species, $prefix ) = @_;

    # Get all the DBA details that we want to work with and create a DBA for
    # each in turn. Collecting into an array guarantees that an array
    # reference is returned even when there is nothing to create.
    my @dbas =
      map { Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$_} ) }
      @{ $self->get_dba_args_for_opts( $opts, $single_species, $prefix ) };
    return \@dbas;
}
259 | |
=head2 load_registry_for_opts

  Arg [1]     : Hash of options (e.g. parsed from command line options by process_args())
  Arg [2]     : Optional prefix to use when parsing e.g. dna or master
  Description : Loads a Registry from the given options hash. If a C<registry>
                option is given then the code will call C<load_all>. Otherwise
                we use the (optionally prefixed) host, port, user and pass
                options to call C<load_registry_from_db()>.
  Returntype  : Integer of the number of DBAdaptors loaded
  Status      : Under development

=cut

sub load_registry_for_opts {
    my ( $self, $opts, $prefix ) = @_;
    $prefix ||= q{};

    # A registry file takes precedence over individual connection details
    if ( $opts->{registry} ) {
        my $registry_file = $opts->{registry};
        return Bio::EnsEMBL::Registry->load_all($registry_file);
    }

    # Map each (prefixed) option onto the matching upper-case -NAME argument
    my %args;
    for my $field (qw(host port user pass)) {
        $args{ q{-} . uc $field } = $opts->{ $prefix . $field };
    }
    return Bio::EnsEMBL::Registry->load_registry_from_db(%args);
}
289 | |
290 1; |