comparison variant_effect_predictor/Bio/EnsEMBL/Registry.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Registry
24
25 =head1 SYNOPSIS
26
27 use Bio::EnsEMBL::Registry;
28
29 my $registry = 'Bio::EnsEMBL::Registry';
30
31 $registry->load_all("configuration_file");
32
33 $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' );
34
35 =head1 DESCRIPTION
36
37 All Adaptors are stored/registered using this module. This module should
38 then be used to get the adaptors needed.
39
40 The registry can be loaded from a configuration file using the load_all
41 method.
42
43 If a filename is passed to load_all then this is used. Else if the
44 environment variable ENSEMBL_REGISTRY is set to the name on an existing
45 configuration file, then this is used. Else if the file .ensembl_init
46 in your home directory exist, it is used.
47
48 For the Web server ENSEMBL_REGISTRY should be set in SiteDefs.pm. This
49 will then be passed on to load_all.
50
51
52 The registry can also be loaded via the method load_registry_from_db
53 which given a database host will load the latest versions of the Ensembl
54 databases from it.
55
56 The four types of registries are for db adaptors, dba adaptors, dna
57 adaptors and the standard type.
58
59 =head2 db
60
61 These are registries for backwards compatibility and enable the
62 subroutines to add other adaptors to connections.
63
64 e.g. get_all_db_adaptors, get_db_adaptor, add_db_adaptor,
65 remove_db_adaptor are the old DBAdaptor subroutines which are now
66 redirected to the Registry.
67
68 So if before we had
69
70 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
71
72 We now want to change this to
73
74 my $sfa =
75 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "blast" );
76
77
78 =head2 DBA
79
80 These are the stores for the DBAdaptors
81
82 The Registry will create all the DBConnections needed now if you set up
83 the configuration correctly. So instead of the old commands like
84
85 my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
86 my $exon_adaptor = $db->get_ExonAdaptor;
87
88 we should now have just
89
90 my $exon_adaptor =
91 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "exon" );
92
93
94 =head2 DNA
95
96 This is an internal Registry and allows the configuration of a dnadb.
97 An example here is to set the est database to get its dna data from the
98 core database.
99
100 ## set the est db to use the core for getting dna data.
101 # Bio::EnsEMBL::Utils::ConfigRegistry->dnadb_add( "Homo Sapiens",
102 # "core", "Homo Sapiens", "est" );
103
104
105 =head2 adaptors
106
107 This is the registry for all the general types of adaptors like
108 GeneAdaptor, ExonAdaptor, Slice Adaptor etc.
109
110 These are accessed by the get_adaptor subroutine i.e.
111
112 my $exon_adaptor =
113 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "exon" );
114
115 =head1 METHODS
116
117 =cut
118
119
120
121 package Bio::EnsEMBL::Registry;
122 use strict;
123 use warnings;
124
125 our $NEW_EVAL = 0;
126
127 use Bio::EnsEMBL::DBSQL::DBAdaptor;
128 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
129 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
130 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
131 use Bio::EnsEMBL::Utils::ConfigRegistry;
132 use Bio::EnsEMBL::ApiVersion;
133 use Bio::EnsEMBL::Utils::URI qw/parse_uri/;
134
135 use DBI qw(:sql_types);
136
137 use vars qw(%registry_register);
138
139 # This is a map from group names to Ensembl DB adaptors. Used by
140 # load_all() and reset_DBAdaptor().
141 my %group2adaptor = (
142 'blast' => 'Bio::EnsEMBL::External::BlastAdaptor',
143 'compara' => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
144 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
145 'estgene' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
146 'funcgen' => 'Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor',
147 'regulation' => 'Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor',
148 'haplotype' => 'Bio::EnsEMBL::ExternalData::Haplotype::DBAdaptor',
149 'hive' => 'Bio::EnsEMBL::Hive::DBSQL::DBAdaptor',
150 'ontology' => 'Bio::EnsEMBL::DBSQL::OntologyDBAdaptor',
151 'otherfeatures' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
152 'pipeline' => 'Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor',
153 'snp' => 'Bio::EnsEMBL::ExternalData::SNPSQL::DBAdaptor',
154 'stable_ids' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
155 'variation' => 'Bio::EnsEMBL::Variation::DBSQL::DBAdaptor',
156 'vega' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
157 'vega_update' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
158 );
159
160
161 =head2 load_all
162
163 Will load the registry with the configuration file which is
164 obtained from the first in the following and in that order.
165
166 1) If an argument is passed to this method, this is used as the
167 name of the configuration file to read.
168
169 2) If the environment variable ENSEMBL_REGISTRY is set, this is
170 used as the name of the configuration file to read.
171
172 3) If the file .ensembl_init exist in the home directory, it is
173 used as the configuration file.
174
175 Arg [1] : (optional) string
176 Name of file to load the registry from.
177
178 Arg [2] : (optional) integer
179 If not 0, will print out all information.
180
181 Arg [3] : (optional) integer
182 If not 0, the database connection will not be
183 cleared, if 0 or if not set the database connections
184 will be cleared (this is the default).
185
186 Arg [4]: (optional) boolean
187 This option will turn off caching for slice features,
188 so, every time a set of features is retrieved,
189 they will come from the database instead of the
190 cache. This option is only recommended for advanced
191 users, specially if you need to store and retrieve
192 features. It might reduce performance when querying
193 the database if not used properly. If in doubt, do
194 not use it or ask in the developer mailing list.
195
196 Example : Bio::EnsEMBL::Registry->load_all();
197 Returntype : Int count of the DBAdaptor instances which can be found in the
198 registry due to this method being called. Will never be negative
199 Exceptions : none
200 Status : Stable
201
202 =cut
203
204 sub load_all {
205 my ($class, $config_file, $verbose, $no_clear, $no_cache ) = @_;
206
207 if ( !defined($config_file) ) {
208 if ( defined( $ENV{ENSEMBL_REGISTRY} ) ) {
209 $config_file = $ENV{ENSEMBL_REGISTRY};
210 } elsif ( defined( $ENV{HOME} ) ) {
211 $config_file = $ENV{HOME} . "/.ensembl_init";
212 }
213 }
214
215 $verbose ||= 0;
216 $no_clear ||= 0;
217 $no_cache ||= 0;
218
219 my $original_count = $class->get_DBAdaptor_count();
220
221 if ( !defined($config_file) ) {
222 if ($verbose) {
223 print( STDERR
224 "No default registry configuration to load.\n" );
225 }
226 } elsif ( !-e $config_file ) {
227 if ($verbose) {
228 printf( STDERR "Configuration file '%s' does not exist. "
229 . "Registry configuration not loaded.\n",
230 $config_file );
231 }
232 } else {
233 if ( defined( $registry_register{'seen'} ) ) {
234 if ( !$no_clear ) {
235 if ($verbose) {
236 print( STDERR "Clearing previously loaded "
237 . "registry configuration\n" );
238 }
239 $class->clear();
240 }
241 }
242 $registry_register{'seen'} = 1;
243
244 if ($verbose) {
245 printf( STDERR
246 "Loading registry configuration from '%s'.\n",
247 $config_file );
248 }
249
250 my $cfg;
251
252 my $test_eval = eval { require Config::IniFiles };
253
254 if ($@ or (!$test_eval)) {
255 # The user does not have the 'Config::IniFiles' module.
256 if ($verbose) {
257 print( STDERR "No Config::IniFiles module found, "
258 . "assuming this is not an ini-file\n" );
259 }
260 # If the configuration file *is* an ini-file, we can expect a
261 # load of compilation errors from the next eval...
262 } else {
263 # The user has the 'Config::IniFiles' module installed. See
264 # if this is an ini-file or not...
265 $cfg = Config::IniFiles->new( -file => $config_file );
266 }
267
268 if ( defined $cfg ) {
269 my %default_adaptor_args = ();
270
271 if ( $cfg->SectionExists('default') ) {
272 # The 'default' section is special. It contain default
273 # values that should be implicit to all other section in
274 # this configuration file. Aliases are added if there
275 # is also a 'species' setting.
276
277 my $alias = $cfg->val( 'default', 'alias' );
278 $cfg->delval( 'default', 'alias' );
279
280 my $species = $cfg->val( 'default', 'species' );
281
282 if ( defined($alias) && defined($species) ) {
283 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
284 -species => $species,
285 -alias => [ split( /\n/, $alias ) ]
286 );
287 }
288
289 %default_adaptor_args =
290 map { '-' . $_ => $cfg->val( 'default', $_ ) }
291 $cfg->Parameters('default');
292 }
293
294 foreach my $section ( $cfg->Sections() ) {
295 if ( $section eq 'default' )
296 { # We have already done the 'default' section.
297 next;
298 }
299
300 my $group = $cfg->val( $section, 'group' )
301 || $cfg->val( 'default', 'group' );
302
303 if ( !defined($group) ) {
304 printf( STDERR "Key 'group' is undefined "
305 . "for configuration section '%s', "
306 . "skipping this section.\n",
307 $section );
308 next;
309 }
310
311 my $adaptor = $group2adaptor{ lc($group) };
312 if ( !defined($adaptor) ) {
313 printf( STDERR "Unknown group '%s' "
314 . "for configuration section '%s', "
315 . "skipping this section.\n",
316 $group, $section );
317 next;
318 }
319
320 # Handle aliases. A section must have both an 'alias'
321 # setting and a 'species' setting for aliases to be
322 # added. The 'species' setting might be inherited from
323 # the 'default' section.
324
325 my $alias = $cfg->val( $section, 'alias' );
326 $cfg->delval( $section, 'alias' );
327
328 my $species = $cfg->val( $section, 'species' )
329 || $cfg->val( 'default', 'species' );
330
331 if ( defined($alias) && defined($species) ) {
332 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
333 -species => $species,
334 -alias => [ split( /\n/, $alias ) ]
335 );
336 }
337
338 # Fill in the adaptor initialization arguments.
339 # We trust the user to provide sensible key-value pairs.
340 my %adaptor_args = %default_adaptor_args;
341 foreach my $parameter ( $cfg->Parameters($section) ) {
342 $adaptor_args{ '-' . $parameter } =
343 $cfg->val( $section, $parameter );
344
345 # when set, do not use the feature cache in the
346 # different adaptors
347 if ($no_cache) {
348 $adaptor_args{'-no_cache'} = 1;
349 }
350 }
351
352 if ($verbose) {
353 printf( "Configuring adaptor '%s' "
354 . "for configuration section '%s'...\n",
355 $adaptor, $section );
356 }
357
358 my $test_eval = eval "require $adaptor";
359 if ($@ or (!$test_eval)) { die($@) }
360
361 $adaptor->new(%adaptor_args);
362
363 } ## end foreach my $section ( $cfg->Sections...
364 } else {
365 # This is probably no ini-file but an old style piece
366 # of configuration written in Perl. We need to try to
367 # require() it.
368
369 my $test_eval;
370 if($NEW_EVAL) {
371 require Bio::EnsEMBL::Utils::IO;
372 my $contents = Bio::EnsEMBL::Utils::IO::slurp($config_file);
373 $test_eval = eval $contents;
374 }
375 else {
376 $test_eval = eval { require($config_file) };
377 # To make the web code avoid doing this again we delete first
378 delete $INC{$config_file};
379 }
380
381 #Now raise the exception just in case something above is
382 #catching this
383 if ($@ or (!$test_eval)) { die($@) }
384
385 }
386 } ## end else [ if ( !defined($config_file...
387
388 my $count = $class->get_DBAdaptor_count() - $original_count;
389 return $count >= 0 ? $count : 0;
390 } ## end sub load_all
391
392 =head2 clear
393
394 Will clear the registry and disconnect from all databases.
395
396 Example : Bio::EnsEMBL::Registry->clear();
397 Returntype : none
398 Exceptions : none
399 Status : Stable
400
401 =cut
402
403 sub clear{
404 my ($self);
405
406 foreach my $dba (@{$registry_register{'_DBA'}}){
407 if($dba->dbc->connected){
408 $dba->dbc->db_handle->disconnect();
409 }
410 }
411 %registry_register = ();
412 return;
413 }
414
415 #
416 # db adaptors. (for backwards compatibility)
417 #
418
419 =head2 add_db
420
421 Arg [1] : db (DBAdaptor) to add adaptor to.
422 Arg [2] : name of the name to add the adaptor to in the registry.
423 Arg [3] : The adaptor to be added to the registry.
424 Example : Bio::EnsEMBL::Registry->add_db($db, "lite", $dba);
425 Returntype : none
426 Exceptions : none
427 Status : At Risk.
428 : This is here for backwards compatibility only and may
429 : be removed eventually. Solution is to make sure the
430 : db and the adaptor have the same species and the call
431 : is then no longer needed.
432
433 =cut
434
435 sub add_db {
436 my ( $class, $db, $name, $adap ) = @_;
437
438 if ( lc( $db->species() ) ne lc( $adap->species ) ) {
439 $registry_register{_SPECIES}{ lc( $db->species() ) }
440 { lc( $db->group() ) }{'_special'}{ lc($name) } = $adap;
441 }
442 return;
443 }
444
445 =head2 remove_db
446
447 Arg [1] : db (DBAdaptor) to remove adaptor from.
448 Arg [2] : name to remove the adaptor from in the registry.
449 Example : my $db = Bio::EnsEMBL::Registry->remove_db($db, "lite");
450 Returntype : adaptor
451 Exceptions : none
452 Status : At Risk.
453 : This is here for backwards compatibility only and may
454 : be removed eventually. Solution is to make sure the
455 : db and the adaptor have the same species and the call
456 : is then no longer needed.
457
458 =cut
459
460 sub remove_db {
461 my ( $class, $db, $name ) = @_;
462
463 my $ret =
464 $registry_register{_SPECIES}{ lc( $db->species() ) }
465 { lc( $db->group() ) }{'_special'}{ lc($name) };
466
467 $registry_register{_SPECIES}{ lc( $db->species() ) }
468 { lc( $db->group() ) }{'_special'}{ lc($name) } = undef;
469
470 return $ret;
471 }
472
473 =head2 get_db
474
475 Arg [1] : db (DBAdaptor) to get adaptor from.
476 Arg [2] : name to get the adaptor for in the registry.
477 Example : my $db = Bio::EnsEMBL::Registry->get_db("Human", "core", "lite");
478 Returntype : adaptor
479 Exceptions : See get_DBAdaptor()
480 Status : At Risk.
481 : This is here for backwards compatibility only and may
482 : be removed eventually. Solution is to make sure the
483 : db and the adaptor have the same species then call
484 : get_DBAdaptor instead.
485
486 =cut
487
488 sub get_db {
489 my ( $class, $db, $name ) = @_;
490
491 my $ret = Bio::EnsEMBL::Registry->get_DBAdaptor( lc( $db->species ),
492 lc($name) );
493
494 if ( defined($ret) ) { return $ret }
495
496 return $registry_register{_SPECIES}{ lc( $db->species() ) }
497 { lc( $db->group() ) }{'_special'}{ lc($name) };
498 }
499
500 =head2 get_all_db_adaptors
501
502 Arg [1] : db (DBAdaptor) to get all the adaptors from.
503 Example : my $db = Bio::EnsEMBL::Registry->get_all_db_adaptors($db);
504 Returntype : adaptor
505 Exceptions : none
506 Status : At Risk.
507 : This is here for backwards compatibility only and
508 : may be removed eventually. Solution is to make
509 : sure the dbs all have the same species then call
510 : get_all_DBAdaptors(-species => "human");
511
512
513 =cut
514
515 sub get_all_db_adaptors {
516 my ( $class, $db ) = @_;
517 my %ret = ();
518
519 # we now also want to add all the DBAdaptors for the same species.
520 # as add_db_adaptor does not add if it is from the same species.
521
522 foreach my $dba ( @{ $registry_register{'_DBA'} } ) {
523 if ( lc( $dba->species() ) eq lc( $db->species() ) ) {
524 $ret{ $dba->group() } = $dba;
525 }
526 }
527
528 foreach my $key (
529 keys %{
530 $registry_register{_SPECIES}
531 { $class->get_alias( $db->species() ) }{ lc( $db->group() ) }
532 {'_special'} } )
533 {
534 $ret{$key} =
535 $registry_register{_SPECIES}
536 { $class->get_alias( $db->species() ) }{ lc( $db->group() ) }
537 {'_special'}{$key};
538 }
539
540 return \%ret;
541 } ## end sub get_all_db_adaptors
542
543
544 #
545 # DBAdaptors
546 #
547
548 =head2 add_DBAdaptor
549
550 Arg [1] : name of the species to add the adaptor to in the registry.
551 Arg [2] : name of the group to add the adaptor to in the registry.
552 Arg [3] : DBAdaptor to be added to the registry.
553 Example : Bio::EnsEMBL::Registry->add_DBAdaptor("Human", "core", $dba);
554 Returntype : none
555 Exceptions : none
556 caller : internal
557 Status : Stable
558
559 =cut
560
561 sub add_DBAdaptor {
562 my ( $class, $species, $group, $adap ) = @_;
563
564 if ( !( $class->alias_exists($species) ) ) {
565 $class->add_alias( $species, $species );
566 }
567
568 $species = $class->get_alias($species);
569
570 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DB'} = $adap;
571
572 if ( !defined( $registry_register{'_DBA'} ) ) {
573 $registry_register{'_DBA'} = [$adap];
574 } else {
575 push( @{ $registry_register{'_DBA'} }, $adap );
576 }
577 return;
578 }
579
580
581
582 =head2 get_DBAdaptor
583
584 Arg [1] : name of the species to get the adaptor for in the registry.
585 Arg [2] : name of the group to get the adaptor for in the registry.
586 Arg [3] : if set will not give warnings when looking for alias.
587 Example : $dba = Bio::EnsEMBL::Registry->get_DBAdaptor("Human", "core");
588 Returntype : DBAdaptor
589 Exceptions : If $species is not defined and if no valid internal name
590 could be found for $species. If thrown check your API and DB
591 version
592 Status : Stable
593
594 =cut
595
596 sub get_DBAdaptor {
597 my ( $class, $species, $group, $no_alias_check ) = @_;
598
599 if ( !defined($species) ) {
600 throw('Species not defined.');
601 }
602
603 my $ispecies = $class->get_alias( $species, $no_alias_check );
604
605 if ( !defined($ispecies) ) {
606 if(! $no_alias_check) {
607 throw("Can not find internal name for species '$species'");
608 }
609 }
610 else { $species = $ispecies }
611
612 return $registry_register{_SPECIES}{$species}{ lc($group) }{'_DB'};
613 }
614
615 =head2 get_all_DBAdaptors
616
617 Arg [SPECIES]: (optional) string
618 species name to get adaptors for
619 Arg [GROUP] : (optional) string
620 group name to get adaptors for
621 Example :
622 @dba =
623 @{ Bio::EnsEMBL::Registry->get_all_DBAdaptors() };
624
625 @human_dbas =
626 @{ Bio::EnsEMBL::Registry->get_all_DBAdaptors(
627 -species => 'human'
628 ) };
629
630 Returntype : list of DBAdaptors
631 Exceptions : none
632 Status : Stable
633
634 =cut
635
636 sub get_all_DBAdaptors {
637 my ( $class, @args ) = @_;
638
639 my ( $species, $group ) = rearrange( [qw(SPECIES GROUP)], @args );
640
641 if ( defined($species) ) { $species = $class->get_alias($species) }
642
643 my @ret;
644 foreach my $dba ( @{ $registry_register{'_DBA'} } ) {
645 if ( ( !defined($species) || lc($species) eq lc( $dba->species() ) )
646 && ( !defined($group) || lc($group) eq lc( $dba->group() ) ) )
647 {
648 push( @ret, $dba );
649 }
650 }
651
652 return \@ret;
653 }
654
655 =head2 get_all_DBAdaptors_by_connection
656
657 Arg [1] : DBConnection used to find DBAdaptors
658 Returntype : reference to list of DBAdaptors
659 Exceptions : none
660 Example : @dba = @{ Bio::EnsEMBL::Registry
661 ->get_all_DBAdaptors_by_connection($dbc) };
662 Status : Stable
663
664 =cut
665
666 sub get_all_DBAdaptors_by_connection {
667 my ( $self, $dbc_orig ) = @_;
668
669 my @return;
670
671 foreach my $dba ( @{ $registry_register{'_DBA'} } ) {
672 my $dbc = $dba->dbc();
673
674 if ( defined($dbc)
675 && $dbc->can('equals')
676 && $dbc->equals($dbc_orig) )
677 {
678 push( @return, $dba );
679 }
680 }
681
682 return \@return;
683 }
684
685 =head2 get_all_DBAdaptors_by_dbname
686
687 Arg [1] : string, name of database
688 Returntype : reference to list of DBAdaptors
689 Exceptions : none
690 Example : @dba = @{ Bio::EnsEMBL::Registry
691 ->get_all_DBAdaptors_by_dbname($dbname) };
692 Status : Stable
693
694 =cut
695
696 sub get_all_DBAdaptors_by_dbname {
697 my ( $self, $dbname ) = @_;
698
699 my @return;
700
701 foreach my $dba ( @{ $registry_register{'_DBA'} } ) {
702 my $dbc = $dba->dbc();
703
704 if ( defined($dbc) && $dbc->dbname() eq $dbname ) {
705 push( @return, $dba );
706 }
707 }
708
709 return \@return;
710 }
711
712 =head2 remove_DBAdaptor
713
714 Arg [1] : name of the species to get the adaptor for in the registry.
715 Arg [2] : name of the group to get the adaptor for in the registry.
716 Example : $dba = Bio::EnsEMBL::Registry->remove_DBAdaptor("Human", "core");
717 Returntype : none
718 Exceptions : none
719 Status : At risk
720
721 =cut
722
723 sub remove_DBAdaptor {
724 my ( $class, $species, $group ) = @_;
725
726 $species = $class->get_alias($species);
727
728 delete $registry_register{_SPECIES}{$species}{$group};
729 # This will remove the DBAdaptor and all the other adaptors
730
731 # Now remove if from the _DBA array
732 my $index;
733
734 foreach my $i ( 0 .. $#{ $registry_register{'_DBA'} } ) {
735 my $dba = $registry_register{'_DBA'}->[$i];
736
737 if ( ( $dba->species eq $species )
738 && $dba->group eq $group )
739 {
740 $index = $i;
741 last;
742 }
743 }
744
745 # Now remove from _DBA cache
746 if ( defined($index) ) {
747 splice( @{ $registry_register{'_DBA'} }, $index, 1 );
748 }
749
750 return;
751 } ## end sub remove_DBAdaptor
752
753
754
755 =head2 reset_DBAdaptor
756
757 Arg [1]: string - species e.g. homo_sapiens
758 Arg [2]: string - DB group e.g. core
759 Arg [3]: string - new dbname
760 Args [4-7]: string - optional DB parameters, defaults to current db params if omitted
761 Arg [8]: hashref - Hash ref of additional parameters e.g. eFG dnadb params for auto selecting dnadb
762 Usage : $reg->reset_registry_db( 'homo_sapiens', 'core',
763 'homo_sapiens_core_37_35j' );
764 Description: Resets a DB within the registry.
765 Exceptions: Throws if mandatory params not supplied
766 Throws if species name is not already seen by the registry
767 Throws if no current DB for species/group available
768 Status : At risk
769
770 =cut
771
772 sub reset_DBAdaptor {
773 my (
774 $self, $species, $group, $dbname, $host,
775 $port, $user, $pass, $params
776 ) = @_;
777
778 # Check mandatory params
779 if ( !( defined $species && defined $group && defined $dbname ) ) {
780 throw(
781 'Must provide at least a species, group, and dbname parameter '
782 . 'to redefine a DB in the registry' );
783 }
784
785 # Validate species here
786 my $alias = $self->get_alias($species);
787 throw("Could not find registry alias for species:\t$species")
788 if ( !defined $alias );
789
790 # Get all current defaults if not defined
791
792 my $db = $self->get_DBAdaptor( $alias, $group );
793 my $class;
794
795 if ($db) {
796 $class = ref($db);
797 $host ||= $db->dbc->host;
798 $port ||= $db->dbc->port;
799 $user ||= $db->dbc->username;
800 $pass ||= $db->dbc->password;
801 } else {
802 #Now we need to test mandatory params
803 $class = $group2adaptor{ lc($group) };
804
805 if ( !( $host && $user ) ) {
806 throw("No comparable $alias $group DB present in Registry. "
807 . "You must pass at least a dbhost and dbuser" );
808 }
809 }
810
811 $self->remove_DBAdaptor( $alias, $group );
812
813 # ConfigRegistry should automatically add this to the Registry
814 $db = $class->new(
815 -user => $user,
816 -host => $host,
817 -port => $port,
818 -pass => $pass,
819 -dbname => $dbname,
820 -species => $alias,
821 -group => $group,
822 %{$params} );
823
824 return $db;
825 } ## end sub reset_DBAdaptor
826
827
828 #
829 # DNA Adaptors
830 #
831
832 =head2 add_DNAAdaptor
833
834 Arg [1] : name of the species to add the adaptor to in the registry.
835 Arg [2] : name of the group to add the adaptor to in the registry.
836 Arg [3] : name of the species to get the dna from
837 Arg [4] : name of the group to get the dna from
838 Example : Bio::EnsEMBL::Registry->add_DNAAdaptor("Human", "estgene", "Human", "core");
839 Returntype : none
840 Exceptions : none
841 Status : Stable
842
843 =cut
844
845 sub add_DNAAdaptor {
846 my ( $class, $species, $group, $dnadb_species, $dnadb_group ) = @_;
847
848 $species = $class->get_alias($species);
849 $dnadb_species = $class->get_alias($dnadb_species);
850 if ( $dnadb_group->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') ) {
851 deprecated("");
852 } else {
853 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA'} =
854 $dnadb_group;
855 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA2'} =
856 $dnadb_species;
857 }
858 return;
859 }
860
861 =head2 get_DNAAdaptor
862
863 Arg [1] : name of the species to get the adaptor for in the registry.
864 Arg [2] : name of the group to get the adaptor for in the registry.
865 Example : $dnaAdap = Bio::EnsEMBL::Registry->get_DNAAdaptor("Human", "core");
866 Returntype : adaptor
867 Exceptions : none
868 Status : Stable
869
870 =cut
871
872 sub get_DNAAdaptor {
873 my ( $class, $species, $group ) = @_;
874
875 $species = $class->get_alias($species);
876 my $new_group =
877 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA'};
878 my $new_species =
879 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA2'};
880
881 if ( defined $new_group ) {
882 return $class->get_DBAdaptor( $new_species, $new_group );
883 }
884
885 return;
886 }
887
888 #
889 # General Adaptors
890 #
891
892 =head2 add_adaptor
893
894 Arg [1] : name of the species to add the adaptor to in the registry.
895 Arg [2] : name of the group to add the adaptor to in the registry.
896 Arg [3] : name of the type to add the adaptor to in the registry.
897 Arg [4] : The DBAdaptor to be added to the registry.
898 Arg [5] : (optional) Set to allow overwrites of existing adaptors.
899 Example : Bio::EnsEMBL::Registry->add_adaptor("Human", "core", "Gene", $adap);
900 Returntype : none
901 Exceptions : none
902 Caller : internal
903 Status : Stable
904
905 =cut
906
907 sub add_adaptor {
908 my ( $class, $species, $group, $type, $adap, $reset ) = @_;
909
910 $species = $class->get_alias($species);
911
912 # Since the adaptors are not stored initially, only their class paths
913 # when the adaptors are obtained, we need to store these instead. It
914 # is not necessarily an error if the registry is overwritten without
915 # the reset set but it is an indication that we are overwriting a
916 # database which should be a warning for now
917
918 if ( defined($reset) )
919 { # JUST RESET THE HASH VALUE NO MORE PROCESSING NEEDED
920 $registry_register{_SPECIES}{$species}{ lc($group) }{ lc($type) } =
921 $adap;
922 return;
923 }
924
925 if (
926 defined(
927 $registry_register{_SPECIES}{$species}{ lc($group) }{ lc($type) }
928 ) )
929 {
930 # print STDERR (
931 # "Overwriting Adaptor in Registry for $species $group $type\n");
932 $registry_register{_SPECIES}{$species}{ lc($group) }{ lc($type) } =
933 $adap;
934 return;
935 }
936 $registry_register{_SPECIES}{$species}{ lc($group) }{ lc($type) } =
937 $adap;
938
939 if ( !defined( $registry_register{_SPECIES}{$species}{'list'} ) ) {
940 $registry_register{_SPECIES}{$species}{'list'} = [$type];
941 } else {
942 push( @{ $registry_register{_SPECIES}{$species}{'list'} }, $type );
943 }
944
945 if ( !defined( $registry_register{_TYPE}{ lc($type) }{$species} ) ) {
946 $registry_register{_TYPE}{ lc($type) }{$species} = [$type];
947 } else {
948 push( @{ $registry_register{_TYPE}{ lc($type) }{$species} },
949 $adap );
950 }
951 return;
952 } ## end sub add_adaptor
953
954
955 =head2 get_adaptor
956
957 Arg [1] : name of the species to add the adaptor to in the registry.
958 Arg [2] : name of the group to add the adaptor to in the registry.
959 Arg [3] : name of the type to add the adaptor to in the registry.
960 Example : $adap = Bio::EnsEMBL::Registry->get_adaptor("Human", "core", "Gene");
961 Returntype : adaptor
962 Exceptions : Thrown if a valid internal name cannot be found for the given
963 name. If thrown check your API and DB version. Also thrown if
964 no type or group was given
965 Status : Stable
966
967 =cut
968
969 sub get_adaptor {
970 my ( $class, $species, $group, $type ) = @_;
971
972 my $ispecies = $class->get_alias($species);
973
974 if ( !defined($ispecies) ) {
975 throw("Can not find internal name for species '$species'");
976 }
977 else { $species = $ispecies }
978
979 throw 'No adaptor group given' if ! defined $group;
980 throw 'No adaptor type given' if ! defined $type;
981
982
983 if($type =~ /Adaptor$/i) {
984 warning("Detected additional Adaptor string in given the type '$type'. Removing it to avoid possible issues. Alter your type to stop this message");
985 $type =~ s/Adaptor$//i;
986 }
987
988 my %dnadb_adaptors = (
989 'sequence' => 1,
990 'assemblymapper' => 1,
991 'karyotypeband' => 1,
992 'repeatfeature' => 1,
993 'coordsystem' => 1,
994 'assemblyexceptionfeature' => 1
995 );
996
997 # warn "$species, $group, $type";
998
999 $type = lc($type);
1000
1001 # For historical reasons, allow use of group 'regulation' to refer to
1002 # group 'funcgen'.
1003 if ( lc($group) eq 'regulation' ) { $group = 'funcgen' }
1004
1005 my $dnadb_group =
1006 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA'};
1007
1008 if ( defined($dnadb_group)
1009 && defined( $dnadb_adaptors{ lc($type) } ) )
1010 {
1011 $species =
1012 $registry_register{_SPECIES}{$species}{ lc($group) }{'_DNA2'};
1013 $group = $dnadb_group;
1014 }
1015
1016 my $ret =
1017 $registry_register{_SPECIES}{$species}{ lc($group) }{ lc($type) };
1018
1019 if ( !defined($ret) ) { return }
1020 if ( ref($ret) ) { return $ret }
1021
1022 # Not instantiated yet
1023
1024 my $dba = $registry_register{_SPECIES}{$species}{ lc($group) }{'_DB'};
1025 my $module = $ret;
1026
1027 my $test_eval = eval "require $module";
1028 if ($@ or (!$test_eval)) {
1029 warning("'$module' cannot be found.\nException $@\n");
1030 return;
1031 }
1032
1033 if (
1034 !defined(
1035 $registry_register{_SPECIES}{$species}{ lc($group) }{'CHECKED'} )
1036 )
1037 {
1038 $registry_register{_SPECIES}{$species}{ lc($group) }{'CHECKED'} = 1;
1039 $class->version_check($dba);
1040 }
1041
1042 my $adap = "$module"->new($dba);
1043 Bio::EnsEMBL::Registry->add_adaptor( $species, $group, $type, $adap,
1044 'reset' );
1045 $ret = $adap;
1046
1047 return $ret;
1048 } ## end sub get_adaptor
1049
1050 =head2 get_all_adaptors
1051
1052 Arg [SPECIES] : (optional) string
1053 species name to get adaptors for
1054 Arg [GROUP] : (optional) string
1055 group name to get adaptors for
1056 Arg [TYPE] : (optional) string
1057 type to get adaptors for
1058 Example : @adaps = @{Bio::EnsEMBL::Registry->get_all_adaptors()};
1059 Returntype : ref to list of adaptors
1060 Exceptions : none
1061 Status : Stable
1062
1063 =cut
1064
1065 sub get_all_adaptors{
1066 my ($class,@args)= @_;
1067 my ($species, $group, $type);
1068 my @ret=();
1069 my (%species_hash, %group_hash, %type_hash);
1070
1071
1072 if(@args == 1){ # Old species only one parameter
1073 warn("-SPECIES argument should now be used to get species adaptors");
1074 $species = $args[0];
1075 }
1076 else{
1077 # new style -SPECIES, -GROUP, -TYPE
1078 ($species, $group, $type) =
1079 rearrange([qw(SPECIES GROUP TYPE)], @args);
1080 }
1081
1082 if(defined($species)){
1083 $species_hash{$species} = 1;
1084 }
1085 else{
1086 # get list of species
1087 foreach my $dba (@{$registry_register{'_DBA'}}){
1088 $species_hash{lc($dba->species())} = 1;
1089 }
1090 }
1091 if(defined($group)){
1092 $group_hash{$group} = 1;
1093 }
1094 else{
1095 foreach my $dba (@{$registry_register{'_DBA'}}){
1096 $group_hash{lc($dba->group())} = 1;
1097 }
1098 }
1099
1100 if ( defined($type) ) {
1101 $type_hash{$type} = 1;
1102 } else {
1103 foreach my $dba ( @{ $registry_register{'_DBA'} } ) {
1104 foreach my $ty (
1105 @{ $registry_register{_SPECIES}{ lc( $dba->species ) }{'list'} }
1106 )
1107 {
1108 $type_hash{ lc($ty) } = 1;
1109 }
1110 }
1111 }
1112
1113 ### NOW NEED TO INSTANTIATE BY CALLING get_adaptor
1114 foreach my $sp ( keys %species_hash ) {
1115 foreach my $gr ( keys %group_hash ) {
1116 foreach my $ty ( keys %type_hash ) {
1117 my $temp = $class->get_adaptor( $sp, $gr, $ty );
1118 if ( defined($temp) ) {
1119 push @ret, $temp;
1120 }
1121 }
1122 }
1123 }
1124
1125 return (\@ret);
1126 }
1127
1128
1129 =head2 add_alias
1130
1131 Arg [1] : name of the species to add alias for
1132 Arg [2] : name of the alias
1133 Example : Bio::EnsEMBL::Registry->add_alias("Homo Sapiens","Human");
1134 Description: add alternative name for the species.
1135 Returntype : none
1136 Exceptions : none
1137 Status : Stable
1138
1139 =cut
1140
1141 sub add_alias{
1142 my ($class, $species,$key) = @_;
1143
1144 $registry_register{'_ALIAS'}{lc($key)} = lc($species);
1145 return;
1146 }
1147
1148 =head2 remove_alias
1149
1150 Arg [1] : name of the species to remove alias for
1151 Arg [2] : name of the alias
1152 Example : Bio::EnsEMBL::Registry->remove_alias("Homo Sapiens","Human");
1153 Description: remove alternative name for the species.
1154 Returntype : none
1155 Exceptions : none
1156 Status : Stable
1157
1158 =cut
1159
1160 sub remove_alias{
1161 my ($class, $species,$key) = @_;
1162
1163 delete $registry_register{'_ALIAS'}{lc($key)};
1164 return;
1165 }
1166
1167
1168
1169 =head2 get_alias
1170
1171 Arg [1] : name of the possible alias to get species for
1172 Example : Bio::EnsEMBL::Registry->get_alias("Human");
1173 Description: get proper species name.
1174 Returntype : species name
1175 Exceptions : none
1176 Status : Stable
1177
1178 =cut
1179
1180 sub get_alias {
1181 my ( $class, $key, $no_warn ) = @_;
1182
1183 if ( !defined( $registry_register{'_ALIAS'}{ lc($key) } ) ) {
1184 if ( ( !defined( $registry_register{_SPECIES}{ lc($key) } ) ) and
1185 ( !defined( $registry_register{_ALIAS}{ lc($key) } ) ) )
1186 {
1187 if ( ( !defined($no_warn) ) or ( !$no_warn ) ) {
1188 warning( "$key is not a valid species name " .
1189 "(check DB and API version)" );
1190 }
1191 return;
1192 }
1193 else { return $key }
1194 }
1195
1196 return $registry_register{'_ALIAS'}{ lc($key) };
1197 }
1198
1199 =head2 get_all_aliases
1200
1201 Arg [1] : Species name to retrieve aliases for
1202 (may be an alias as well).
1203 Example : Bio::EnsEMBL::Registry->get_all_aliases('Homo sapiens');
1204 Description: Returns all known aliases for a given species (but not the
1205 species name/alias that was given).
1206 Returntype : ArrayRef of all known aliases
1207 Exceptions : none
1208 Status : Development
1209
1210 =cut
1211
1212 sub get_all_aliases {
1213 my ( $class, $key ) = @_;
1214
1215 my $species = $registry_register{_ALIAS}{ lc($key) };
1216
1217 my @aliases;
1218 if ( defined($species) ) {
1219 foreach my $alias ( keys( %{ $registry_register{_ALIAS} } ) ) {
1220 if ( $species ne $alias
1221 && $species eq $registry_register{_ALIAS}{ lc($alias) } )
1222 {
1223 push( @aliases, $alias );
1224 }
1225 }
1226 }
1227
1228 return \@aliases;
1229 }
1230
1231 =head2 alias_exists
1232
1233 Arg [1] : name of the possible alias to get species for
1234 Example : Bio::EnsEMBL::Registry->alias_exists("Human");
1235 Description: does the species name exist.
1236 Returntype : 1 if exists else 0
1237 Exceptions : none
1238 Status : Stable
1239
1240 =cut
1241
1242 sub alias_exists {
1243 my ( $class, $key ) = @_;
1244
1245 return defined( $registry_register{'_ALIAS'}{ lc($key) } );
1246 }
1247
1248 =head2 set_disconnect_when_inactive
1249
1250 Example : Bio::EnsEMBL::Registry->set_disconnect_when_inactive();
1251 Description: Set the flag to make sure that the database connection is dropped if
1252 not being used on each database.
1253 Returntype : none
1254 Exceptions : none
1255 Status : Stable
1256
1257 =cut
1258
1259 sub set_disconnect_when_inactive{
1260 foreach my $dba ( @{get_all_DBAdaptors()}){
1261 my $dbc = $dba->dbc;
1262 # Disconnect if connected
1263 $dbc->disconnect_if_idle() if $dbc->connected();
1264 $dbc->disconnect_when_inactive(1);
1265 }
1266 return;
1267 }
1268
1269 =head2 set_reconnect_when_lost
1270
1271 Example : Bio::EnsEMBL::Registry->set_reconnect_when_lost();
1272 Description: Set the flag to make sure that the database connection is not lost before it's used.
1273 This is useful for long running jobs (over 8hrs).
1274 Returntype : none
1275 Exceptions : none
1276 Status : Stable
1277
1278 =cut
1279
1280 sub set_reconnect_when_lost{
1281 foreach my $dba ( @{get_all_DBAdaptors()}){
1282 my $dbc = $dba->dbc;
1283 $dbc->reconnect_when_lost(1);
1284 }
1285 return;
1286 }
1287
1288 =head2 disconnect_all
1289
1290 Example : Bio::EnsEMBL::Registry->disconnect_all();
1291 Description: disconnect from all the databases.
1292 Returntype : none
1293 Exceptions : none
1294 Status : Stable
1295
1296 =cut
1297
1298 sub disconnect_all {
1299 foreach my $dba ( @{get_all_DBAdaptors()||[]} ){
1300 my $dbc = $dba->dbc;
1301 next unless $dbc;
1302 # Disconnect if connected
1303 $dbc->disconnect_if_idle() if $dbc->connected();
1304 }
1305 return;
1306 }
1307
1308 =head get_DBAdaptor_count
1309
1310 Example : Bio::EnsEMBL::Registry->get_DBAdaptor_count();
1311 Description : Returns the count of database adaptors currently held by
1312 the registry
1313 Returntype : Int count of database adaptors currently known
1314 Exceptions : None
1315
1316 =cut
1317
1318 sub get_DBAdaptor_count {
1319 return scalar(@{$registry_register{'_DBA'}}) if(defined $registry_register{'_DBA'});
1320 return 0;
1321 }
1322
1323 =head2 change_access
1324
1325 Will change the username and password for a set of databases.
1326 if host,user or database names are missing then these are not checked.
1327 So for example if you do not specify a database then ALL databases on
1328 the specified host and port will be changed.
1329
1330 Arg [1] : name of the host to change access on
1331 Arg [2] : port number to change access on
1332 Arg [3] : name of the user to change access on
1333 Arg [4] : name of the database to change access on
1334 Arg [5] : name of the new user
1335 Arg [6] : new password
1336
1337 Example : Bio::EnsEMBL::Registry->get_alias("Human");
1338 Description: change username and password on one or more databases
1339 Returntype : none
1340 Exceptions : none
1341 Status : Stable
1342
1343 =cut
1344
1345 sub change_access{
1346 my ($self, $host,$port,$user,$dbname,$new_user,$new_pass) = @_;
1347 foreach my $dba ( @{$registry_register{'_DBA'}}){
1348 my $dbc = $dba->dbc;
1349 if((((!defined($host)) or ($host eq $dbc->host))) and
1350 (((!defined($port)) or ($port eq $dbc->port))) and
1351 (((!defined($user)) or ($user eq $dbc->username))) and
1352 ((!defined($dbname)) or ($dbname eq $dbc->dbname))){
1353 if($dbc->connected()){
1354 $dbc->db_handle->disconnect();
1355 $dbc->connected(undef);
1356 }
1357 # over write the username and password
1358 $dbc->username($new_user);
1359 $dbc->password($new_pass);
1360 }
1361 }
1362 return;
1363 }
1364
1365
1366
1367 =head2 load_registry_from_url
1368
1369 Arg [1] : string $url
1370 Arg [2] : (optional) integer
1371 If not 0, will print out all information.
1372 Arg [3] : (optional) integer
1373 This option will turn off caching for slice features, so,
1374 every time a set of features is retrieved, they will come
1375 from the database instead of the cache. This option is only
1376 recommended for advanced users, specially if you need to
1377 store and retrieve features. It might reduce performance when
1378 querying the database if not used properly. If in doubt, do
1379 not use it or ask in the developer mailing list.
1380
1381 Example : load_registry_from_url(
1382 'mysql://anonymous@ensembldb.ensembl.org:3306');
1383
1384 load_registry_from_url(
1385 'mysql://anonymous@ensembldb.ensembl.org:3306/homo_sapiens_core_65_37?group=core&species=homo_sapiens'
1386 );
1387
1388 load_registry_from_url(
1389 'mysql://anonymous@ensembldb.ensembl.org:3306/homo_sapiens_core_65_37?group=core'
1390 );
1391
1392
1393 Description: Will load the correct versions of the ensembl
1394 databases for the software release it can find on
1395 a database instance into the registry. Also adds
1396 a set of standard aliases. The url format is:
1397 mysql://[[username][:password]@]hostname[:port]. You
1398 can also request a specific version for the databases
1399 by adding a slash and the version number but your
1400 script may crash as the API version won't match the
1401 DB version.
1402
1403 You can also specify a database name which will cause the
1404 loading of a single DBAdaptor instance. Parameters are
1405 mapped from a normal URL parameter set to their DBAdaptor
1406 equivalent. Group must be defined.
1407
1408 Returntype : Int count of the DBAdaptor instances which can be found in the
1409 registry
1410
1411 Exceptions : Thrown if the given URL does not parse according to the above
1412 scheme and if the specified database cannot be connected to
1413 (see L<load_registry_from_db> for more information)
1414 Status : Stable
1415
1416 =cut
1417
1418 sub load_registry_from_url {
1419 my ( $self, $url, $verbose, $no_cache ) = @_;
1420
1421 if ( $url =~ /^mysql\:\/\/([^\@]+\@)?([^\:\/]+)(\:\d+)?(\/\d+)?\/?$/x ) {
1422 my $user_pass = $1;
1423 my $host = $2;
1424 my $port = $3;
1425 my $version = $4;
1426
1427 $user_pass =~ s/\@$//;
1428 my ( $user, $pass ) = $user_pass =~ m/([^\:]+)(\:.+)?/x;
1429 $pass =~ s/^\://x if ($pass);
1430 $port =~ s/^\://x if ($port);
1431 $version =~ s/^\///x if ($version);
1432
1433 return $self->load_registry_from_db(
1434 -host => $host,
1435 -user => $user,
1436 -pass => $pass,
1437 -port => $port,
1438 -db_version => $version,
1439 -verbose => $verbose,
1440 -no_cache => $no_cache
1441 );
1442 }
1443 my $uri = parse_uri($url);
1444 if($uri) {
1445 if($uri->scheme() eq 'mysql') {
1446 my %params = $uri->generate_dbsql_params();
1447 if($params{-DBNAME}) {
1448 $params{-SPECIES} = $params{-DBNAME} unless $params{-SPECIES};
1449 $params{-NO_CACHE} = 1 if $no_cache;
1450 my $group = $params{-GROUP};
1451 my $class = $self->_group_to_adaptor_class($group);
1452 if($verbose) {
1453 printf("Loading database '%s' from group '%s' with DBAdaptor class '%s' from url %s\n", $params{-DBNAME}, $group, $class, $url);
1454 }
1455 $class->new(%params);
1456 return 1;
1457 }
1458 }
1459 }
1460 throw("Only MySQL URLs are accepted. Given URL was '${url}'");
1461 } ## end sub load_registry_from_url
1462
1463
1464 =head2 load_registry_from_db
1465
1466 Arg [HOST] : string
1467 The domain name of the database host to connect to.
1468
1469 Arg [USER] : string
1470 The name of the database user to connect with.
1471
1472 Arg [PASS] : (optional) string
1473 The password to be used to connect to the database.
1474
1475 Arg [PORT] : (optional) integer
1476 The port to use when connecting to the database.
1477
1478 Arg [VERBOSE]: (optional) boolean
1479 Whether to print database messages.
1480
1481 Arg [SPECIES]: (optional) string
1482 By default, all databases that are found on the
1483 server and that corresponds to the correct release
1484 are probed for aliases etc. For some people,
1485 depending on where they are in the world, this might
1486 be a slow operation. With the '-species' argument,
1487 one may reduce the startup time by restricting the
1488 set of databases that are probed to those of a
1489 particular species.
1490
1491 Note that the latin name of the species is required,
1492 e.g., 'homo sapiens', 'gallus gallus', 'callithrix
1493 jacchus' etc. It may be the whole species name,
1494 or only the first part of the name, e.g. 'homo',
1495 'gallus', or 'callithrix'. This will be used in
1496 matching against the name of the databases.
1497
1498 Arg [DB_VERSION]: (optional) integer
1499 By default, only databases corresponding to the
1500 current API version are loaded. This argument
1501 allows the script to use databases from another
1502 version although it might not work properly. This
1503 argument should only be used for production or
1504 testing purposes and if you really know what you are
1505 doing.
1506
1507 Arg [WAIT_TIMEOUT]: (optional) integer
1508 Time in seconds for the wait timeout to happen.
1509 Time after which the connection is deleted if not
1510 used. By default this is 28800 (8 hours), so set
1511 this to greater than this if your connection are
1512 getting deleted. Only set this if you are having
1513 problems and know what you are doing.
1514
1515 Arg [-NO_CACHE]: (optional) boolean
1516 This option will turn off caching for slice features,
1517 so, every time a set of features is retrieved, they
1518 will come from the database instead of the cache. This
1519 option is only recommended for advanced users, specially
1520 if you need to store and retrieve features. It might
1521 reduce performance when querying the database if not
1522 used properly. If in doubt, do not use it or ask in the
1523 developer mailing list.
1524
1525 Arg [SPECIES_SUFFIX]: (optional) string
1526 This option will append the string to the species name
1527 in the registry for all databases found on this server.
1528
1529 Example :
1530
1531 $registry->load_registry_from_db(
1532 -host => 'ensembldb.ensembl.org',
1533 -user => 'anonymous',
1534 -verbose => '1'
1535 );
1536
1537 Description: Will load the correct versions of the Ensembl
1538 databases for the software release it can find on a
1539 database instance into the registry. Also adds a set
1540 of standard aliases.
1541
1542 Returntype : Int count of the DBAdaptor instances which can be found in the
1543 registry due to this method call.
1544
1545 Exceptions : Thrown if the given MySQL database cannot be connected to
1546 or there is any error whilst querying the database.
1547 Status : Stable
1548
1549 =cut
1550
1551 sub load_registry_from_db {
1552 my ( $self, @args ) = @_;
1553
1554 my ( $host, $port, $user,
1555 $pass, $verbose, $db_version,
1556 $wait_timeout, $no_cache, $species, $species_suffix )
1557 = rearrange( [ 'HOST', 'PORT',
1558 'USER', 'PASS',
1559 'VERBOSE', 'DB_VERSION',
1560 'WAIT_TIMEOUT', 'NO_CACHE',
1561 'SPECIES', 'SPECIES_SUFFIX' ],
1562 @args );
1563
1564 if ( defined($species) ) {
1565 $species = lc($species);
1566 $species =~ tr/ -/__/;
1567 }
1568 if (!defined($species_suffix)) {
1569 $species_suffix = "";
1570 }
1571
1572 my $ontology_db;
1573 my $ontology_version;
1574
1575 my $stable_ids_db;
1576 my $stable_ids_version;
1577
1578 $user ||= "ensro";
1579 if ( !defined($port) ) {
1580 $port = 3306;
1581 if ( $host eq "ensembldb.ensembl.org" ) {
1582 if ( (!defined($db_version)) or ($db_version >= 48) ) {
1583 $port = 5306;
1584 }
1585 }
1586 }
1587
1588 $wait_timeout ||= 0;
1589
1590 my $original_count = $self->get_DBAdaptor_count();
1591
1592 my $err_pattern = 'Cannot %s to the Ensembl MySQL server at %s:%d; check your settings & DBI error message: %s';
1593
1594 my $dbh = DBI->connect( "DBI:mysql:host=$host;port=$port", $user, $pass ) or
1595 throw(sprintf($err_pattern, 'connect', $host, $port, $DBI::errstr));
1596 $dbh->ping() or
1597 throw(sprintf($err_pattern, 'ping', $host, $port, $DBI::errstr));
1598
1599 my $res = $dbh->selectall_arrayref('SHOW DATABASES');
1600 my @dbnames = map { $_->[0] } @$res;
1601
1602 my %temp;
1603 my $software_version = software_version();
1604
1605 if ( defined($db_version) ) {
1606 $software_version = $db_version;
1607 }
1608
1609 if ($verbose) {
1610 printf( "Will only load v%d databases\n", $software_version );
1611 }
1612
1613 # From the list of all the databses create a tempory hash of those we
1614 # are interested in
1615
1616 for my $db (@dbnames) {
1617 if ( $db =~ /^(\w+_collection_\w+(?:_\d+)?)_((\d+)_\w+)/ )
1618 { # NEEDS TO BE FIRST TO PICK UP COLLECTION DBS
1619 if ( $3 eq $software_version ) {
1620 $temp{$1} = $2;
1621 }
1622 } elsif ( $db =~ /^(.+)_(userdata)$/x ) {
1623 $temp{$1} = $2;
1624 } elsif (
1625 $db =~ /^(ensembl_compara # compara database
1626 (?:_\w+)*?) # optional ensembl genomes bit
1627 _
1628 (\d+)$/x )
1629 { # db version
1630 if ( $2 eq $software_version ) {
1631 $temp{$1} = $2;
1632 }
1633 } elsif ( $db =~ /^(ensembl_ancestral(?:_\w+?)*?)_(\d+)$/x ) {
1634 if ( $2 eq $software_version ) {
1635 $temp{$1} = $2;
1636 }
1637 } elsif ( $db =~ /^ensembl(?:genomes)?_ontology_(?:\d+_)?(\d+)/x ) {
1638 if ( $1 eq $software_version ) {
1639 $ontology_db = $db;
1640 $ontology_version = $1;
1641 }
1642 } elsif ( $db =~ /^ensembl(?:genomes)?_stable_ids_(?:\d+_)?(\d+)/x ) {
1643 if ( $1 eq $software_version ) {
1644 $stable_ids_db = $db;
1645 $stable_ids_version = $1;
1646 }
1647
1648 } elsif (
1649 $db =~ /^([a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name e.g. homo_sapiens or canis_lupus_familiaris
1650 _
1651 [a-z]+ # db type
1652 (?:_\d+)?) # optional end bit for ensembl genomes databases
1653 _
1654 (\d+) # database release
1655 _
1656 (\w+)$ # assembly number can have letters too e.g 37c
1657 /x
1658 )
1659 {
1660
1661 # Species specific databases (core, cdna, vega etc.)
1662
1663 my ( $sp_name, $db_rel, $assem ) = ( $1, $2, $3 );
1664
1665 if ( !defined($species) || $sp_name =~ /^$species/ ) {
1666 if ( $db_rel eq $software_version ) {
1667 $temp{$sp_name} = $db_rel . "_" . $assem;
1668 }
1669 }
1670
1671 } else {
1672 # warn( sprintf( "Skipping database '%s'\n", $db ) );
1673 }
1674 } ## end for my $db (@dbnames)
1675
1676 @dbnames = ();
1677
1678 foreach my $key ( keys %temp ) {
1679 push @dbnames, $key . "_" . $temp{$key};
1680 }
1681
1682 # Register Core like databases
1683 my $core_like_dbs_found = 0;
1684 foreach my $type (qw(core cdna vega vega_update otherfeatures rnaseq)) {
1685
1686 my @dbs = grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name
1687 _
1688 $type # the database type
1689 _
1690 (?:\d+_)? # optional end bit for ensembl genomes
1691 \d+ # database release
1692 _
1693 /x } @dbnames;
1694
1695 if(@dbs) {
1696 $core_like_dbs_found = 1;
1697 }
1698
1699 foreach my $database (@dbs) {
1700 if ( index( $database, 'collection' ) != -1 ) {
1701 # Skip multi-species databases.
1702 next;
1703 }
1704
1705
1706 my ( $species, $num ) =
1707 ( $database =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?) # species name
1708 _
1709 $type # type
1710 _
1711 (?:\d+_)? # optional endbit for ensembl genomes
1712 (\d+) # databases release
1713 _
1714 /x );
1715
1716 if(!defined($species)){
1717 warn "Cannot extract species name from database '$database'";
1718 }
1719
1720 my $dba =
1721 Bio::EnsEMBL::DBSQL::DBAdaptor->new(
1722 -group => $type,
1723 -species => $species.$species_suffix,
1724 -host => $host,
1725 -user => $user,
1726 -pass => $pass,
1727 -port => $port,
1728 -dbname => $database,
1729 -wait_timeout => $wait_timeout,
1730 -no_cache => $no_cache );
1731
1732 if ($verbose) {
1733 printf( "Species '%s' loaded from database '%s'\n",
1734 $species, $database );
1735 }
1736 }
1737 }
1738
1739 # Register multi-species databases
1740
1741 my @multi_dbs = grep { /^\w+_collection_core_\w+$/ } @dbnames;
1742
1743 foreach my $multidb (@multi_dbs) {
1744 my $sth = $dbh->prepare(
1745 sprintf(
1746 "SELECT species_id, meta_value FROM %s.meta "
1747 . "WHERE meta_key = 'species.db_name'",
1748 $dbh->quote_identifier($multidb) ) );
1749
1750 $sth->execute();
1751
1752 my ( $species_id, $species );
1753 $sth->bind_columns( \( $species_id, $species ) );
1754
1755 while ( $sth->fetch() ) {
1756 my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
1757 -group => "core",
1758 -species => $species.$species_suffix,
1759 -species_id => $species_id,
1760 -multispecies_db => 1,
1761 -host => $host,
1762 -user => $user,
1763 -pass => $pass,
1764 -port => $port,
1765 -dbname => $multidb,
1766 -wait_timeout => $wait_timeout,
1767 -no_cache => $no_cache
1768 );
1769
1770 if ($verbose) {
1771 printf( "Species '%s' (id:%d) loaded from database '%s'\n",
1772 $species, $species_id, $multidb );
1773 }
1774 }
1775 } ## end foreach my $multidb (@multi_dbs)
1776
1777 if(!$core_like_dbs_found && $verbose) {
1778 print("No core-like databases found. Check your DB_VERSION (used '$software_version')\n");
1779 }
1780
1781 # User upload DBs
1782
1783 my @userupload_dbs = grep { /_userdata$/ } @dbnames;
1784 for my $userupload_db (@userupload_dbs) {
1785 if ( index( $userupload_db, 'collection' ) != -1 ) {
1786 # Skip multi-species databases.
1787 next;
1788 }
1789
1790 my ($species) = ( $userupload_db =~ /(^.+)_userdata$/ );
1791 my $dba =
1792 Bio::EnsEMBL::DBSQL::DBAdaptor->new(
1793 -group => "userupload",
1794 -species => $species.$species_suffix,
1795 -host => $host,
1796 -user => $user,
1797 -pass => $pass,
1798 -port => $port,
1799 -wait_timeout => $wait_timeout,
1800 -dbname => $userupload_db,
1801 -no_cache => $no_cache );
1802
1803 if ($verbose) {
1804 printf( "%s loaded\n", $userupload_db );
1805 }
1806 }
1807
1808 # Register multi-species userupload databases.
1809 my @userdata_multidbs = grep { /^.+_collection_userdata$/ } @dbnames;
1810
1811 foreach my $multidb (@userdata_multidbs) {
1812 my $sth = $dbh->prepare(
1813 sprintf(
1814 "SELECT species_id, meta_value FROM %s.meta "
1815 . "WHERE meta_key = 'species.db_name'",
1816 $dbh->quote_identifier($multidb) ) );
1817
1818 $sth->execute();
1819
1820 my ( $species_id, $species );
1821 $sth->bind_columns( \( $species_id, $species ) );
1822
1823 while ( $sth->fetch() ) {
1824 my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
1825 -group => "userupload",
1826 -species => $species.$species_suffix,
1827 -species_id => $species_id,
1828 -multispecies_db => 1,
1829 -host => $host,
1830 -user => $user,
1831 -pass => $pass,
1832 -port => $port,
1833 -dbname => $multidb,
1834 -wait_timeout => $wait_timeout,
1835 -no_cache => $no_cache
1836 );
1837
1838 if ($verbose) {
1839 printf( "Species '%s' (id:%d) loaded from database '%s'\n",
1840 $species, $species_id, $multidb );
1841 }
1842 }
1843 } ## end foreach my $multidb (@userdata_multidbs)
1844
1845 # Variation
1846
1847 my $test_eval = eval "require Bio::EnsEMBL::Variation::DBSQL::DBAdaptor";
1848 if ($@or (!$test_eval)) {
1849 # Ignore variations as code required not there for this
1850 if ($verbose) {
1851 print(
1852 "Bio::EnsEMBL::Variation::DBSQL::DBAdaptor module not found "
1853 . "so variation databases will be ignored if found\n" );
1854 }
1855 }
1856 else {
1857 my @variation_dbs =
1858 grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_variation_(?:\d+_)?\d+_/ } @dbnames;
1859
1860 if(! @variation_dbs && $verbose) {
1861 print("No variation databases found\n");
1862 }
1863
1864 for my $variation_db (@variation_dbs) {
1865
1866 if ( index( $variation_db, 'collection' ) != -1 ) {
1867 # Skip multi-species databases.
1868 next;
1869 }
1870
1871 my ( $species, $num ) =
1872 ( $variation_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_variation_(?:\d+_)?(\d+)_/ );
1873 my $dba =
1874 Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(
1875 -group => "variation",
1876 -species => $species.$species_suffix,
1877 -host => $host,
1878 -user => $user,
1879 -pass => $pass,
1880 -port => $port,
1881 -wait_timeout => $wait_timeout,
1882 -dbname => $variation_db,
1883 -no_cache => $no_cache );
1884
1885 if ($verbose) {
1886 printf( "%s loaded\n", $variation_db );
1887 }
1888 }
1889
1890 # Register variation multispecies databases
1891 my @variation_multidbs =
1892 grep { /^\w+_collection_variation_\w+$/ } @dbnames;
1893
1894 foreach my $multidb (@variation_multidbs) {
1895 my $sth = $dbh->prepare(
1896 sprintf( 'SELECT species_id, meta_value FROM %s.meta ',
1897 $dbh->quote_identifier($multidb) )
1898 . "WHERE meta_key = 'species.db_name'"
1899 );
1900
1901 $sth->execute();
1902
1903 my ( $species_id, $species );
1904 $sth->bind_columns( \( $species_id, $species ) );
1905
1906 while ( $sth->fetch() ) {
1907 my $dba = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(
1908 -group => 'variation',
1909 -species => $species.$species_suffix,
1910 -species_id => $species_id,
1911 -multispecies_db => 1,
1912 -host => $host,
1913 -user => $user,
1914 -pass => $pass,
1915 -port => $port,
1916 -dbname => $multidb,
1917 -wait_timeout => $wait_timeout,
1918 -no_cache => $no_cache
1919 );
1920
1921 if ($verbose) {
1922 printf( "Species '%s' (id:%d) loaded from database '%s'\n",
1923 $species, $species_id, $multidb );
1924 }
1925 }
1926 } ## end foreach my $multidb (@variation_multidbs)
1927 }
1928
1929 my $func_eval = eval "require Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor";
1930 if ($@ or (!$func_eval)) {
1931 if ($verbose) {
1932 # Ignore funcgen DBs as code required not there for this
1933 print("Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor module not found "
1934 . "so functional genomics databases will be ignored if found\n"
1935 );
1936 }
1937 } else {
1938 my @funcgen_dbs =
1939 grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_funcgen_(?:\d+_)?\d+_/ } @dbnames;
1940
1941 if(! @funcgen_dbs && $verbose) {
1942 print("No funcgen databases found\n");
1943 }
1944
1945 for my $funcgen_db (@funcgen_dbs) {
1946 if ( index( $funcgen_db, 'collection' ) != -1 ) {
1947 # Skip multi-species databases.
1948 next;
1949 }
1950
1951 my ( $species, $num ) =
1952 ( $funcgen_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_funcgen_(?:\d+_)?(\d+)_/ );
1953 my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
1954 -group => "funcgen",
1955 -species => $species.$species_suffix,
1956 -host => $host,
1957 -user => $user,
1958 -pass => $pass,
1959 -port => $port,
1960 -wait_timeout => $wait_timeout,
1961 -dbname => $funcgen_db,
1962 -no_cache => $no_cache
1963 );
1964
1965 if ($verbose) {
1966 printf( "%s loaded\n", $funcgen_db );
1967 }
1968 }
1969
1970 # Register functional genomics multispecies databases
1971 my @funcgen_multidbs =
1972 grep { /^\w+_collection_funcgen_\w+$/ } @dbnames;
1973
1974 foreach my $multidb (@funcgen_multidbs) {
1975 my $sth = $dbh->prepare(
1976 sprintf( 'SELECT species_id, meta_value FROM %s.meta ',
1977 $dbh->quote_identifier($multidb) )
1978 . "WHERE meta_key = 'species.db_name'"
1979 );
1980
1981 $sth->execute();
1982
1983 my ( $species_id, $species );
1984 $sth->bind_columns( \( $species_id, $species ) );
1985
1986 while ( $sth->fetch() ) {
1987 my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
1988 -group => 'funcgen',
1989 -species => $species.$species_suffix,
1990 -species_id => $species_id,
1991 -multispecies_db => 1,
1992 -host => $host,
1993 -user => $user,
1994 -pass => $pass,
1995 -port => $port,
1996 -dbname => $multidb,
1997 -wait_timeout => $wait_timeout,
1998 -no_cache => $no_cache
1999 );
2000
2001 if ($verbose) {
2002 printf( "Species '%s' (id:%d) loaded from database '%s'\n",
2003 $species, $species_id, $multidb );
2004 }
2005 }
2006 } ## end foreach my $multidb (@funcgen_multidbs)
2007 } ## end else [ if ($@) ]
2008
2009 # Compara
2010
2011 my @compara_dbs = grep { /^ensembl_compara/ } @dbnames;
2012
2013 if (@compara_dbs) {
2014 my $comp_eval = eval "require Bio::EnsEMBL::Compara::DBSQL::DBAdaptor";
2015 if ($@ or (!$comp_eval)) {
2016 # Ignore Compara as code required not there for this
2017 if ($verbose) {
2018 printf(
2019 "Bio::EnsEMBL::Compara::DBSQL::DBAdaptor "
2020 . "not found so the following compara "
2021 . "databases will be ignored: %s\n",
2022 join( ', ', @compara_dbs ) );
2023 }
2024 } else {
2025 foreach my $compara_db (@compara_dbs) {
2026 # Looking for EnsEMBL Genomes Comparas.
2027 # ensembl_compara_bacteria_2_53 is registered as
2028 # 'bacteria', ensembl_compara_pan_homology_2_53 is
2029 # registered as 'pan_homology', ensembl_compara_53 is
2030 # registered as 'multi', and the alias 'compara' still
2031 # operates.
2032
2033 my ($species) =
2034 $compara_db =~ /^ensembl_compara_(\w+)(?:_\d+){2}$/xm;
2035
2036 $species ||= 'multi';
2037
2038 my $dba = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(
2039 -group => 'compara',
2040 -species => $species.$species_suffix,
2041 -host => $host,
2042 -user => $user,
2043 -pass => $pass,
2044 -port => $port,
2045 -wait_timeout => $wait_timeout,
2046 -dbname => $compara_db,
2047 -no_cache => $no_cache
2048 );
2049
2050 if ($verbose) {
2051 printf( "%s loaded\n", $compara_db );
2052 }
2053 } ## end foreach my $compara_db (@compara_dbs)
2054 } ## end else [ if ($@)
2055 } elsif ($verbose) {
2056 print("No Compara databases found\n");
2057 }
2058
2059 # Ancestral sequences
2060
2061 my @ancestral_dbs =
2062 sort grep { /^ensembl_ancestral/ } @dbnames;
2063
2064 if (@ancestral_dbs) {
2065 my $ancestral_db = shift @ancestral_dbs;
2066
2067 my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
2068 -group => 'core',
2069 -species => 'Ancestral sequences'.$species_suffix,
2070 -host => $host,
2071 -user => $user,
2072 -pass => $pass,
2073 -port => $port,
2074 -wait_timeout => $wait_timeout,
2075 -dbname => $ancestral_db,
2076 -no_cache => $no_cache
2077 );
2078
2079 if ($verbose) {
2080 printf( "%s loaded\n", $ancestral_db );
2081
2082 if (@ancestral_dbs) {
2083 # If we still had some more then report the problem.
2084 printf(
2085 "Multiple ancestral databases found.\n"
2086 . "Ignoring the following: %s\n",
2087 join( ', ', @ancestral_dbs ) );
2088 }
2089 }
2090 } elsif ($verbose) {
2091 print("No ancestral database found\n");
2092 }
2093
2094 # Ontology
2095
2096 if ( defined($ontology_version) && $ontology_version != 0 ) {
2097 require Bio::EnsEMBL::DBSQL::OntologyDBAdaptor;
2098
2099 my $dba =
2100 Bio::EnsEMBL::DBSQL::OntologyDBAdaptor->new(
2101 '-species' => 'multi' . $species_suffix,
2102 '-group' => 'ontology',
2103 '-host' => $host,
2104 '-port' => $port,
2105 '-user' => $user,
2106 '-pass' => $pass,
2107 '-dbname' => $ontology_db, );
2108
2109 if ($verbose) {
2110 printf( "%s loaded\n", $ontology_db );
2111 }
2112 }
2113 elsif ($verbose) {
2114 print("No ontology database found\n");
2115 }
2116
2117
2118 if ( defined($stable_ids_db) && $stable_ids_version != 0 ) {
2119
2120 my $dba =
2121 Bio::EnsEMBL::DBSQL::DBAdaptor->new(
2122 '-species' => 'multi' . $species_suffix,
2123 '-group' => 'stable_ids',
2124 '-host' => $host,
2125 '-port' => $port,
2126 '-user' => $user,
2127 '-pass' => $pass,
2128 '-dbname' => $stable_ids_db, );
2129
2130 if ($verbose) {
2131 printf( "%s loaded\n", $stable_ids_db );
2132 }
2133
2134 }
2135
2136
2137 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2138 -species => 'multi'.$species_suffix,
2139 -alias => ['compara'.$species_suffix] );
2140
2141 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2142 -species => 'multi'.$species_suffix,
2143 -alias => ['ontology'.$species_suffix] );
2144
2145
2146 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2147 -species => 'multi'.$species_suffix,
2148 -alias => ['stable_ids'.$species_suffix] );
2149
2150 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2151 -species => 'Ancestral sequences'.$species_suffix,
2152 -alias => ['ancestral_sequences'.$species_suffix] );
2153
2154 # Register aliases as found in adaptor meta tables.
2155
2156 $self->find_and_add_aliases( '-handle' => $dbh,
2157 '-species_suffix' => $species_suffix );
2158
2159 $self->_additional_aliases($species_suffix);
2160
2161 $dbh->disconnect();
2162
2163 my $count = $self->get_DBAdaptor_count() - $original_count;
2164 return $count >= 0 ? $count : 0;
2165
2166 } ## end sub load_registry_from_db
2167
2168
2169 # Used as a place to push "hack" aliases
2170 sub _additional_aliases {
2171 my ($self, $species_suffix) = @_;
2172
2173 #Adding branch-68 thirteen-lined ground squirrel "old" aliases
2174 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2175 -species => 'ictidomys_tridecemlineatus'.$species_suffix,
2176 -alias => ['spermophilus_tridecemlineatus'.$species_suffix] );
2177 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
2178 -species => 'ictidomys_tridecemlineatus'.$species_suffix,
2179 -alias => ['spermophilus tridecemlineatus'.$species_suffix] );
2180
2181 return;
2182 } # end sub _additional_aliases
2183
2184 =head2 _group_to_adaptor_class
2185
2186 Arg [1] : The group you wish to decode to an adaptor class
2187 Example : Bio::EnsEMBL::Registry->_group_to_adaptor_class('core');
2188 Description : Has an internal lookup of groups to their adaptor classes
2189 Returntype : String
2190 Exceptions : Thrown if the group is unknown
2191 Status : Stable
2192
2193 =cut
2194
2195 sub _group_to_adaptor_class {
2196 my ($self, $group) = @_;
2197 my $class = {
2198 core => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
2199 cdna => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
2200 otherfeatures => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
2201 rnaseq => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
2202 vega => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
2203 variation => 'Bio::EnsEMBL::Variation::DBSQL::DBAdaptor',
2204 funcgen => 'Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor',
2205 compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
2206 }->{$group};
2207 throw "Group '${group}' is unknown" if ! $class;
2208 return $class;
2209 }
2210
2211
2212 =head2 find_and_add_aliases
2213
2214 Arg [ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor
2215 The adaptor to use to retrieve aliases from.
2216
2217 Arg [GROUP] : (optional) string
2218 The group you want to find aliases for. If not
2219 given assumes all types.
2220
2221 Arg [HANDLE] : (optional) DBI database handle
2222 A connected database handle to use instead of
2223 the database handles stored in the DBAdaptors.
2224 Bypasses the use of MetaContainer.
2225
2226 Arg [SPECIES_SUFFIX]: (optional) string
2227 This option will append the string to the species
2228 name in the registry for all databases.
2229
2230 Example : Bio::EnsEMBL::Registry->find_and_add_aliases(
2231 -ADAPTOR => $dba,
2232 -GROUP => 'core'
2233 );
2234
2235 Description : Looks in the meta container for each database for
2236 an entry called "species.alias". If any are found
2237 then the species adaptor is registered to that
2238 set of aliases. This can work across any adaptor
2239 which has a MetaContainer. If no MetaContainer
2240 can be returned from a given adaptor then no alias
2241 searching is performed.
2242
2243 Return type : none
2244 Exceptions : Throws if an alias is found in more than one species.
2245 Status : Stable
2246
2247 =cut
2248
2249 sub find_and_add_aliases {
2250 my $class = shift ;
2251
2252 my ($adaptor, $group, $dbh, $species_suffix ) =
2253 rearrange( [ 'ADAPTOR', 'GROUP', 'HANDLE', 'SPECIES_SUFFIX' ], @_ );
2254
2255 #Can be undef; needs to be something to avoid warnings
2256 $species_suffix ||= q{};
2257
2258 my @dbas;
2259 if ( defined($adaptor) ) {
2260 @dbas = ($adaptor);
2261 } elsif ( defined($dbh) ) {
2262
2263 if ( length($species_suffix) > 0 ) {
2264 my @full = @{ $class->get_all_DBAdaptors( '-GROUP' => $group ) };
2265
2266 foreach my $db (@full) {
2267 if ( $db->species =~ /$species_suffix/ ) {
2268 push( @dbas, $db );
2269 }
2270 }
2271
2272 } else {
2273 @dbas = @{ $class->get_all_DBAdaptors( '-GROUP' => $group ) };
2274 }
2275
2276 } else {
2277 @dbas = @{ $class->get_all_DBAdaptors( '-GROUP' => $group ) };
2278 }
2279
2280 foreach my $dba (@dbas) {
2281 my @aliases;
2282 my $species = $dba->species();
2283
2284 if ( defined($dbh) ) {
2285 my $dbname = $dba->dbc()->dbname();
2286 my $sth = $dbh->prepare( sprintf(
2287 "SELECT meta_value FROM %s.meta "
2288 . "WHERE meta_key = 'species.alias' "
2289 . "AND species_id = ?",
2290 $dbh->quote_identifier($dbname) ) );
2291
2292 # Execute, and don't care about errors (there will be errors for
2293 # databases without a 'meta' table.
2294 $sth->{'PrintError'} = 0;
2295 $sth->{'RaiseError'} = 0;
2296 if ( !$sth->execute( $dba->species_id() ) ) { next }
2297 $sth->{'PrintError'} = $dbh->{'PrintError'};
2298 $sth->{'RaiseError'} = $dbh->{'RaiseError'};
2299
2300 my $alias;
2301 $sth->bind_columns( \$alias );
2302 while ( $sth->fetch() ) {
2303 push( @aliases, $alias );
2304 }
2305 } else {
2306 my $meta_container = eval { $dba->get_MetaContainer() };
2307
2308 if ( defined($meta_container) ) {
2309 push( @aliases,
2310 @{ $meta_container->list_value_by_key('species.alias') }
2311 );
2312 }
2313
2314 # Need to disconnect so we do not spam the MySQL servers trying to
2315 # get aliases. Can only call disonnect if dbc was defined.
2316 if ( defined( $dba->dbc() ) ) {
2317 $dba->dbc()->disconnect_if_idle();
2318 }
2319 }
2320
2321 foreach my $alias (@aliases) {
2322 my $alias_suffix = $alias.$species_suffix;
2323 #Lowercase because stored aliases are lowercased
2324 my $lc_species = lc($species);
2325 my $lc_alias_suffix = lc($alias_suffix);
2326 if ( !$class->alias_exists( $alias_suffix )
2327 && $lc_species ne $lc_alias_suffix )
2328 {
2329 $class->add_alias( $species, $alias_suffix );
2330 } elsif (
2331 $lc_species ne $class->get_alias( $alias_suffix ) )
2332 {
2333 $class->remove_alias( $species, $alias_suffix );
2334 }
2335 }
2336
2337 } ## end foreach my $dba (@dbas)
2338 return;
2339 } ## end sub find_and_add_aliases
2340
2341
2342 =head2 load_registry_from_multiple_dbs
2343
2344 Arg [1] : Array of hashes, each hash being a set of arguments to
2345 load_registry_from_db() (see above).
2346
2347 Example :
2348
2349 $registry->load_registry_from_multiple_dbs( {
2350 '-host' => 'ensembldb.ensembl.org',
2351 '-user' => 'anonymous',
2352 '-verbose' => '1'
2353 },
2354 {
2355 '-host' => 'server.example.com',
2356 '-user' => 'anonymouse',
2357 '-password' => 'cheese',
2358 '-verbose' => '1'
2359 } );
2360
2361 Description: Will call load_registry_from_db() (see above)
2362 multiple times and merge the resulting registries
2363 into one, effectively allowing a user to connect to
2364 databases on multiple database servers from within
2365 one program.
2366
2367 If a database is found on more than one server, the
2368 first found instance of that database will be used.
2369
2370 Returntype : Int count of the DBAdaptor instances which can be found in the
2371 registry
2372
2373 =cut
2374
2375 sub load_registry_from_multiple_dbs {
2376 my ( $self, @args ) = @_;
2377
2378 my $original_count = $self->get_DBAdaptor_count();
2379
2380 my %merged_register = %registry_register;
2381
2382 foreach my $arg (@args) {
2383 local %registry_register = ();
2384
2385 my $verbose;
2386
2387 ($verbose) = rearrange( ['VERBOSE'], %{$arg} );
2388
2389 $self->load_registry_from_db( %{$arg} );
2390
2391 #
2392 # Merge the localized %registry_register into %merged_register.
2393 #
2394
2395 # Merge the _SPECIES and _ALIAS sections of %registry_register.
2396 foreach my $section ( 'Species', 'Alias' ) {
2397 my $section_key = '_' . uc($section);
2398
2399 while ( my ( $key, $value ) =
2400 each( %{ $registry_register{$section_key} } ) )
2401 {
2402 if ( !exists( $merged_register{$section_key}{$key} ) ) {
2403 $merged_register{$section_key}{$key} = $value;
2404 } elsif ($verbose) {
2405 printf( "%s '%s' found on multiple servers, "
2406 . "using first found\n",
2407 $section, $key );
2408 }
2409 }
2410 }
2411 } ## end foreach my $arg (@args)
2412
2413 # Add the DBAs from the _SPECIES section into the _DBA section.
2414 foreach my $species_hash ( values( %{ $merged_register{_SPECIES} } ) )
2415 {
2416 foreach my $group_hash ( values( %{$species_hash} ) ) {
2417 if ( ref($group_hash) eq 'HASH' && exists( $group_hash->{_DB} ) )
2418 {
2419 push( @{ $merged_register{_DBA} }, $group_hash->{_DB} );
2420 }
2421 }
2422 }
2423
2424 %registry_register = %merged_register;
2425
2426 my $count = $self->get_DBAdaptor_count() - $original_count;
2427 return $count >= 0 ? $count : 0;
2428 } ## end sub load_registry_from_multiple_dbs
2429
2430 #
2431 # Web specific routines
2432 #
2433
2434 =head2 DEPRECATED load_registry_with_web_adaptors
2435
2436 DEPRECATED: Use load_registry_from_db instead.
2437
2438 =cut
2439
2440 sub load_registry_with_web_adaptors{
2441 my $class = shift;
2442
2443 deprecate('Use the load_registry_from_db instead');
2444 my $site_eval = eval{ require SiteDefs };
2445 if ($@ or (!defined($site_eval))){ die "Can't use SiteDefs.pm - $@\n"; }
2446 SiteDefs->import(qw(:ALL));
2447
2448 my $species_eval = eval{ require SpeciesDefs };
2449 if ($@ or (!defined($species_eval))){ die "Can't use SpeciesDefs.pm - $@\n"; }
2450 my $conf = new SpeciesDefs();
2451
2452 my %species_alias = %{$SiteDefs::ENSEMBL_SPECIES_ALIASES};
2453
2454 foreach my $spec (keys %species_alias){
2455 Bio::EnsEMBL::Registry->add_alias($species_alias{$spec},$spec);
2456 }
2457 return;
2458 }
2459
2460 =head2 set_default_track
2461
2462 Sets a flag to say that that this species/group are a default track and do not
2463 need to be added as another web track.
2464
2465 Arg [1] : name of the species to get the adaptors for in the registry.
2466 Arg [2] : name of the type to get the adaptors for in the registry.
2467 Example : $merged = Bio::EnsEMBL::Registry->set_default_track("Human","core");
2468 Returntype : none
2469 Exceptions : none
2470 Status : At Risk.
2471
2472 =cut
2473
2474 sub set_default_track {
2475 my ( $class, $species, $group ) = @_;
2476
2477 $species = get_alias($species);
2478 $registry_register{'def_track'}{$species}{ lc($group) } = 1;
2479 return;
2480 }
2481
2482 =head2 default_track
2483
2484 Check flag to see if this is a default track
2485
2486 Arg [1] : name of the species to get the adaptors for in the registry.
2487 Arg [2] : name of the type to get the adaptors for in the registry.
2488 Example : $merged = Bio::EnsEMBL::Registry->set_default_track("Human","core");
2489 Returntype : int
2490 Exceptions : none
2491 Status : At Risk.
2492
2493 =cut
2494
2495 sub default_track {
2496 my ( $class, $species, $group ) = @_;
2497
2498 $species = get_alias($species);
2499 if (
2500 defined( $registry_register{'def_track'}{$species}{ lc($group) } ) )
2501 {
2502 return 1;
2503 }
2504
2505 return 0;
2506 }
2507
2508
2509 =head2 add_new_tracks
2510
2511 Will add new gene tracks to the configuration of the WEB server if they are
2512 not of the type default and the configuration already has genes in the display.
2513
2514 Arg [1] : hash of the default configuration of the web page
2515 Returntype : none
2516 Exceptions : none
2517 Called by : UserConfig.pm
2518 Status : At Risk.
2519
2520 =cut
2521
2522 sub add_new_tracks{
2523 my($class, $conf, $pos) = @_;
2524
2525 my $start = 0;
2526 my $reg = $class;
2527 my $species_reg = $reg->get_alias($conf->{'species'},"nothrow");
2528 my %pars;
2529 # print STDERR "Species $species_reg check for default tracks\n";
2530 if(defined($species_reg)){
2531 foreach my $dba (@{$reg->get_all_DBAdaptors()}){
2532 if(!$reg->default_track($dba->species,$dba->group)){
2533 $pars{'available'} = "species ".$reg->get_alias($dba->species());
2534 $pars{'db_alias'} = $dba->group();
2535 # print STDERR "Adding new track for ".$dba->species."\t".$dba->group."\n";
2536 $conf->add_new_track_generictranscript('',$dba->group(), "black",$pos,%pars);
2537 $pos++;
2538 }
2539 }
2540 }
2541 return $pos;
2542
2543 }
2544
2545 =head2 no_version_check
2546
2547 getter/setter for whether to run the version checking
2548
2549 Arg[0] : (optional) int
2550 Returntype : int or undef if not set
2551 Exceptions : none
2552 Status : At Risk.
2553
2554 =cut
2555
2556 sub no_version_check {
2557 my ( $self, $arg ) = @_;
2558 ( defined $arg )
2559 && ( $registry_register{'_no_version_check'} = $arg );
2560
2561 return $registry_register{'_no_version_check'};
2562 }
2563
2564 =head2 no_cache_warnings
2565
2566 Arg[0] : boolean for turning the flag on and off
2567 Description : Turns off any warnings about not using caching in all available
2568 adaptors.
2569 Returntype : boolean Current status
2570 Exceptions : None
2571
2572 =cut
2573
2574 sub no_cache_warnings {
2575 my ($self, $arg) = @_;
2576 if(defined $arg) {
2577 $Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::SILENCE_CACHE_WARNINGS = $arg;
2578 }
2579 return $Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::SILENCE_CACHE_WARNINGS;
2580 }
2581
2582
2583 =head2 version_check
2584
2585 run the database/API code version check for a DBAdaptor
2586
2587 Arg[0] : DBAdaptor to check
2588 Returntype : int 1 if okay, 0 if not the same
2589 Exceptions : none
2590 Status : At Risk.
2591
2592 =cut
2593
2594
2595 sub version_check {
2596 my ( $self, $dba ) = @_;
2597
2598 # Check the datbase and versions match
2599 # give warning if they do not.
2600 my $check = no_version_check();
2601
2602 if ( (
2603 defined( $ENV{HOME} )
2604 and ( -e $ENV{HOME} . "/.ensemblapi_no_version_check" ) )
2605 or ( defined($check) and ( $check != 0 ) ) )
2606 {
2607 return 1;
2608 }
2609
2610 my $mca =
2611 $self->get_adaptor( $dba->species(), $dba->group(),
2612 "MetaContainer" );
2613
2614 my $database_version = 0;
2615 if ( defined($mca) ) {
2616 $database_version = $mca->get_schema_version();
2617 }
2618
2619 if ( $database_version == 0 ) {
2620 # Try to work out the version
2621 if ( $dba->dbc()->dbname() =~ /^_test_db_/x ) {
2622 return 1;
2623 }
2624 if ( $dba->dbc()->dbname() =~ /(\d+)_\S+$/x ) {
2625 $database_version = $1;
2626 } elsif ( $dba->dbc()->dbname() =~ /ensembl_compara_(\d+)/x ) {
2627 $database_version = $1;
2628 } elsif ( $dba->dbc()->dbname() =~ /ensembl_help_(\d+)/x ) {
2629 $database_version = $1;
2630 } elsif ( $dba->dbc()->dbname() =~ /ensembl_ontology_(\d+)/x ) {
2631 $database_version = $1;
2632 } elsif ( $dba->dbc()->dbname() =~ /ensembl_stable_ids_(\d+)/x ) {
2633 $database_version = $1;
2634 } else {
2635 warn(
2636 sprintf(
2637 "No database version for database %s "
2638 . ". You must be using a post version 34 database "
2639 . "with version 34 or later code.\n"
2640 . "You need to update your database "
2641 . "or use the appropriate Ensembl software release "
2642 . "to ensure your script does not crash\n",
2643 $dba->dbc()->dbname() ) );
2644 }
2645 } ## end if ( $database_version...
2646
2647 if ( $database_version != software_version() ) {
2648 warn(
2649 sprintf(
2650 "For %s there is a difference in the software release (%s) "
2651 . "and the database release (%s). "
2652 . "You should update one of these to ensure that your script "
2653 . "does not crash.\n",
2654 $dba->dbc()->dbname(),
2655 software_version(), $database_version
2656 ) );
2657 return 0;
2658 }
2659
2660 return 1; # Ok
2661 } ## end sub version_check
2662
2663
2664 =head2 get_species_and_object_type
2665
2666 Description: Get the species name, object type (gene, transcript,
2667 translation, or exon etc.), and database type for a
2668 stable ID.
2669
2670 Arg [1] : String stable_id
2671 The stable ID to find species and object type for.
2672
2673 Arg [2] : String known_type (optional)
2674 The type of the stable ID, if it is known.
2675
2676 Arg [3] : String known_species (optional)
2677 The species, if known
2678
2679 Arg [4] : String known_db_type (optional)
2680 The database type, if known
2681
2682 Example : my $stable_id = 'ENST00000326632';
2683
2684 my ( $species, $object_type, $db_type ) =
2685 $registry->get_species_and_object_type($stable_id);
2686
2687 my $adaptor =
2688 $registry->get_adaptor( $species, $db_type,
2689 $object_type );
2690
2691 my $object = $adaptor->fetch_by_stable_id($stable_id);
2692
2693 Return type: Array consisting of the species name, object type,
2694 and database type. The array may be empty if no
2695 match is found.
2696
2697 Exceptions : none
2698 Status : At Risk.
2699
2700 =cut
2701
2702 my %stable_id_stmts = (
2703 gene => 'SELECT m.meta_value '
2704 . 'FROM %1$s.gene '
2705 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2706 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2707 . 'JOIN %1$s.meta m USING (species_id) '
2708 . 'WHERE stable_id = ? '
2709 . 'AND m.meta_key = "species.production_name"',
2710 transcript => 'SELECT m.meta_value '
2711 . 'FROM %1$s.transcript '
2712 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2713 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2714 . 'JOIN %1$s.meta m USING (species_id) '
2715 . 'WHERE stable_id = ? '
2716 . 'AND m.meta_key = "species.production_name"',
2717 exon => 'SELECT m.meta_value '
2718 . 'FROM %1$s.exon '
2719 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2720 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2721 . 'JOIN %1$s.meta m USING (species_id) '
2722 . 'WHERE stable_id = ? '
2723 . 'AND m.meta_key = "species.production_name"',
2724 translation => 'SELECT m.meta_value '
2725 . 'FROM %1$s.translation tl '
2726 . 'JOIN %1$s.transcript USING (transcript_id) '
2727 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2728 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2729 . 'JOIN %1$s.meta m USING (species_id) '
2730 . 'WHERE tl.stable_id = ? '
2731 . 'AND m.meta_key = "species.production_name"',
2732 operon => 'SELECT m.meta_value '
2733 . 'FROM %1$s.operon '
2734 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2735 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2736 . 'JOIN %1$s.meta m USING (species_id) '
2737 . 'WHERE stable_id = ? '
2738 . 'AND m.meta_key = "species.production_name"',
2739 operontranscript => 'SELECT m.meta_value '
2740 . 'FROM %1$s.operon_transcript '
2741 . 'JOIN %1$s.seq_region USING (seq_region_id) '
2742 . 'JOIN %1$s.coord_system USING (coord_system_id) '
2743 . 'JOIN %1$s.meta m USING (species_id) '
2744 . 'WHERE stable_id = ? '
2745 . 'AND m.meta_key = "species.production_name"',
2746
2747 );
2748
2749
2750 sub get_species_and_object_type {
2751 my ($self, $stable_id, $known_type, $known_species, $known_db_type, $force_long_lookup) = @_;
2752
2753 #get the stable_id lookup database adaptor
2754 my $stable_ids_dba = $self->get_DBAdaptor("multi", "stable_ids", 1);
2755
2756 if ($stable_ids_dba && ! $force_long_lookup) {
2757 my $statement = 'SELECT name, object_type, db_type FROM stable_id_lookup join species using(species_id) WHERE stable_id = ?';
2758
2759 if ($known_species) {
2760 $statement .= ' AND name = ?';
2761 }
2762 if ($known_db_type) {
2763 $statement .= ' AND db_type = ?';
2764 }
2765 if ($known_type) {
2766 $statement .= ' AND object_type = ?';
2767 }
2768
2769 my $sth = $stable_ids_dba->dbc()->prepare($statement);
2770 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
2771 my $param_count = 1;
2772 if ($known_species) {
2773 $known_species = $self->get_alias($known_species);
2774 $param_count++;
2775 $sth->bind_param($param_count, $known_species, SQL_VARCHAR);
2776 }
2777 if ($known_db_type) {
2778 $param_count++;
2779 $sth->bind_param($param_count, $known_db_type, SQL_VARCHAR);
2780 }
2781 if ($known_type) {
2782 $param_count++;
2783 $sth->bind_param($param_count, $known_type, SQL_VARCHAR);
2784 }
2785 $sth->execute();
2786 my ($species, $type, $db_type) = $sth->fetchrow_array();
2787 $sth->finish();
2788 return ($species ,$type, $db_type);
2789
2790 } else {
2791 if (defined $known_type && !exists $stable_id_stmts{lc $known_type}) {
2792 return;
2793 }
2794
2795 my @types = defined $known_type ? ($known_type) : ('Gene', 'Transcript', 'Translation', 'Exon', 'Operon', 'OperonTranscript');
2796
2797 if(! $known_db_type) {
2798 $known_db_type = 'core';
2799 }
2800
2801 my %get_adaptors_args;
2802 $get_adaptors_args{'-group'} = $known_db_type;
2803 if ($known_species) {
2804 $get_adaptors_args{'-species'} = $known_species;
2805 }
2806
2807 my @dbas = sort { $a->dbc->host cmp $b->dbc->host || $a->dbc->port <=> $b->dbc->port }
2808 @{$self->get_all_DBAdaptors(%get_adaptors_args)};
2809 foreach my $dba (@dbas) {
2810
2811 foreach my $type (@types) {
2812 my $statement = sprintf $stable_id_stmts{lc $type}, $dba->dbc->dbname;
2813
2814 my $sth = $dba->dbc()->prepare($statement);
2815 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
2816 $sth->execute;
2817
2818 my $species = $sth->fetchall_arrayref->[0][0];
2819
2820 $sth->finish;
2821
2822 return ($species, $type, $known_db_type) if defined $species;
2823 }
2824
2825 } ## end foreach my $dba ( sort { $a...})
2826
2827 }
2828
2829 return;
2830 } ## end sub get_species_and_object_type
2831
2832 1;