annotate variant_effect_predictor/Bio/EnsEMBL/Pipeline/SpeciesFactory.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Pipeline::SpeciesFactory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 A module which generates dump jobs for each species it finds in the Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 Registry. The species we run the code on can be controlled by specifying
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 the I<species> parameter or by reducing the number of DBAdaptors loaded into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 the registry.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 Allowed parameters are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 =over 8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =item species - Can be an array of species to perform dumps for or a single
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 species name. If specified only jobs will be created for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 those species. Defaults to nothing so all species are processed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 =item db_types - Specify the types of database to dump. Defaults to core and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 should be an array.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 The code flows once per species to branch 2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 package Bio::EnsEMBL::Pipeline::SpeciesFactory;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use base qw/Bio::EnsEMBL::Pipeline::Base/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::EnsEMBL::Registry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Default parameter values for this runnable.
#
# Returns a hash reference with two keys:
#   db_types - array reference holding the single entry 'core'
#   species  - empty array reference (no species filter)
sub param_defaults {
  my ($self) = @_;
  my %defaults = (
    species  => [],
    db_types => ['core'],
  );
  return \%defaults;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Prepares the job before run() is called.
#
# Steps:
#   1. Calls reset_empty_array_param('db_types') (helper defined outside this
#      file; presumably restores the default when the array is empty - verify
#      against the base class).
#   2. Fetches the DBAdaptors via get_DBAdaptors() and stores them in the
#      'dbas' parameter.
#   3. Normalises the 'species' parameter to an array reference and stores it
#      back, so that later code can always dereference it as an array.
#   4. Resolves every requested species through the Registry alias table and
#      stores the resulting names as keys of the 'species_lookup' hash.
#
# Returns nothing.
sub fetch_input {
  my ($self) = @_;

  $self->reset_empty_array_param('db_types');

  my $core_dbas = $self->get_DBAdaptors();
  $self->info('Found %d core DBAdaptor(s) to process', scalar(@{$core_dbas}));
  $self->param('dbas', $core_dbas);

  #The species parameter is documented as either an array of names or a single
  #name. Dereferencing a plain string as an array is fatal under strict refs,
  #so wrap a single name in an array and treat undef/empty string as "no filter"
  my $species = $self->param('species');
  if(!defined $species || (!ref($species) && $species eq '')) {
    $species = [];
  }
  elsif(ref($species) ne 'ARRAY') {
    $species = [$species];
  }
  $self->param('species', $species);

  my %species_lookup =
    map { $_ => 1 }
    map { Bio::EnsEMBL::Registry->get_alias($_) }
    @{$species};
  $self->param('species_lookup', \%species_lookup);

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds the list of output jobs, one per accepted DBAdaptor.
#
# Every DBAdaptor held in the 'dbas' parameter is offered to process_dba();
# rejected ones are logged and skipped. For each accepted one an input id is
# built with input_id() and paired with flow number 2.
#
# NOTE: the resulting list of [ input_id, 2 ] pairs is stored under the
# 'species' parameter, replacing the species filter that was held there.
# do_flow('species') later reads it back from that key.
#
# Returns nothing.
sub run {
  my ($self) = @_;
  my @species;
  foreach my $dba (@{$self->param('dbas')}) {
    if(!$self->process_dba($dba)) {
      $self->fine('Skipping %s', $dba->species());
      next;
    }
    my $input_id = $self->input_id($dba);
    push(@species, [ $input_id, 2 ]);
  }
  $self->param('species', \@species);
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Emits the jobs stored under the 'species' parameter by handing that key to
# do_flow(). Returns nothing.
sub write_output {
  my $self = shift;
  $self->do_flow('species');
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Asks the Ensembl Registry for all DBAdaptors whose group is 'core' and
# returns whatever the Registry hands back (fetch_input dereferences the
# result as an array reference).
sub get_DBAdaptors {
  my $self = shift;
  my @criteria = ('-GROUP', 'core');
  return Bio::EnsEMBL::Registry->get_all_DBAdaptors(@criteria);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Sends every job stored under the given parameter name down its flow.
#
# $key names a parameter holding an array reference of [ input_id, flow ]
# pairs (the shape run() stores under 'species'). Each pair is logged and
# then passed to dataflow_output_id(input_id, flow).
#
# Returns nothing.
sub do_flow {
  my ($self, $key) = @_;
  my $pairs = $self->param($key);
  for my $pair (@{$pairs}) {
    my ($output_id, $branch) = @{$pair};
    $self->fine('Flowing %s to %d for %s', $output_id->{species}, $branch, $key);
    $self->dataflow_output_id($output_id, $branch);
  }
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Decides whether a DBAdaptor should have a job created for it.
#
# Returns 0 when the adaptor's species name contains 'ancestral' (any case).
# Returns 1 when the 'species' parameter is an empty array (no filter).
# Otherwise returns 1 only if the species name, or one of the aliases the
# Registry reports for it, is a true key in the 'species_lookup' hash;
# 0 if none match.
sub process_dba {
  my ($self, $dba) = @_;

  #Ancestral sequence databases are never processed
  if($dba->species() =~ /ancestral/i) {
    return 0;
  }

  #No species requested means every remaining database is accepted
  return 1 unless @{$self->param('species')};

  #A filter is active; test the species name and all of its aliases
  my $wanted = $self->param('species_lookup');
  my $species_name = $dba->species();
  my $candidates = Bio::EnsEMBL::Registry->get_all_aliases($species_name);
  push(@{$candidates}, $species_name);
  foreach my $candidate (@{$candidates}) {
    return 1 if $wanted->{$candidate};
  }
  return 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds the input id (a hash reference) for one DBAdaptor's job.
#
# Keys of the returned hash:
#   db_types - the value returned by $self->db_types($dba)
#   species  - the production name reported by the adaptor's MetaContainer
#
# A third positional argument may be supplied by callers but is not used.
sub input_id {
  my ($self, $dba) = @_;
  my $meta_container = $dba->get_MetaContainer();
  return {
    db_types => $self->db_types($dba),
    species  => $meta_container->get_production_name(),
  };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the database types to dump for the given DBAdaptor. This
# implementation ignores the adaptor and returns the 'db_types' parameter
# unchanged.
sub db_types {
  my $self = shift;
  return $self->param('db_types');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 1;