0
|
1 =pod
|
|
2
|
|
3 =head1 LICENSE
|
|
4
|
|
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
6 Genome Research Limited. All rights reserved.
|
|
7
|
|
8 This software is distributed under a modified Apache license.
|
|
9 For license details, please see
|
|
10
|
|
11 http://www.ensembl.org/info/about/code_licence.html
|
|
12
|
|
13 =head1 CONTACT
|
|
14
|
|
15 Please email comments or questions to the public Ensembl
|
|
16 developers list at <dev@ensembl.org>.
|
|
17
|
|
18 Questions may also be sent to the Ensembl help desk at
|
|
19 <helpdesk@ensembl.org>.
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Pipeline::SpeciesFactory
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 A module which generates dump jobs for each species it finds in the Ensembl
|
|
28 Registry. The species we run the code on can be controlled by specifying
|
|
29 the I<species> parameter or by reducing the number of DBAdaptors loaded into
|
|
30 the registry.
|
|
31
|
|
32 Allowed parameters are:
|
|
33
|
|
34 =over 8
|
|
35
|
|
36 =item species - Can be an array of species to perform dumps for or a single
|
|
37 species name. If specified only jobs will be created for
|
|
38 those species. Defaults to nothing so all species are processed
|
|
39
|
|
40 =item db_types - Specify the types of database to dump. Defaults to core and
|
|
41 should be an array.
|
|
42
|
|
43 =back
|
|
44
|
|
45 The code flows once per species to branch 2.
|
|
46
|
|
47 =cut
|
|
48
|
|
49 package Bio::EnsEMBL::Pipeline::SpeciesFactory;
|
|
50
|
|
51 use strict;
|
|
52 use warnings;
|
|
53
|
|
54 use base qw/Bio::EnsEMBL::Pipeline::Base/;
|
|
55
|
|
56 use Bio::EnsEMBL::Registry;
|
|
57
|
|
58 sub param_defaults {
|
|
59 my ($self) = @_;
|
|
60 return {
|
|
61 db_types => [qw/core/],
|
|
62 species => []
|
|
63 };
|
|
64 }
|
|
65
|
|
66 sub fetch_input {
|
|
67 my ($self) = @_;
|
|
68
|
|
69 $self->reset_empty_array_param('db_types');
|
|
70
|
|
71 my $core_dbas = $self->get_DBAdaptors();
|
|
72 $self->info('Found %d core DBAdaptor(s) to process', scalar(@{$core_dbas}));
|
|
73 $self->param('dbas', $core_dbas);
|
|
74
|
|
75 my %species_lookup =
|
|
76 map { $_ => 1 }
|
|
77 map { Bio::EnsEMBL::Registry->get_alias($_) }
|
|
78 @{$self->param('species')};
|
|
79 $self->param('species_lookup', \%species_lookup);
|
|
80
|
|
81 return;
|
|
82 }
|
|
83
|
|
84 sub run {
|
|
85 my ($self) = @_;
|
|
86 my @dna;
|
|
87 my @genes;
|
|
88 my @species;
|
|
89 foreach my $dba (@{$self->param('dbas')}) {
|
|
90 if(!$self->process_dba($dba)) {
|
|
91 $self->fine('Skipping %s', $dba->species());
|
|
92 next;
|
|
93 }
|
|
94 my $input_id = $self->input_id($dba);
|
|
95 push(@species, [ $input_id, 2 ]);
|
|
96 }
|
|
97 $self->param('species', \@species);
|
|
98 return;
|
|
99 }
|
|
100
|
|
101 sub write_output {
|
|
102 my ($self) = @_;
|
|
103 $self->do_flow('species');
|
|
104 return;
|
|
105 }
|
|
106
|
|
107 sub get_DBAdaptors {
|
|
108 my ($self) = @_;
|
|
109 return Bio::EnsEMBL::Registry->get_all_DBAdaptors(-GROUP => 'core');
|
|
110 }
|
|
111
|
|
112 sub do_flow {
|
|
113 my ($self, $key) = @_;
|
|
114 my $targets = $self->param($key);
|
|
115 foreach my $entry (@{$targets}) {
|
|
116 my ($input_id, $flow) = @{$entry};
|
|
117 $self->fine('Flowing %s to %d for %s', $input_id->{species}, $flow, $key);
|
|
118 $self->dataflow_output_id($input_id, $flow);
|
|
119 }
|
|
120 return;
|
|
121 }
|
|
122
|
|
123 sub process_dba {
|
|
124 my ($self, $dba) = @_;
|
|
125
|
|
126 #Reject if DB was ancestral sequences
|
|
127 return 0 if $dba->species() =~ /ancestral/i;
|
|
128
|
|
129 #If species is defined then make sure we only allow those species through
|
|
130 if(@{$self->param('species')}) {
|
|
131 my $lookup = $self->param('species_lookup');
|
|
132 my $name = $dba->species();
|
|
133 my $aliases = Bio::EnsEMBL::Registry->get_all_aliases($name);
|
|
134 push(@{$aliases}, $name);
|
|
135 my $found = 0;
|
|
136 foreach my $alias (@{$aliases}) {
|
|
137 if($lookup->{$alias}) {
|
|
138 $found = 1;
|
|
139 last;
|
|
140 }
|
|
141 }
|
|
142 return $found;
|
|
143 }
|
|
144
|
|
145 #Otherwise just accept
|
|
146 return 1;
|
|
147 }
|
|
148
|
|
149 sub input_id {
|
|
150 my ($self, $dba, $type) = @_;
|
|
151 my $mc = $dba->get_MetaContainer();
|
|
152 my $input_id = {
|
|
153 db_types => $self->db_types($dba),
|
|
154 species => $mc->get_production_name(),
|
|
155 };
|
|
156 return $input_id;
|
|
157 }
|
|
158
|
|
159 sub db_types {
|
|
160 my ($self, $dba) = @_;
|
|
161 return $self->param('db_types');
|
|
162 }
|
|
163
|
|
164 1;
|