comparison variant_effect_predictor/Bio/EnsEMBL/Pipeline/SpeciesFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =pod
2
3 =head1 LICENSE
4
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
6 Genome Research Limited. All rights reserved.
7
8 This software is distributed under a modified Apache license.
9 For license details, please see
10
11 http://www.ensembl.org/info/about/code_licence.html
12
13 =head1 CONTACT
14
15 Please email comments or questions to the public Ensembl
16 developers list at <dev@ensembl.org>.
17
18 Questions may also be sent to the Ensembl help desk at
19 <helpdesk@ensembl.org>.
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Pipeline::SpeciesFactory
24
25 =head1 DESCRIPTION
26
27 A module which generates dump jobs for each species it finds in the Ensembl
28 Registry. The species we run the code on can be controlled by specifying
29 the I<species> parameter or by reducing the number of DBAdaptors loaded into
30 the registry.
31
32 Allowed parameters are:
33
34 =over 8
35
36 =item species - Can be an array of species to perform dumps for or a single
37 species name. If specified, jobs will only be created for
38 those species. Defaults to an empty list, so all species are processed.
39
40 =item db_types - Specify the types of database to dump. Defaults to core and
41 should be an array.
42
43 =back
44
45 The code flows once per species to branch 2.
46
47 =cut
48
49 package Bio::EnsEMBL::Pipeline::SpeciesFactory;
50
51 use strict;
52 use warnings;
53
54 use base qw/Bio::EnsEMBL::Pipeline::Base/;
55
56 use Bio::EnsEMBL::Registry;
57
# Default parameter values for this factory.
#
# Returns a fresh hash reference on every call:
#   species  - empty array ref (no species filter)
#   db_types - array ref containing just 'core'
sub param_defaults {
    my ($self) = @_;
    my %defaults = (
        species  => [],
        db_types => ['core'],
    );
    return \%defaults;
}
65
# Prepare the parameters used by the later stages of this job.
#
# - Calls reset_empty_array_param() for 'db_types'.
# - Stores the adaptors returned by get_DBAdaptors() in the 'dbas' param.
# - Normalises the 'species' param: the module documentation allows either an
#   array of names or a single name, so a plain (non-reference) value is
#   wrapped in an array ref and written back. An undefined or empty string
#   becomes an empty array (i.e. no filter). Without this, dereferencing a
#   plain string with @{...} dies under "use strict".
# - Builds the 'species_lookup' param: a hash ref whose keys are the values
#   Bio::EnsEMBL::Registry->get_alias() returns for each requested species.
#
# Returns nothing.
sub fetch_input {
    my ($self) = @_;

    $self->reset_empty_array_param('db_types');

    my $core_dbas = $self->get_DBAdaptors();
    $self->info('Found %d core DBAdaptor(s) to process', scalar(@{$core_dbas}));
    $self->param('dbas', $core_dbas);

    my $species = $self->param('species');
    if (!ref($species)) {
        # Scalar form: promote to an array ref so every later
        # @{ $self->param('species') } keeps working.
        $species = (defined($species) && length($species)) ? [$species] : [];
        $self->param('species', $species);
    }

    my %species_lookup =
        map { ($_ => 1) }
        map { Bio::EnsEMBL::Registry->get_alias($_) }
        @{$species};
    $self->param('species_lookup', \%species_lookup);

    return;
}
83
# Decide which of the adaptors held in the 'dbas' param get a job.
#
# Each adaptor is offered to process_dba(); rejected ones are logged via
# fine() and skipped. For every accepted adaptor an [ input_id, 2 ] pair is
# collected, where 2 is the branch number later handed to
# dataflow_output_id() by do_flow().
#
# NOTE: the collected pairs are stored back under the 'species' param,
# replacing the species filter that process_dba() reads. This is safe only
# because all process_dba() calls happen before the overwrite.
#
# Returns nothing.
sub run {
    my ($self) = @_;
    my @species;
    foreach my $dba (@{$self->param('dbas')}) {
        if (!$self->process_dba($dba)) {
            $self->fine('Skipping %s', $dba->species());
            next;
        }
        my $input_id = $self->input_id($dba);
        push(@species, [ $input_id, 2 ]);
    }
    $self->param('species', \@species);
    return;
}
100
# Emit the jobs held under the 'species' param by delegating to do_flow().
# Returns nothing.
sub write_output {
    my $self = shift;
    $self->do_flow('species');
    return;
}
106
# Ask the Ensembl Registry for every DBAdaptor in the 'core' group and
# return whatever the registry call returns.
sub get_DBAdaptors {
    my ($self) = @_;
    my @query = ('-GROUP', 'core');
    return Bio::EnsEMBL::Registry->get_all_DBAdaptors(@query);
}
111
# Flow every job stored under the param named $key.
#
# The param is expected to hold an array ref of [ input_id, branch ] pairs
# (the shape run() stores). Each pair is logged via fine() and then passed
# to dataflow_output_id().
#
# Returns nothing.
sub do_flow {
    my ($self, $key) = @_;
    my $jobs = $self->param($key);
    for my $job (@{$jobs}) {
        my ($input_id, $branch) = @{$job};
        $self->fine('Flowing %s to %d for %s', $input_id->{species}, $branch, $key);
        $self->dataflow_output_id($input_id, $branch);
    }
    return;
}
122
# Decide whether a DBAdaptor should get a job. Returns 1 to accept, 0 to
# reject.
#
# - Any adaptor whose species name matches /ancestral/i is rejected.
# - With an empty 'species' param every remaining adaptor is accepted.
# - Otherwise the adaptor is accepted only if its species name, or one of
#   the aliases the Registry reports for that name, is a true-valued key in
#   the 'species_lookup' param.
sub process_dba {
    my ($self, $dba) = @_;

    # Ancestral sequence databases are never processed.
    return 0 if $dba->species() =~ /ancestral/i;

    # No species filter requested: let everything else through.
    if (!@{$self->param('species')}) {
        return 1;
    }

    my $lookup     = $self->param('species_lookup');
    my $name       = $dba->species();
    my $candidates = Bio::EnsEMBL::Registry->get_all_aliases($name);
    push(@{$candidates}, $name);

    foreach my $candidate (@{$candidates}) {
        return 1 if $lookup->{$candidate};
    }
    return 0;
}
148
# Build the input_id hash ref for one DBAdaptor.
#
# The result has two keys:
#   db_types - whatever db_types() returns for this adaptor
#   species  - the production name reported by the adaptor's MetaContainer
#
# A third positional argument ($type) is accepted but not used here.
sub input_id {
    my ($self, $dba, $type) = @_;
    my $meta = $dba->get_MetaContainer();
    my %job = (
        db_types => $self->db_types($dba),
        species  => $meta->get_production_name(),
    );
    return \%job;
}
158
# Return the database types to dump for an adaptor.
#
# This implementation ignores the adaptor passed as the second argument and
# simply hands back the 'db_types' param.
sub db_types {
    my $self = shift;
    return $self->param('db_types');
}
163
164 1;