Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Pipeline/SpeciesFactory.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =pod | |
| 2 | |
| 3 =head1 LICENSE | |
| 4 | |
| 5 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 6 Genome Research Limited. All rights reserved. | |
| 7 | |
| 8 This software is distributed under a modified Apache license. | |
| 9 For license details, please see | |
| 10 | |
| 11 http://www.ensembl.org/info/about/code_licence.html | |
| 12 | |
| 13 =head1 CONTACT | |
| 14 | |
| 15 Please email comments or questions to the public Ensembl | |
| 16 developers list at <dev@ensembl.org>. | |
| 17 | |
| 18 Questions may also be sent to the Ensembl help desk at | |
| 19 <helpdesk@ensembl.org>. | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::Pipeline::SpeciesFactory | |
| 24 | |
| 25 =head1 DESCRIPTION | |
| 26 | |
| 27 A module which generates dump jobs for each species it finds in the Ensembl | |
| 28 Registry. The species we run the code on can be controlled by specifying | |
| 29 the I<species> parameter or by reducing the number of DBAdaptors loaded into | |
| 30 the registry. | |
| 31 | |
| 32 Allowed parameters are: | |
| 33 | |
| 34 =over 8 | |
| 35 | |
| 36 =item species - Can be an array of species to perform dumps for or a single | |
| 37 species name. If specified, jobs will be created only for | |
| 38 those species. Defaults to an empty list, so all species are processed | |
| 39 | |
| 40 =item db_types - Specify the types of database to dump. Defaults to core and | |
| 41 should be an array. | |
| 42 | |
| 43 =back | |
| 44 | |
| 45 The code flows once per species to branch 2. | |
| 46 | |
| 47 =cut | |
| 48 | |
| 49 package Bio::EnsEMBL::Pipeline::SpeciesFactory; | |
| 50 | |
| 51 use strict; | |
| 52 use warnings; | |
| 53 | |
| 54 use base qw/Bio::EnsEMBL::Pipeline::Base/; | |
| 55 | |
| 56 use Bio::EnsEMBL::Registry; | |
| 57 | |
# Default parameter values for this module: dump only 'core' databases and
# apply no species filter (an empty species list).
# Returns a newly built hash reference on every call.
sub param_defaults {
  my $self = shift;
  my %defaults = (
    species  => [],
    db_types => ['core'],
  );
  return \%defaults;
}
| 65 | |
# Prepares the parameters consumed by run():
#   dbas           - the DBAdaptors returned by get_DBAdaptors()
#   species_lookup - hash ref keyed by the requested species names after
#                    each has been passed through Registry->get_alias()
#
# The 'species' parameter is documented as accepting either an array of
# names or a single name. A plain scalar is therefore wrapped into a
# one-element array (an undefined or empty value becomes an empty array)
# before it is dereferenced; previously a scalar caused a fatal
# "not an ARRAY reference" error. The normalised array is written back to
# 'species' because process_dba() dereferences that parameter as an array too.
sub fetch_input {
  my ($self) = @_;

  $self->reset_empty_array_param('db_types');

  my $core_dbas = $self->get_DBAdaptors();
  $self->info('Found %d core DBAdaptor(s) to process', scalar(@{$core_dbas}));
  $self->param('dbas', $core_dbas);

  # Accept a single species name as well as an array of names
  my $species = $self->param('species');
  if(ref($species) ne 'ARRAY') {
    $species = (defined $species && length $species) ? [$species] : [];
    $self->param('species', $species);
  }

  # NOTE(review): get_alias() presumably maps each given name to the name the
  # registry knows the species by - confirm against the Registry docs.
  my %species_lookup =
    map { $_ => 1 }
    map { Bio::EnsEMBL::Registry->get_alias($_) }
    @{$species};
  $self->param('species_lookup', \%species_lookup);

  return;
}
| 83 | |
# Builds the list of dataflow targets: one [input_id, 2] pair for every
# DBAdaptor in the 'dbas' parameter that process_dba() accepts. Rejected
# adaptors are logged via fine() and skipped.
#
# The finished list is stored back under the 'species' parameter (replacing
# the species filter that was held there) so write_output() can flow it.
sub run {
  my ($self) = @_;
  my @species;
  foreach my $dba (@{$self->param('dbas')}) {
    if(!$self->process_dba($dba)) {
      $self->fine('Skipping %s', $dba->species());
      next;
    }
    # Assign to a scalar first so input_id() is called in scalar context
    my $input_id = $self->input_id($dba);
    push(@species, [ $input_id, 2 ]);
  }
  $self->param('species', \@species);
  return;
}
| 100 | |
# Flows every target stored under the 'species' parameter by handing that
# key to do_flow(). Returns nothing.
sub write_output {
  my $self = shift;
  $self->do_flow('species');
  return;
}
| 106 | |
# Asks the Ensembl Registry for all DBAdaptors in the 'core' group and
# returns whatever the Registry call returns (fetch_input() treats it as an
# array reference).
sub get_DBAdaptors {
  my $self = shift;
  my %filter = (-GROUP => 'core');
  # Call stays in the return statement so the caller's context is passed on
  return Bio::EnsEMBL::Registry->get_all_DBAdaptors(%filter);
}
| 111 | |
# Emits dataflow events for the targets stored under the parameter $key.
#
# The parameter is read as an array reference of [input_id, branch] pairs;
# each pair is logged via fine() and then handed to dataflow_output_id().
# The log line reads the 'species' entry of the input_id hash.
sub do_flow {
  my ($self, $key) = @_;

  my $pairs = $self->param($key);
  for my $pair (@{$pairs}) {
    my ($input_id, $branch) = @{$pair};
    $self->fine('Flowing %s to %d for %s', $input_id->{species}, $branch, $key);
    $self->dataflow_output_id($input_id, $branch);
  }

  return;
}
| 122 | |
# Decides whether a DBAdaptor should have a job created for it.
# Returns 1 to accept the adaptor and 0 to reject it.
#
#  - Any adaptor whose species name matches /ancestral/i is rejected.
#  - With an empty 'species' parameter every remaining adaptor is accepted.
#  - Otherwise the adaptor is accepted only when its species name, or one of
#    the names returned by Registry->get_all_aliases() for it, is a true key
#    in the 'species_lookup' parameter.
sub process_dba {
  my ($self, $dba) = @_;

  #Ancestral sequence databases are never processed
  return 0 if $dba->species() =~ /ancestral/i;

  #No species filter given so everything else is let through
  return 1 unless @{$self->param('species')};

  my $wanted = $self->param('species_lookup');
  my $species_name = $dba->species();
  my $candidates = Bio::EnsEMBL::Registry->get_all_aliases($species_name);
  push(@{$candidates}, $species_name);

  #Accept on the first alias (or the name itself) the user asked for
  for my $candidate (@{$candidates}) {
    return 1 if $wanted->{$candidate};
  }

  return 0;
}
| 148 | |
# Builds the input_id hash reference for one DBAdaptor:
#   db_types - the value returned by $self->db_types($dba)
#   species  - the value returned by get_production_name() on the adaptor's
#              MetaContainer
#
# Both values are fetched into scalars before the hash is built. Calling the
# methods directly inside the hash constructor would run them in list
# context, where an empty-list return shifts the keys and values out of
# alignment (e.g. 'species' ending up as the value of 'db_types').
#
# The third argument ($type) is accepted but not used here.
sub input_id {
  my ($self, $dba, $type) = @_;
  my $mc = $dba->get_MetaContainer();
  my $db_types = $self->db_types($dba);
  my $production_name = $mc->get_production_name();
  my $input_id = {
    db_types => $db_types,
    species => $production_name,
  };
  return $input_id;
}
| 158 | |
# Returns the value of the 'db_types' parameter. A DBAdaptor may be passed as
# the second argument (input_id() does so) but it is not consulted here.
sub db_types {
  my $self = shift;
  return $self->param('db_types');
}
| 163 | |
| 164 1; |
