Mercurial > repos > mahtabm > ensembl

diff variant_effect_predictor/Bio/EnsEMBL/Pipeline/SpeciesFactory.pm @ 0:1f6dce3d34e0
Uploaded
author: mahtabm
date: Thu, 11 Apr 2013 02:01:53 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Pipeline/SpeciesFactory.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,164 @@
+=pod
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=head1 NAME
+
+Bio::EnsEMBL::Pipeline::SpeciesFactory
+
+=head1 DESCRIPTION
+
+A module which generates dump jobs for each species it finds in the Ensembl
+Registry. The species we run the code on can be controlled by specifying
+the I<species> parameter or by reducing the number of DBAdaptors loaded into
+the registry. 
+
+Allowed parameters are:
+
+=over 8
+
+=item species - Can be an array of species to perform dumps for or a single
+                species name. If specified only jobs will be created for
+                those species. Defaults to nothing so all species are processed
+
+item db_types - Specify the types of database to dump. Defaults to core and
+                should be an array.
+
+=back
+
+The code flows once per species to branch 2.
+
+=cut
+
+package Bio::EnsEMBL::Pipeline::SpeciesFactory;
+
+use strict;
+use warnings;
+
+use base qw/Bio::EnsEMBL::Pipeline::Base/;
+
+use Bio::EnsEMBL::Registry;
+
+sub param_defaults {
+  my ($self) = @_;
+  return {
+    db_types => [qw/core/],
+    species => []
+  };
+}
+
+sub fetch_input {
+  my ($self) = @_;
+  
+  $self->reset_empty_array_param('db_types');
+  
+  my $core_dbas = $self->get_DBAdaptors();
+  $self->info('Found %d core DBAdaptor(s) to process', scalar(@{$core_dbas}));
+  $self->param('dbas', $core_dbas);
+  
+  my %species_lookup = 
+    map { $_ => 1 } 
+    map { Bio::EnsEMBL::Registry->get_alias($_)  } 
+    @{$self->param('species')};
+  $self->param('species_lookup', \%species_lookup);
+  
+  return;
+}
+  
+sub run {
+  my ($self) = @_;
+  my @dna;
+  my @genes;
+  my @species;
+  foreach my $dba (@{$self->param('dbas')}) {
+    if(!$self->process_dba($dba)) {
+      $self->fine('Skipping %s', $dba->species());
+      next;
+    }
+    my $input_id = $self->input_id($dba);
+    push(@species, [ $input_id, 2 ]);
+  }
+  $self->param('species', \@species);
+  return;
+}
+
+sub write_output {
+  my ($self) = @_;
+  $self->do_flow('species');
+  return;
+}
+
+sub get_DBAdaptors {
+  my ($self) = @_;
+  return Bio::EnsEMBL::Registry->get_all_DBAdaptors(-GROUP => 'core');
+}
+
+sub do_flow {
+  my ($self, $key) = @_;
+  my $targets = $self->param($key);
+  foreach my $entry (@{$targets}) {
+    my ($input_id, $flow) = @{$entry};
+    $self->fine('Flowing %s to %d for %s', $input_id->{species}, $flow, $key);
+    $self->dataflow_output_id($input_id, $flow);
+  }
+  return;
+}
+
+sub process_dba {
+  my ($self, $dba) = @_;
+  
+  #Reject if DB was ancestral sequences
+  return 0 if $dba->species() =~ /ancestral/i;
+  
+  #If species is defined then make sure we only allow those species through
+  if(@{$self->param('species')}) {
+    my $lookup = $self->param('species_lookup');
+    my $name = $dba->species();
+    my $aliases = Bio::EnsEMBL::Registry->get_all_aliases($name);
+    push(@{$aliases}, $name);
+    my $found = 0;
+    foreach my $alias (@{$aliases}) {
+      if($lookup->{$alias}) {
+        $found = 1;
+        last;
+      }
+    }
+    return $found;
+  }
+  
+  #Otherwise just accept
+  return 1;
+}
+
+sub input_id {
+  my ($self, $dba, $type) = @_;
+  my $mc = $dba->get_MetaContainer();
+  my $input_id = {
+    db_types => $self->db_types($dba),
+    species => $mc->get_production_name(),
+  };
+  return $input_id;
+}
+
+sub db_types {
+  my ($self, $dba) = @_;
+  return $self->param('db_types');
+}
+
+1;
author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children