diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/SetupAnnotationPipeline.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/SetupAnnotationPipeline.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,75 @@
+=pod 
+
+=head1 NAME
+
+Bio::EnsEMBL::Funcgen::RunnableDB::SetupAnnotationPipeline
+
+=head1 DESCRIPTION
+
+'SetupAnnotationPipeline' checks cell types and creates annotation processes for each.
+This Runnable CANNOT be run multiple times in parallel!
+
+=cut
+
+package Bio::EnsEMBL::Funcgen::RunnableDB::SetupAnnotationPipeline;
+
+use warnings;
+use strict;
+use Bio::EnsEMBL::DBSQL::DBAdaptor;
+use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; 
+use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
+
+use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Annotation');
+
+# Runs the parent class's fetch_input and makes sure the output directory exists.
+#
+# Returns 1 on success; throws if the output directory cannot be created.
+sub fetch_input {   # fetch parameters...
+  my $self = shift @_;
+
+  $self->SUPER::fetch_input();
+
+  #Sets up the output dir
+  my $output_dir = $self->_output_dir();
+  if(! -d $output_dir){
+    # List-form system() bypasses the shell, so a path containing spaces or
+    # shell metacharacters reaches mkdir verbatim as a single argument.
+    # '--' stops option parsing in case the path starts with a dash.
+    # system() returns the (non-zero) wait status on failure, hence the '&&'.
+    system('mkdir', '-p', '--', $output_dir) && throw("Couldn't create output directory $output_dir");
+  }
+
+  return 1;
+}
+
+# Collects the cell type name of every 'regulatory' feature set, leaving out
+# the 'MultiCell' one, and stores the list for write_output to consume.
+#
+# Returns 1.
+sub run {
+  my ($self) = @_;
+
+  my $feature_set_adaptor = $self->_efgdba()->get_FeatureSetAdaptor;
+
+  # One entry per regulatory feature set whose cell type is not 'MultiCell';
+  # each entry later becomes one annotation job
+  my @names_to_run;
+  for my $feature_set (@{$feature_set_adaptor->fetch_all_by_type('regulatory')}) {
+    next if $feature_set->cell_type->name eq 'MultiCell';
+    push @names_to_run, $feature_set->cell_type->name;
+  }
+
+  $self->_cell_types_to_run(\@names_to_run);
+
+  return 1;
+}
+
+
+# Flows one job per cell type stored by run() down branch 2.
+#
+# Each output id is a fresh evaluation of this job's input_id with its
+# "cell_type" entry set to the cell type name.
+#
+# Returns 1; throws if input_id cannot be evaluated.
+sub write_output {  # Create the relevant job
+  my $self = shift @_;
+
+  foreach my $cell_type (@{$self->_cell_types_to_run()}){
+    # NOTE(review): string eval executes whatever input_id contains; this is only
+    # acceptable while input_id comes from a trusted source -- confirm.
+    my $new_input_id = eval($self->input_id);
+
+    # Check $@ straight away, before another eval can clobber it. Without this
+    # check a broken input_id evaluates to undef, the hash below autovivifies,
+    # and a job carrying only cell_type is flowed with all other parameters lost.
+    if($@){
+      throw("Could not evaluate input_id '".$self->input_id."': $@");
+    }
+
+    # An input_id that evaluates to nothing (without error) still yields an
+    # output id holding just cell_type, as it always has.
+    $new_input_id = {} if ! defined $new_input_id;
+
+    $new_input_id->{"cell_type"} = $cell_type;
+    $self->dataflow_output_id($new_input_id, 2, { } );
+  }
+
+  return 1;
+
+}
+
+# Private accessor for the 'cell_types_to_run' list.
+# Forwards the optional second argument to _getter_setter and returns its result.
+sub _cell_types_to_run {
+  my ($self, $cell_types) = @_;
+
+  return $self->_getter_setter('cell_types_to_run', $cell_types);
+}
+
+1;