Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/SetupMotifInference.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
=pod

=head1 NAME

Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference

=head1 DESCRIPTION

'SetupMotifInference'

Counts the annotated features of the configured feature set, divides them
into bins of C<bin_size> features (a trailing, incomplete bin is ignored)
and flows one job per bin, plus one semaphored job built from this job's
own input_id.

=cut


package Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference;

use warnings;
use strict;

use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
use Data::Dumper;
use POSIX qw(floor);


use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Motif');


# Nothing to fetch beyond the parameters handled by the parent class; this
# only makes sure the output directory exists.
# Returns 1; throws if the directory cannot be created.
sub fetch_input {
    my $self = shift @_;

    $self->SUPER::fetch_input();

    my $output_dir = $self->_output_dir;
    if (! -d $output_dir) {
        # List-form system() bypasses the shell, so a directory name with
        # spaces or shell metacharacters is passed to mkdir verbatim.
        system('mkdir', '-p', $output_dir) == 0
            or throw("Error creating output dir " . $output_dir);
    }

    return 1;
}

# Creates one sub-task input id per bin of bin_size peaks, ignoring the last
# set of peaks ( < bin_size peaks ). The ids are stored via _output_ids for
# write_output to flow.
# Returns 1; throws if bin_size is not a positive number or if input_id does
# not evaluate to a hash reference.
sub run {
    my $self = shift @_;

    my $bin_size = $self->param('bin_size');
    if (!defined $bin_size || !($bin_size > 0)) {
        # Guard the division below: an undefined or zero bin_size would die
        # with an unhelpful "Illegal division by zero".
        throw("Parameter bin_size must be a positive number");
    }

    my $afa      = $self->_efgdba()->get_AnnotatedFeatureAdaptor();
    my $features = $afa->fetch_all_by_FeatureSets([ $self->_feature_set ]);
    my $bins     = POSIX::floor(scalar(@{$features}) / $bin_size);
    if ($bins < 1) {
        warn "Insuficient peaks. Please select a smaller bin size.";
    }
    warn "Number of bins is $bins";

    # Create jobs
    my $input_id_string = $self->input_id;
    my @bin_input_ids;
    for my $bin (1 .. $bins) {
        # NOTE(review): string eval of input_id - this is only safe while
        # input_id is produced by the pipeline itself and never by
        # untrusted input. It is re-evaluated per bin so that every job
        # gets its own independent hash.
        my $new_input_id = eval($input_id_string);
        if (ref($new_input_id) ne 'HASH') {
            throw("Could not evaluate input_id as a hash reference: "
                  . ($@ || 'unexpected value'));
        }
        $new_input_id->{"bin"} = $bin;
        push(@bin_input_ids, $new_input_id);
    }
    $self->_output_ids(\@bin_input_ids);

    return 1;
}


# Nothing is written to disk at this stage (for the moment); this only flows
# the jobs prepared by run(). This job's own input_id is flowed on branch 3
# with a semaphore count equal to the number of bin jobs, then the bin jobs
# are flowed on branch 2 attached to that semaphored job.
# Does nothing when there are no bin jobs. Returns 1.
sub write_output {
    my $self = shift @_;

    my $output_ids = $self->_output_ids;
    # Test the element count, not the reference: an array reference in
    # scalar context is always true, even when the array is empty.
    if ($output_ids && scalar(@{$output_ids}) > 0) {
        my ($converge_job_id) = @{
            $self->dataflow_output_id(
                $self->input_id, 3,
                { -semaphore_count => scalar(@{$output_ids}) }
            )
        };
        $self->dataflow_output_id(
            $output_ids, 2,
            { -semaphored_job_id => $converge_job_id }
        );
    }

    return 1;
}

# Private getter / setter to the output ids; delegates to the inherited
# _getter_setter with the key 'output_ids'.
sub _output_ids {
    return $_[0]->_getter_setter('output_ids', $_[1]);
}

1;