0
|
1 =pod
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference
|
|
6
|
|
7 =head1 DESCRIPTION
|
|
8
|
|
9 'SetupMotifInference'
|
|
10
|
|
11 =cut
|
|
12
|
|
13
|
|
14 package Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference;
|
|
15
|
|
16 use warnings;
|
|
17 use strict;
|
|
18
|
|
19 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
|
|
20 use Data::Dumper;
|
|
21 use POSIX qw(floor);
|
|
22
|
|
23
|
|
24 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Motif');
|
|
25
|
|
26
|
|
27
|
|
# Prepare the job: delegate to the parent class's fetch_input() and then make
# sure the output directory exists. There is nothing else to fetch at this
# stage; everything is driven by parameters.
#
# Returns: 1 on success.
# Throws:  if the output directory is missing and cannot be created.
sub fetch_input {
  my $self = shift @_;

  $self->SUPER::fetch_input();

  my $output_dir = $self->_output_dir;
  if (! -d $output_dir) {
    # List-form system() bypasses the shell, so a directory name containing
    # spaces or shell metacharacters reaches mkdir verbatim; '--' stops a
    # leading dash from being read as an option. system() returns the exit
    # status, which is non-zero on failure.
    system('mkdir', '-p', '--', $output_dir) == 0
      or throw("Error creating output dir ". $output_dir);
  }

  return 1;
}
|
|
39
|
|
# Create one sub-task (output id) per complete bin of 'bin_size' peaks. The
# peaks are the annotated features fetched for this job's feature set; only
# their count is used here. A trailing group of fewer than 'bin_size' peaks
# does not get a bin.
#
# Each output id is a fresh copy of this job's input_id hash with an extra
# "bin" key holding the 1-based bin number. The list is stored through
# _output_ids() for write_output() to flow.
#
# Returns: 1 on success.
# Throws:  if 'bin_size' is not a positive number, or if input_id does not
#          evaluate to a hash reference.
sub run {
  my $self = shift @_;

  # Validate up front: a zero bin size would otherwise die with an opaque
  # "Illegal division by zero", and a negative one would silently yield no bins.
  my $bin_size = $self->param('bin_size');
  {
    no warnings 'numeric';
    throw("Parameter 'bin_size' must be a positive number")
      unless defined $bin_size && $bin_size > 0;
  }

  my $afa      = $self->_efgdba()->get_AnnotatedFeatureAdaptor();
  my @features = @{$afa->fetch_all_by_FeatureSets( [ $self->_feature_set ] )};
  my $bins     = POSIX::floor(scalar(@features) / $bin_size);
  if ($bins < 1) {
    warn "Insuficient peaks. Please select a smaller bin size.";
  }
  warn "Number of bins is $bins";

  #Create jobs
  my @bin_input_ids;
  for my $bin (1 .. $bins) {
    # input_id is a stringified Perl hash; evaluating it once per bin gives
    # every sub-task its own independent copy.
    # NOTE(review): this is a string eval, so input_id must only ever come
    # from the trusted pipeline configuration.
    my $new_input_id = eval($self->input_id);

    # Without this check a failed eval leaves undef, and the assignment below
    # would autovivify a hash holding nothing but "bin".
    if ($@ || ref($new_input_id) ne 'HASH') {
      throw("Could not evaluate input_id as a hash reference: " . ($@ || 'not a hash reference'));
    }

    $new_input_id->{"bin"} = $bin;
    push(@bin_input_ids, $new_input_id);
  }
  $self->_output_ids(\@bin_input_ids);

  return 1;
}
|
|
63
|
|
64
|
|
# Flow the sub-tasks prepared by run(). When there is at least one output id:
#   * this job's own input_id is flowed on branch 3 with -semaphore_count set
#     to the number of sub-tasks, and the first returned job id is kept;
#   * the list of output ids is flowed on branch 2 with -semaphored_job_id set
#     to that job id.
# When there are no output ids, nothing is flowed.
#
# Returns: 1.
sub write_output {
  my $self = shift @_;

  my $output_ids = $self->_output_ids;

  # Count the elements, not the reference: scalar() applied to an array
  # reference is always true, which would let an empty list through and create
  # a semaphored job with nothing to wait for.
  if ($output_ids && scalar(@{$output_ids}) > 0) {
    my ($converge_job_id) = @{ $self->dataflow_output_id($self->input_id, 3, { -semaphore_count => scalar(@{$output_ids}) }) };
    $self->dataflow_output_id($output_ids, 2, { -semaphored_job_id => $converge_job_id });
  }

  return 1;
}
|
|
75
|
|
# Private accessor for the list of output ids.
# Forwards to _getter_setter() under the key 'output_ids', passing along the
# optional new value, and returns whatever _getter_setter() returns.
sub _output_ids {
  my ($self, $output_ids) = @_;

  return $self->_getter_setter('output_ids', $output_ids);
}
|
|
80
|
|
81 1;
|