0
|
1 =pod
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen
|
|
6
|
|
7 =head1 DESCRIPTION
|
|
8
|
|
9 'Funcgen' is a base class for other runnables of the Funcgen Hive Pipeline
|
|
10 It performs common tasks such as connecting to the EFG DB etc...
|
|
11
|
|
12 =cut
|
|
13
|
|
14 package Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen;
|
|
15
|
|
16 use warnings;
|
|
17 use strict;
|
|
18
|
|
19 use Bio::EnsEMBL::Funcgen::Utils::Helper;
|
|
20 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
21 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
|
|
22 use Bio::EnsEMBL::Funcgen::InputSet;
|
|
23 use Bio::EnsEMBL::Funcgen::DataSet;
|
|
24 use Bio::EnsEMBL::Funcgen::FeatureSet;
|
|
25 use Bio::EnsEMBL::Funcgen::AnnotatedFeature;
|
|
26 #use Bio::EnsEMBL::Hive::DBSQL::AnalysisDataAdaptor;
|
|
27 use base ('Bio::EnsEMBL::Hive::Process');
|
|
28
|
|
29 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
|
|
30 use Data::Dumper;
|
|
31
|
|
# Hive entry point: validates the job parameters, opens the EFG database
# connection (which also carries the dnadb connection) and caches the derived
# values through the private accessors of this class.
#
# Required params: bin_dir, dnadb, efgdb, cell_type, feature_type,
#                  experiment_name, species, assembly, file_type, work_dir,
#                  output_dir
# Optional params: set_name (defaults to
#                  "<cell_type>_<feature_type>_<experiment_name>"),
#                  group (defaults to 'efg')
#
# Returns 1. Throws when a required parameter is missing, when either
# connection cannot be opened, or when the cell type, feature type or
# experimental group is not found in the EFG database.
#
# NOTE: several params are overwritten in place by the accessors: 'efgdb' ends
# up holding the DBAdaptor instead of the connection settings, 'cell_type',
# 'feature_type' and 'group' end up holding the fetched objects, and
# 'output_dir' gets "/<experiment_name>" appended.
sub fetch_input {
    my $self = shift @_;

    #An example of debug, in case needed
    #print Dumper $self->param('dnadb');
    if (!$self->param('bin_dir')) { throw "Folder with funcgen binaries bin_dir required"; }
    $self->_bin_dir($self->param('bin_dir'));

    my $dnadb_params = $self->param('dnadb') || throw "No parameters for Core DB";
    my $efgdb_params = $self->param('efgdb') || throw "No parameters for EFG DB";

    # Get efg connection, otherwise fail. The block ends in a true value so
    # that failure is detected from the eval result rather than from $@ alone.
    my $connected = eval {
        $self->_efgdba(
            Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
                %{ $efgdb_params },
                # Let the efg dba handle the dnadb. These must stay flat
                # key/value pairs: wrapping them in an anonymous hash would
                # pass a single stray hashref instead of named -dnadb_* options.
                -dnadb_name => $dnadb_params->{-dbname},
                -dnadb_user => $dnadb_params->{-user},
                -dnadb_host => $dnadb_params->{-host},
                -dnadb_port => $dnadb_params->{-port},
                -dnadb_pass => $dnadb_params->{-pass},
            )
        );

        # Actually test connections
        $self->_efgdba->dbc->db_handle;
        $self->_efgdba->dnadb->dbc->db_handle;
        1;
    };

    if (!$connected) { throw "Error creating the EFG DBAdaptor and/or dna DBAdaptor $@"; }

    # Set some params
    my $cell_type       = $self->param('cell_type')       || throw "No cell_type given";
    my $feature_type    = $self->param('feature_type')    || throw "No feature_type given";
    my $experiment_name = $self->param('experiment_name') || throw "No experiment_name given";
    $self->_experiment_name($experiment_name);
    my $set_name = $self->param('set_name') || $cell_type."_".$feature_type."_".$experiment_name;
    $self->_set_name($set_name);
    my $group_name = $self->param('group') || 'efg';
    my $species = $self->param('species') || throw "No species defined";
    $self->_species($species);
    my $assembly = $self->param('assembly') || throw "No assembly version given";
    $self->_assembly($assembly);
    my $file_type = $self->param('file_type') || throw "No file type given";
    $self->_file_type($file_type);
    my $work_dir = $self->param('work_dir') || throw "'work_dir' is a required parameter";
    $self->_work_dir($work_dir);

    # Configure DBAdaptors
    my $efgdba = $self->_efgdba();
    # To avoid farm issues...
    $efgdba->dbc->disconnect_when_inactive(1);
    $efgdba->dnadb->dbc->disconnect_when_inactive(1);

    # Fetch & set object params.
    # CellType, FeatureType and ExperimentalGroup are only looked up here,
    # never created: each must already exist in the database.
    my $cta    = $efgdba->get_CellTypeAdaptor();
    my $ct_obj = $cta->fetch_by_name($cell_type);
    if (!$ct_obj) { throw "Cell type $cell_type does not exist in the database"; }
    $self->_cell_type($ct_obj);

    my $fta    = $efgdba->get_FeatureTypeAdaptor();
    my $ft_obj = $fta->fetch_by_name($feature_type);
    if (!$ft_obj) { throw "Feature type $feature_type does not exist in the database"; }
    $self->_feature_type($ft_obj);

    my $ega    = $efgdba->get_ExperimentalGroupAdaptor();
    my $eg_obj = $ega->fetch_by_name($group_name);
    if (!$eg_obj) { throw "Experimental Group $group_name does not exist in the database"; }
    $self->_group($eg_obj);

    if ($file_type eq 'sam') {
        # Change the directory structure so it will agree with the rest, without the need to do uc()
        my $sam_header = $self->_work_dir()."/sam_header/".$species."/".$species."_";
        # Falls back to the 'male' header when the cell type has no gender set.
        $sam_header .= $ct_obj->gender() ? $ct_obj->gender() : 'male';
        # Careful with naming standards...
        #$sam_header .= "_".$assembly."_unmasked.fa.fai";
        $sam_header .= "_".$assembly."_unmasked.fasta.fai";
        $self->_sam_header($sam_header);
    }

    # Work with conventions here too?? work_dir/output/dbname ??
    my $output_dir = $self->param('output_dir') || throw "'output_dir' is a required parameter";
    $self->_output_dir($output_dir."/".$experiment_name);

    return 1;
}
|
|
128
|
|
129
|
|
# Hive 'run' stage. This base class has no work to do here and simply
# reports success; subclasses are expected to override it.
sub run {
    my ($self) = @_;

    return 1;
}
|
|
135
|
|
136
|
|
# Hive 'write_output' stage. Nothing is written by this base class; it only
# reports success so that subclasses can override it when needed.
sub write_output {
    my ($self) = @_;

    return 1;
}
|
|
143
|
|
144
|
|
# Private function to check and create the Experiment, InputSet, FeatureSet
# and DataSet as needed. Anything already stored under the expected name is
# reused rather than recreated.
#
# Arguments:
#   $analysis     - analysis object given to a newly created FeatureSet
#   $input_subset - name of the input subset to register on the InputSet
#   $fset_name    - name of the FeatureSet; also used as the DataSet name
#
# The Experiment and the InputSet are both named after the set name cached by
# fetch_input. Group, cell type and feature type also come from fetch_input.
#
# Returns 1. Throws if the Experiment cannot be created, or if the FeatureSet
# already exists and contains annotated features.
#
# NOTE(review): Bio::EnsEMBL::Funcgen::Experiment has no 'use' line in this
# file; presumably it is loaded through the adaptor modules - confirm.
#
# Todo make it more generic and accept multiple input_subsets
# Also maybe pass parameters as hash list...
sub _check_Experiment {
    my ($self, $analysis, $input_subset, $fset_name) = @_;

    # Global parameters set in Funcgen->fetch_input
    my $efgdba       = $self->_efgdba();
    my $set_name     = $self->_set_name();
    my $group        = $self->_group();
    my $cell_type    = $self->_cell_type();
    my $feature_type = $self->_feature_type();

    my $iset_name = $set_name;
    my $dset_name = $fset_name;

    # set experiment: Reuse if already exists? (This comes from result sets)
    my $ea  = $efgdba->get_ExperimentAdaptor;
    my $exp = $ea->fetch_by_name($set_name);

    if (!defined $exp) {
        # Year-month-day of today; only needed when a new Experiment is built.
        my @date = (localtime)[5, 4, 3];
        $date[0] += 1900;
        $date[1]++;

        # Group needs to be set manually, like Cell_Type and Feature_Type
        # Do not create Group on the fly here, as it will cause concurrency issues...
        $exp = Bio::EnsEMBL::Funcgen::Experiment->new(
            -NAME                => $set_name,
            -EXPERIMENTAL_GROUP  => $group,
            -DATE                => join('-', @date),
            -PRIMARY_DESIGN_TYPE => 'binding_site_identification',
            -ADAPTOR             => $ea,
        );

        ($exp) = @{ $ea->store($exp) };
    }
    throw("Can't create experiment $set_name ") unless $exp;

    my $isa  = $efgdba->get_InputSetAdaptor();
    my $iset = $isa->fetch_by_name($iset_name);

    if (!defined $iset) {
        $iset = Bio::EnsEMBL::Funcgen::InputSet->new(
            -name          => $iset_name,
            -experiment    => $exp,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -vendor        => 'SOLEXA',
            -format        => 'SEQUENCING',
            -feature_class => 'result'
            # Analysis is not being used??
            #-analysis     => $self->feature_analysis,
        );
        warn "Storing new InputSet:\t$iset_name\n";
        ($iset) = @{ $isa->store($iset) };

        $iset->add_new_subset($input_subset);
        $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
    }
    else {
        # We only expect one subset here (? why??)...
        # shouldn't we be adding the control file also when used?? But this is SWEmbl-specific...
        # And it should be the same file name...
        # Maybe do some file checking here???
        warn "InputSet already exists:\t$iset_name\n";
        my @issets = @{ $iset->get_InputSubsets };

        if (!@issets) {
            # we can just add this InputSubset. Add an extra 'input:' as prefix?
            $iset->add_new_subset($input_subset);
            $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
        }
        # When subsets already exist, a mismatch with $input_subset is
        # tolerated silently: the throw/warn that used to report it are
        # disabled, so no lookup is performed here.
        #throw("InputSet $iset_name has InputSubsets(".join("\t", (map $_->name, @issets)).") which do not match ".$input_subset);
    }

    my $fsa  = $efgdba->get_FeatureSetAdaptor();
    my $fset = $fsa->fetch_by_name($fset_name);

    if (!defined $fset) {
        $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
            -analysis      => $analysis,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -name          => $fset_name,
            -feature_class => 'annotated',
            -experiment_id => $exp->dbID,
            # The adaptor is needed to store!
            -adaptor       => $fsa,
        );

        warn "Storing new FeatureSet:\t$fset_name\n";
        ($fset) = @{ $fsa->store($fset) };
    }
    else {
        warn "FeatureSet already exists:\t$fset_name\n";

        # Refuse to load on top of existing features. The message names the
        # feature set itself ($fset_name), not the experiment/input set name.
        if (@{ $efgdba->get_AnnotatedFeatureAdaptor->fetch_all_by_FeatureSets([$fset]) }) {
            throw "Feature Set $fset_name already contains data. Please rollback before rerunning";
        }
    }

    my $dsa  = $efgdba->get_DataSetAdaptor;
    my $dset = $dsa->fetch_by_name($dset_name);

    if (!defined $dset) {
        $dset = Bio::EnsEMBL::Funcgen::DataSet->new(
            -SUPPORTING_SETS     => [$iset],
            -FEATURE_SET         => $fset,
            -DISPLAYABLE         => 1,
            -NAME                => $dset_name,
            -SUPPORTING_SET_TYPE => 'input',
        );

        warn "Storing new DataSet:\t$dset_name\n";
        ($dset) = @{ $dsa->store($dset) };
    }
    else {
        warn "DataSet already exists:\t$dset_name\n";

        # need to check whether InputSets and supporting_sets are the same and
        # possibly add InputSet to supporting_sets
        my %is_supporting = map { $_->dbID => 1 } @{ $dset->get_supporting_sets() };
        if (!$is_supporting{ $iset->dbID }) {
            $dset->add_supporting_sets([$iset]);
        }
    }

    return 1;
}
|
|
308
|
|
309
|
|
# Private generic getter and setter built on top of $self->param().
#
# Arguments:
#   $param_name  - name of the parameter to read or write
#   $param_value - optional; when defined it is stored under $param_name
#
# Returns undef when no parameter name is given; otherwise the value that was
# just stored, or the current value of the parameter when none was passed.
sub _getter_setter {
    my ($self, $param_name, $param_value) = @_;

    # Scalar undef (not an empty list) is kept so callers in list context
    # still receive exactly one element.
    if (!$param_name) { return undef; }

    # Test definedness rather than truth, so that false values such as 0 or
    # '' can be stored instead of being mistaken for "no value passed".
    if (!defined $param_value) {
        $param_value = $self->param($param_name);
    }
    else {
        $self->param($param_name, $param_value);
    }

    return $param_value;
}
|
|
321
|
|
322 # Private getter / setters : Maybe do some validation in some cases...
|
|
323
|
|
# Private accessor for the EFG DB adaptor, kept in the 'efgdb' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _efgdba {
    my ($self, $value) = @_;
    return $self->_getter_setter('efgdb', $value);
}
|
|
328
|
|
# Private accessor for the 'dnadb' parameter (Core DB).
# Delegates to _getter_setter: stores the argument when given, else reads.
# NOTE(review): nothing visible in this file stores an adaptor through this
# accessor, so it appears to return the raw core DB settings - confirm.
sub _dnadba {
    my ($self, $value) = @_;
    return $self->_getter_setter('dnadb', $value);
}
|
|
333
|
|
# Private accessor for the Cell Type object, kept in the 'cell_type' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _cell_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('cell_type', $value);
}
|
|
338
|
|
# Private accessor for the Feature Type object, kept in the 'feature_type'
# parameter. Delegates to _getter_setter: stores the argument when given,
# else reads.
sub _feature_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('feature_type', $value);
}
|
|
343
|
|
# Private accessor for the species name, kept in the 'species' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _species {
    my ($self, $value) = @_;
    return $self->_getter_setter('species', $value);
}
|
|
348
|
|
# Private accessor for the assembly name, kept in the 'assembly' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _assembly {
    my ($self, $value) = @_;
    return $self->_getter_setter('assembly', $value);
}
|
|
353
|
|
# Private accessor for the Analysis object, kept in the 'analysis' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _analysis {
    my ($self, $value) = @_;
    return $self->_getter_setter('analysis', $value);
}
|
|
358
|
|
# Private accessor for the Group, kept in the 'group' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _group {
    my ($self, $value) = @_;
    return $self->_getter_setter('group', $value);
}
|
|
363
|
|
# Private accessor for the Experiment Name (not to be confused with the Set
# Name), kept in the 'experiment_name' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _experiment_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('experiment_name', $value);
}
|
|
368
|
|
# Private accessor for the Set Name, kept in the 'set_name' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _set_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('set_name', $value);
}
|
|
373
|
|
# Private accessor for the file type, kept in the 'file_type' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _file_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('file_type', $value);
}
|
|
378
|
|
# Private accessor for the sam header path, kept in the 'sam_header'
# parameter (fetch_input only sets it when the file type is 'sam').
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _sam_header {
    my ($self, $value) = @_;
    return $self->_getter_setter('sam_header', $value);
}
|
|
383
|
|
# Private accessor for the work folder, kept in the 'work_dir' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _work_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('work_dir', $value);
}
|
|
388
|
|
# Private accessor for the output folder, kept in the 'output_dir' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _output_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('output_dir', $value);
}
|
|
393
|
|
# Private accessor for the bin folder, kept in the 'bin_dir' parameter.
# Delegates to _getter_setter: stores the argument when given, else reads.
sub _bin_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('bin_dir', $value);
}
|
|
398
|
|
399 1;
|