comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/Funcgen.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =pod
2
3 =head1 NAME
4
5 Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen
6
7 =head1 DESCRIPTION
8
9 'Funcgen' is a base class for the other runnables of the Funcgen Hive pipeline.
10 It performs common tasks, such as connecting to the EFG DB.
11
12 =cut
13
14 package Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen;
15
16 use warnings;
17 use strict;
18
19 use Bio::EnsEMBL::Funcgen::Utils::Helper;
20 use Bio::EnsEMBL::DBSQL::DBAdaptor;
21 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
22 use Bio::EnsEMBL::Funcgen::InputSet;
23 use Bio::EnsEMBL::Funcgen::DataSet;
24 use Bio::EnsEMBL::Funcgen::FeatureSet;
25 use Bio::EnsEMBL::Funcgen::AnnotatedFeature;
26 #use Bio::EnsEMBL::Hive::DBSQL::AnalysisDataAdaptor;
27 use base ('Bio::EnsEMBL::Hive::Process');
28
29 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
30 use Data::Dumper;
31
# Reads and validates the pipeline parameters shared by the Funcgen runnables,
# opens the EFG database connection (which also provides the dna/core DB) and
# caches the resolved values and objects through the private accessors.
#
#  Required params : bin_dir, dnadb, efgdb, cell_type, feature_type,
#                    experiment_name, species, assembly, file_type, work_dir,
#                    output_dir
#  Optional params : set_name (default <cell_type>_<feature_type>_<experiment_name>)
#                    group    (default 'efg')
#  Returns         : 1 on success
#  Throws          : if a required parameter is missing, if either DB
#                    connection cannot be established, or if the cell type,
#                    feature type or experimental group is not in the EFG DB
sub fetch_input {
    my $self = shift @_;

    # An example of debug, in case needed
    #print Dumper $self->param('dnadb');
    if (!$self->param('bin_dir')) { throw "Folder with funcgen binaries bin_dir required"; }
    $self->_bin_dir($self->param('bin_dir'));

    my $dnadb_params = $self->param('dnadb') || throw "No parameters for Core DB";
    my $efgdb_params = $self->param('efgdb') || throw "No parameters for EFG DB";

    # Get efg connection, otherwise fail..
    eval {
        # Let the efg dba handle the dnadb. The -dnadb_* settings must be
        # part of the flat named-argument list: wrapping them in an anonymous
        # hash would hand the constructor one stray positional hashref and
        # the dnadb settings would not be seen as named arguments.
        $self->_efgdba(
            Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
                %{ $efgdb_params },
                -dnadb_name => $dnadb_params->{-dbname},
                -dnadb_user => $dnadb_params->{-user},
                -dnadb_host => $dnadb_params->{-host},
                -dnadb_port => $dnadb_params->{-port},
                -dnadb_pass => $dnadb_params->{-pass},
            )
        );

        # Actually test connections
        $self->_efgdba->dbc->db_handle;
        $self->_efgdba->dnadb->dbc->db_handle;
    };

    if ($@) { throw "Error creating the EFG DBAdaptor and/or dna DBAdaptor $@"; }

    # Set some params
    my $cell_type       = $self->param('cell_type')       || throw "No cell_type given";
    my $feature_type    = $self->param('feature_type')    || throw "No feature_type given";
    my $experiment_name = $self->param('experiment_name') || throw "No experiment_name given";
    $self->_experiment_name($experiment_name);

    my $set_name = $self->param('set_name')
        || $cell_type . "_" . $feature_type . "_" . $experiment_name;
    $self->_set_name($set_name);

    my $group_name = $self->param('group') || 'efg';

    my $species = $self->param('species') || throw "No species defined";
    $self->_species($species);

    my $assembly = $self->param('assembly') || throw "No assembly version given";
    $self->_assembly($assembly);

    my $file_type = $self->param('file_type') || throw "No file type given";
    $self->_file_type($file_type);

    my $work_dir = $self->param('work_dir') || throw "'work_dir' is a required parameter";
    $self->_work_dir($work_dir);

    # Configure DBAdaptors
    my $efgdba = $self->_efgdba();
    # To avoid farm issues...
    $efgdba->dbc->disconnect_when_inactive(1);
    $efgdba->dnadb->dbc->disconnect_when_inactive(1);

    # Fetch & set object params. From here on the 'cell_type', 'feature_type'
    # and 'group' params hold the fetched objects instead of the names.

    # CellType
    my $cta    = $efgdba->get_CellTypeAdaptor();
    my $ct_obj = $cta->fetch_by_name($cell_type);
    if (!$ct_obj) { throw "Cell type $cell_type does not exist in the database"; }
    $self->_cell_type($ct_obj);

    # FeatureType
    my $fta    = $efgdba->get_FeatureTypeAdaptor();
    my $ft_obj = $fta->fetch_by_name($feature_type);
    if (!$ft_obj) { throw "Feature type $feature_type does not exist in the database"; }
    $self->_feature_type($ft_obj);

    # ExperimentalGroup
    my $ega    = $efgdba->get_ExperimentalGroupAdaptor();
    my $eg_obj = $ega->fetch_by_name($group_name);
    if (!$eg_obj) { throw "Experimental Group $group_name does not exist in the database"; }
    $self->_group($eg_obj);

    if ($file_type eq 'sam') {
        # Change the directory structure so it will agree with the rest,
        # without the need to do uc()
        my $sam_header = $self->_work_dir() . "/sam_header/" . $species . "/" . $species . "_";
        # Fall back to 'male' when the cell type has no gender set
        $sam_header .= $ct_obj->gender() || 'male';
        # Careful with naming standards...
        #$sam_header .= "_".$assembly."_unmasked.fa.fai";
        $sam_header .= "_" . $assembly . "_unmasked.fasta.fai";
        $self->_sam_header($sam_header);
    }

    # Work with conventions here too?? work_dir/output/dbname ??
    my $output_dir = $self->param('output_dir') || throw "'output_dir' is a required parameter";
    # The stored output dir is specific to this experiment
    $self->_output_dir($output_dir . "/" . $experiment_name);

    return 1;
}
128
129
# Default no-op implementation of the run step; subclasses are expected to
# provide the real work. Always returns 1.
sub run {
    my ($self) = @_;
    return 1;
}
135
136
# Default no-op implementation of the write_output step; subclasses are
# expected to provide the real work. Always returns 1.
sub write_output {
    my ($self) = @_;
    return 1;
}
143
144
# Private function to check and create the Experiment, InputSet, FeatureSet
# and DataSet as needed.
# Relies on values cached by fetch_input (efg DBAdaptor, set name, group,
# cell type, feature type); the analysis, input subset and feature set name
# are not set there and must be passed in.
#
#  Arg [1]  : analysis object used for a newly created FeatureSet
#  Arg [2]  : input subset to register on the InputSet
#  Arg [3]  : feature set name (also used as the data set name)
#  Throws   : if the experiment cannot be created, or if an existing feature
#             set already holds annotated features
#
# Todo: make it more generic and accept multiple input_subsets.
# Also maybe pass parameters as hash list...
sub _check_Experiment {
    my ($self, $analysis, $input_subset, $fset_name) = @_;

    # Global parameters set in Funcgen->fetch_input
    my $efgdba       = $self->_efgdba();
    my $set_name     = $self->_set_name();
    my $group        = $self->_group();
    my $cell_type    = $self->_cell_type();
    my $feature_type = $self->_feature_type();

    # The input set shares the experiment's name; the data set shares the
    # feature set's name.
    my $iset_name = $set_name;
    my $dset_name = $fset_name;

    # --- Experiment: reuse if it already exists (this comes from result sets)
    my $ea  = $efgdba->get_ExperimentAdaptor;
    my $exp = $ea->fetch_by_name($set_name);

    if (!defined $exp) {
        # Today's date as year-month-day (localtime gives years since 1900
        # and a zero-based month).
        my @date = (localtime)[5, 4, 3];
        $date[0] += 1900;
        $date[1]++;

        # Group needs to be set manually, like Cell_Type and Feature_Type.
        # Do not create Group on the fly here, as it will cause concurrency issues...
        $exp = Bio::EnsEMBL::Funcgen::Experiment->new(
            -NAME                => $set_name,
            -EXPERIMENTAL_GROUP  => $group,
            -DATE                => join('-', @date),
            -PRIMARY_DESIGN_TYPE => 'binding_site_identification',
            -ADAPTOR             => $ea,
        );

        ($exp) = @{ $ea->store($exp) };
    }
    throw("Can't create experiment $set_name ") unless $exp;

    # --- InputSet
    my $isa  = $efgdba->get_InputSetAdaptor();
    my $iset = $isa->fetch_by_name($iset_name);

    if (!defined $iset) {
        $iset = Bio::EnsEMBL::Funcgen::InputSet->new(
            -name          => $iset_name,
            -experiment    => $exp,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -vendor        => 'SOLEXA',
            -format        => 'SEQUENCING',
            -feature_class => 'result'
            # Analysis is not being used??
            #-analysis     => $self->feature_analysis,
        );
        warn "Storing new InputSet:\t$iset_name\n";
        ($iset) = @{ $isa->store($iset) };

        $iset->add_new_subset($input_subset);
        $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
    }
    else {
        # We only expect one subset here (? why??)...
        # Shouldn't we be adding the control file also when used?? But this
        # is SWEmbl-specific... And it should be the same file name...
        # Maybe do some file checking here???
        warn "InputSet already exists:\t$iset_name\n";

        if (!@{ $iset->get_InputSubsets }) {
            # We can just add this InputSubset. Add an extra 'input:' as prefix?
            $iset->add_new_subset($input_subset);
            $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
        }
        elsif (!$iset->get_subset_by_name($input_subset)) {
            # The set has subsets but none matches $input_subset. This is
            # deliberately tolerated: the throw/warn that used to report the
            # mismatch was disabled for testing purposes and never restored.
            # NOTE(review): decide whether this should warn or throw again.
        }
    }

    # --- FeatureSet
    my $fsa  = $efgdba->get_FeatureSetAdaptor();
    my $fset = $fsa->fetch_by_name($fset_name);

    if (!defined $fset) {
        $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
            -analysis      => $analysis,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -name          => $fset_name,
            -feature_class => 'annotated',
            -experiment_id => $exp->dbID,
            # The adaptor is needed to store!
            -adaptor       => $fsa,
        );

        warn "Storing new FeatureSet:\t$fset_name\n";
        ($fset) = @{ $fsa->store($fset) };
    }
    else {
        warn "FeatureSet already exists:\t$fset_name\n";

        # Refuse to add to a feature set that already holds features. The
        # message names the feature set that was checked ($fset_name), not
        # the experiment/input set name.
        if (@{ $efgdba->get_AnnotatedFeatureAdaptor->fetch_all_by_FeatureSets([$fset]) }) {
            throw "Feature Set $fset_name already contains data. Please rollback before rerunning";
        }
    }

    # --- DataSet
    my $dsa  = $efgdba->get_DataSetAdaptor;
    my $dset = $dsa->fetch_by_name($dset_name);

    if (!defined $dset) {
        $dset = Bio::EnsEMBL::Funcgen::DataSet->new(
            -SUPPORTING_SETS     => [$iset],
            -FEATURE_SET         => $fset,
            -DISPLAYABLE         => 1,
            -NAME                => $dset_name,
            -SUPPORTING_SET_TYPE => 'input',
        );

        warn "Storing new DataSet:\t$dset_name\n";
        ($dset) = @{ $dsa->store($dset) }
    }
    else {
        warn "DataSet already exists:\t$dset_name\n";

        # Add the InputSet to the supporting sets unless one with the same
        # dbID is already attached.
        # NOTE(review): the updated data set is not stored through the
        # adaptor here - confirm whether add_supporting_sets persists it.
        my %attached_dbID;
        $attached_dbID{ $_->dbID } = 1 for @{ $dset->get_supporting_sets() };

        $dset->add_supporting_sets([$iset]) if !exists $attached_dbID{ $iset->dbID };
    }
}
308
309
# Private generic getter / setter on top of $self->param().
#
#  Arg [1]  : name of the parameter
#  Arg [2]  : (optional) value to store under that name
#  Returns  : the value just stored when one was given, otherwise the current
#             value of the parameter; undef when no parameter name is given
#
# A value counts as "given" when it is defined, so false values such as 0 or
# the empty string are stored rather than being mistaken for a read.
sub _getter_setter {
    my ($self, $param_name, $param_value) = @_;

    # Kept as an explicit undef so the accessors always return one scalar.
    if (!$param_name) { return undef; }

    if (defined $param_value) {
        $self->param($param_name, $param_value);
        return $param_value;
    }

    my $current_value = $self->param($param_name);
    return $current_value;
}
321
322 # Private getter / setters : Maybe do some validation in some cases...
323
# Private getter / setter for the EFG DBAdaptor, kept under the 'efgdb' param
sub _efgdba {
    my ($self, $value) = @_;
    return $self->_getter_setter('efgdb', $value);
}
328
# Private getter / setter for the 'dnadb' param (the Core DB).
# Note: fetch_input only reads this param as a hash of connection settings
# and never stores an adaptor under it.
sub _dnadba {
    my ($self, $value) = @_;
    return $self->_getter_setter('dnadb', $value);
}
333
# Private getter / setter for the Cell Type object ('cell_type' param)
sub _cell_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('cell_type', $value);
}
338
# Private getter / setter for the Feature Type object ('feature_type' param)
sub _feature_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('feature_type', $value);
}
343
# Private getter / setter for the species name ('species' param)
sub _species {
    my ($self, $value) = @_;
    return $self->_getter_setter('species', $value);
}
348
# Private getter / setter for the assembly name ('assembly' param)
sub _assembly {
    my ($self, $value) = @_;
    return $self->_getter_setter('assembly', $value);
}
353
# Private getter / setter for the Analysis object ('analysis' param).
# Not populated by fetch_input.
sub _analysis {
    my ($self, $value) = @_;
    return $self->_getter_setter('analysis', $value);
}
358
# Private getter / setter for the Experimental Group ('group' param)
sub _group {
    my ($self, $value) = @_;
    return $self->_getter_setter('group', $value);
}
363
# Private getter / setter for the Experiment Name ('experiment_name' param).
# Do not mix it up with the Set Name.
sub _experiment_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('experiment_name', $value);
}
368
# Private getter / setter for the Set Name ('set_name' param)
sub _set_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('set_name', $value);
}
373
# Private getter / setter for the file type ('file_type' param)
sub _file_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('file_type', $value);
}
378
# Private getter / setter for the sam header path ('sam_header' param);
# fetch_input only sets it when the file type is sam
sub _sam_header {
    my ($self, $value) = @_;
    return $self->_getter_setter('sam_header', $value);
}
383
# Private getter / setter for the work folder ('work_dir' param)
sub _work_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('work_dir', $value);
}
388
# Private getter / setter for the output folder ('output_dir' param)
sub _output_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('output_dir', $value);
}
393
# Private getter / setter for the bin folder ('bin_dir' param)
sub _bin_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('bin_dir', $value);
}
398
399 1;