Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/Funcgen.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =pod | |
2 | |
3 =head1 NAME | |
4 | |
5 Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen | |
6 | |
7 =head1 DESCRIPTION | |
8 | |
9 'Funcgen' is the base class for the other runnables of the Funcgen Hive pipeline. | |
10 It performs the tasks they share: validating the common job parameters and connecting to the EFG and core (dna) databases. | |
11 | |
12 =cut | |
13 | |
14 package Bio::EnsEMBL::Funcgen::RunnableDB::Funcgen; | |
15 | |
16 use warnings; | |
17 use strict; | |
18 | |
19 use Bio::EnsEMBL::Funcgen::Utils::Helper; | |
20 use Bio::EnsEMBL::DBSQL::DBAdaptor; | |
21 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; | |
22 use Bio::EnsEMBL::Funcgen::InputSet; | |
23 use Bio::EnsEMBL::Funcgen::DataSet; | |
24 use Bio::EnsEMBL::Funcgen::FeatureSet; | |
25 use Bio::EnsEMBL::Funcgen::AnnotatedFeature; | |
26 #use Bio::EnsEMBL::Hive::DBSQL::AnalysisDataAdaptor; | |
27 use base ('Bio::EnsEMBL::Hive::Process'); | |
28 | |
29 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump); | |
30 use Data::Dumper; | |
31 | |
# fetch_input
#
# Hive entry point shared by the Funcgen runnables. It validates the common
# pipeline parameters, connects to the EFG and core (dna) databases and
# replaces some plain-string parameters with the objects they name.
#
# Required parameters: bin_dir, dnadb, efgdb, cell_type, feature_type,
#   experiment_name, species, assembly, file_type, work_dir, output_dir.
# Optional parameters: set_name (defaults to
#   <cell_type>_<feature_type>_<experiment_name>) and group (defaults to 'efg').
#
# After a successful call:
#   efgdb        holds the Funcgen DBAdaptor instead of the connection hash
#   cell_type    holds the object fetched by name from the CellTypeAdaptor
#   feature_type holds the object fetched by name from the FeatureTypeAdaptor
#   group        holds the object fetched by name from the ExperimentalGroupAdaptor
#   output_dir   holds <output_dir>/<experiment_name>
#   sam_header   is set, but only when file_type is 'sam'
#
# Returns 1. Throws on a missing parameter, a failed DB connection, or a
# cell type / feature type / experimental group that is not in the database.
sub fetch_input {
    my $self = shift @_;

    # An example of debug, in case needed:
    #print Dumper $self->param('dnadb');
    if (!$self->param('bin_dir')) {
        throw("Folder with funcgen binaries bin_dir required");
    }
    $self->_bin_dir($self->param('bin_dir'));

    my $dnadb_params = $self->param('dnadb') || throw("No parameters for Core DB");
    my $efgdb_params = $self->param('efgdb') || throw("No parameters for EFG DB");

    # Get the efg connection, otherwise fail.
    eval {
        # The -dnadb_* settings must be part of the flat named-argument list.
        # Wrapping them in an anonymous hash would hand the constructor one
        # stray hashref (and an odd-length list) instead of named arguments.
        $self->_efgdba(
            Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
                %{$efgdb_params},
                # let the efg dba handle the dnadb
                -dnadb_name => $dnadb_params->{-dbname},
                -dnadb_user => $dnadb_params->{-user},
                -dnadb_host => $dnadb_params->{-host},
                -dnadb_port => $dnadb_params->{-port},
                -dnadb_pass => $dnadb_params->{-pass},
            )
        );

        # Actually test both connections
        $self->_efgdba->dbc->db_handle;
        $self->_efgdba->dnadb->dbc->db_handle;
    };

    if ($@) {
        throw("Error creating the EFG DBAdaptor and/or dna DBAdaptor $@");
    }

    # Plain parameters. The order of the checks decides which error is
    # reported first when several parameters are missing.
    my $cell_type       = $self->param('cell_type')       || throw("No cell_type given");
    my $feature_type    = $self->param('feature_type')    || throw("No feature_type given");
    my $experiment_name = $self->param('experiment_name') || throw("No experiment_name given");
    $self->_experiment_name($experiment_name);

    my $set_name = $self->param('set_name')
        || $cell_type . "_" . $feature_type . "_" . $experiment_name;
    $self->_set_name($set_name);

    my $group_name = $self->param('group') || 'efg';

    my $species = $self->param('species') || throw("No species defined");
    $self->_species($species);

    my $assembly = $self->param('assembly') || throw("No assembly version given");
    $self->_assembly($assembly);

    my $file_type = $self->param('file_type') || throw("No file type given");
    $self->_file_type($file_type);

    my $work_dir = $self->param('work_dir') || throw("'work_dir' is a required parameter");
    $self->_work_dir($work_dir);

    # Configure DBAdaptors
    my $efgdba = $self->_efgdba();
    # To avoid farm issues...
    $efgdba->dbc->disconnect_when_inactive(1);
    $efgdba->dnadb->dbc->disconnect_when_inactive(1);

    # Fetch the named objects and store them over the name parameters.
    # CellType
    my $ct_obj = $efgdba->get_CellTypeAdaptor()->fetch_by_name($cell_type);
    if (!$ct_obj) {
        throw("Cell type $cell_type does not exist in the database");
    }
    $self->_cell_type($ct_obj);

    # FeatureType
    my $ft_obj = $efgdba->get_FeatureTypeAdaptor()->fetch_by_name($feature_type);
    if (!$ft_obj) {
        throw("Feature type $feature_type does not exist in the database");
    }
    $self->_feature_type($ft_obj);

    # ExperimentalGroup
    my $eg_obj = $efgdba->get_ExperimentalGroupAdaptor()->fetch_by_name($group_name);
    if (!$eg_obj) {
        throw("Experimental Group $group_name does not exist in the database");
    }
    $self->_group($eg_obj);

    if ($file_type eq 'sam') {
        # Directory structure chosen so it agrees with the rest, without the
        # need to do uc(). A cell type without a gender falls back to 'male'.
        my $sam_header = $self->_work_dir() . "/sam_header/" . $species . "/" . $species . "_";
        $sam_header .= $ct_obj->gender() || 'male';
        # Careful with naming standards...
        #$sam_header .= "_".$assembly."_unmasked.fa.fai";
        $sam_header .= "_" . $assembly . "_unmasked.fasta.fai";
        $self->_sam_header($sam_header);
    }

    # Work with conventions here too?? work_dir/output/dbname ??
    my $output_dir = $self->param('output_dir') || throw("'output_dir' is a required parameter");
    $self->_output_dir($output_dir . "/" . $experiment_name);

    return 1;
}
128 | |
129 | |
# run
#
# Hive 'run' stage. The base class has no work to do here, so it just
# reports success; subclasses are expected to override it.
sub run {
    my ($self) = @_;
    return 1;
}
135 | |
136 | |
# write_output
#
# Hive 'write_output' stage. Nothing is written by the base class; it only
# returns 1 so that subclasses which do not override it still succeed.
sub write_output {
    my ($self) = @_;
    return 1;
}
143 | |
144 | |
# _check_Experiment
#
# Private helper that makes sure the Experiment, the InputSet (with its
# InputSubset), the FeatureSet and the DataSet exist in the EFG database,
# creating and storing whichever of them is missing.
#
# Arguments:
#   $analysis     - passed as -analysis when a new FeatureSet is created
#   $input_subset - InputSubset to add to the InputSet (looked up with
#                   get_subset_by_name when the InputSet already has subsets)
#   $fset_name    - name used for both the FeatureSet and the DataSet
#
# Uses values stored by fetch_input: _efgdba, _set_name (used as the name of
# both the Experiment and the InputSet), _group, _cell_type, _feature_type.
#
# Throws if the Experiment cannot be created, or if an existing FeatureSet
# already has annotated features (a rollback is needed before rerunning).
#
# There is no explicit return value; callers should not rely on the result.
#
# Todo: make it more generic and accept multiple input_subsets.
# Also maybe pass parameters as hash list...
sub _check_Experiment {
    my ($self, $analysis, $input_subset, $fset_name) = @_;

    # Global parameters set in Funcgen->fetch_input
    my $efgdba       = $self->_efgdba();
    my $set_name     = $self->_set_name();
    my $group        = $self->_group();
    my $cell_type    = $self->_cell_type();
    my $feature_type = $self->_feature_type();

    my $iset_name = $set_name;
    my $dset_name = $fset_name;

    # Experiment: reuse it if it already exists (this comes from result sets).
    my $ea  = $efgdba->get_ExperimentAdaptor;
    my $exp = $ea->fetch_by_name($set_name);

    if (!defined $exp) {
        # Zero-padded YYYY-MM-DD; only needed when a new Experiment is made.
        my ($year, $month, $day) = (localtime)[5, 4, 3];
        my $date = sprintf('%04d-%02d-%02d', $year + 1900, $month + 1, $day);

        # Group needs to be set manually, like Cell_Type and Feature_Type.
        # Do not create Group on the fly here, as it will cause concurrency issues...
        $exp = Bio::EnsEMBL::Funcgen::Experiment->new(
            -NAME                => $set_name,
            -EXPERIMENTAL_GROUP  => $group,
            -DATE                => $date,
            -PRIMARY_DESIGN_TYPE => 'binding_site_identification',
            -ADAPTOR             => $ea,
        );

        ($exp) = @{ $ea->store($exp) };
    }
    throw("Can't create experiment $set_name ") unless $exp;

    # InputSet
    my $isa  = $efgdba->get_InputSetAdaptor();
    my $iset = $isa->fetch_by_name($iset_name);

    if (!defined $iset) {
        $iset = Bio::EnsEMBL::Funcgen::InputSet->new(
            -name          => $iset_name,
            -experiment    => $exp,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -vendor        => 'SOLEXA',
            -format        => 'SEQUENCING',
            -feature_class => 'result'
            # Analysis is not being used??
            #-analysis     => $self->feature_analysis,
        );
        warn "Storing new InputSet:\t$iset_name\n";
        ($iset) = @{ $isa->store($iset) };

        $iset->add_new_subset($input_subset);
        $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
    }
    else {
        # We only expect one subset here (? why??)...
        # Shouldn't we be adding the control file also when used?? But this
        # is SWEmbl-specific... and it should be the same file name.
        # Maybe do some file checking here???
        warn "InputSet already exists:\t$iset_name\n";
        my @issets = @{ $iset->get_InputSubsets };

        if (scalar(@issets) == 0) {
            # We can just add this InputSubset. Add an extra 'input:' as prefix?
            $iset->add_new_subset($input_subset);
            $iset->adaptor->store_InputSubsets($iset->get_InputSubsets);
        }
        elsif (!$iset->get_subset_by_name($input_subset)) {
            # The InputSet has subsets but not this one. The mismatch is
            # deliberately tolerated for now: the strict check below was
            # disabled for testing and never re-enabled.
            #throw("InputSet $iset_name has InputSubsets(".join("\t", (map $_->name, @issets)).") which do not match ".$input_subset);
        }
    }

    # FeatureSet
    my $fsa  = $efgdba->get_FeatureSetAdaptor();
    my $fset = $fsa->fetch_by_name($fset_name);

    if (!defined $fset) {
        $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
            -analysis      => $analysis,
            -feature_type  => $feature_type,
            -cell_type     => $cell_type,
            -name          => $fset_name,
            -feature_class => 'annotated',
            -experiment_id => $exp->dbID,
            # The adaptor is needed to store!
            -adaptor       => $fsa,
        );

        warn "Storing new FeatureSet:\t$fset_name\n";
        ($fset) = @{ $fsa->store($fset) };
    }
    else {
        warn "FeatureSet already exists:\t$fset_name\n";

        # Refuse to add features on top of existing ones. The message names
        # the FeatureSet itself ($fset_name), not the experiment/input set.
        if (@{ $efgdba->get_AnnotatedFeatureAdaptor->fetch_all_by_FeatureSets([$fset]) }) {
            throw("Feature Set $fset_name already contains data. Please rollback before rerunning");
        }
    }

    # DataSet
    my $dsa  = $efgdba->get_DataSetAdaptor;
    my $dset = $dsa->fetch_by_name($dset_name);

    if (!defined $dset) {
        $dset = Bio::EnsEMBL::Funcgen::DataSet->new(
            -SUPPORTING_SETS     => [$iset],
            -FEATURE_SET         => $fset,
            -DISPLAYABLE         => 1,
            -NAME                => $dset_name,
            -SUPPORTING_SET_TYPE => 'input',
        );

        warn "Storing new DataSet:\t$dset_name\n";
        ($dset) = @{ $dsa->store($dset) };
    }
    else {
        warn "DataSet already exists:\t$dset_name\n";

        # Add the InputSet to the supporting sets unless it is already there.
        # NOTE(review): the updated DataSet is not stored by this sub, so the
        # addition only affects the in-memory object - confirm this is intended.
        my %ssets_dbIDs;
        for my $sset (@{ $dset->get_supporting_sets() }) {
            $ssets_dbIDs{ $sset->dbID } = '';
        }
        $dset->add_supporting_sets([$iset]) if (!exists $ssets_dbIDs{ $iset->dbID });
    }
}
308 | |
309 | |
# _getter_setter
#
# Private generic getter/setter on top of $self->param().
#
# Arguments:
#   $param_name  - name of the parameter; a false name returns undef at once
#   $param_value - optional; when defined, it is stored under $param_name
#
# Returns the value just stored, or the current value of the parameter when
# no (defined) value is given.
#
# The value is tested with 'defined' rather than for truth, so defined but
# false values (0, '0', '') can be stored instead of being silently treated
# as a read. The accessors built on this helper always pass a second
# argument, which is undef when they are used as getters.
sub _getter_setter {
    my ($self, $param_name, $param_value) = @_;

    # Kept as 'return undef' (not a bare return) so that list-context callers
    # keep receiving a single undef element, as before.
    if (!$param_name) { return undef; }

    if (defined $param_value) {
        $self->param($param_name, $param_value);
        return $param_value;
    }

    my $current_value = $self->param($param_name);
    return $current_value;
}
321 | |
322 # Private getter / setters : Maybe do some validation in some cases... | |
323 | |
# Private getter / setter for the 'efgdb' parameter.
# fetch_input stores the Funcgen DBAdaptor here, replacing the connection
# parameters that were originally passed under the same name.
sub _efgdba {
    my ($self, $value) = @_;
    return $self->_getter_setter('efgdb', $value);
}
328 | |
# Private getter / setter for the 'dnadb' parameter.
# NOTE(review): within this file 'dnadb' only ever holds the core DB
# connection parameters read by fetch_input; nothing here stores a core
# DBAdaptor under it - confirm before relying on it being an adaptor.
sub _dnadba {
    my ($self, $value) = @_;
    return $self->_getter_setter('dnadb', $value);
}
333 | |
# Private getter / setter for the 'cell_type' parameter.
# After fetch_input this holds the object fetched from the CellTypeAdaptor
# rather than the cell type name given to the pipeline.
sub _cell_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('cell_type', $value);
}
338 | |
# Private getter / setter for the 'feature_type' parameter.
# After fetch_input this holds the object fetched from the
# FeatureTypeAdaptor rather than the feature type name.
sub _feature_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('feature_type', $value);
}
343 | |
# Private getter / setter for the 'species' parameter (the species name).
sub _species {
    my ($self, $value) = @_;
    return $self->_getter_setter('species', $value);
}
348 | |
# Private getter / setter for the 'assembly' parameter (the assembly version).
sub _assembly {
    my ($self, $value) = @_;
    return $self->_getter_setter('assembly', $value);
}
353 | |
# Private getter / setter for the 'analysis' parameter.
# Nothing in this file sets it; presumably subclasses store the Analysis
# object here - verify against the callers.
sub _analysis {
    my ($self, $value) = @_;
    return $self->_getter_setter('analysis', $value);
}
358 | |
# Private getter / setter for the 'group' parameter.
# After fetch_input this holds the object fetched from the
# ExperimentalGroupAdaptor rather than the group name.
sub _group {
    my ($self, $value) = @_;
    return $self->_getter_setter('group', $value);
}
363 | |
# Private getter / setter for the 'experiment_name' parameter.
# Not to be confused with the set name, which has its own accessor.
sub _experiment_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('experiment_name', $value);
}
368 | |
# Private getter / setter for the 'set_name' parameter.
# fetch_input defaults it to <cell_type>_<feature_type>_<experiment_name>
# when the pipeline does not provide one.
sub _set_name {
    my ($self, $value) = @_;
    return $self->_getter_setter('set_name', $value);
}
373 | |
# Private getter / setter for the 'file_type' parameter.
# fetch_input compares it against 'sam' to decide whether a sam header
# path has to be built.
sub _file_type {
    my ($self, $value) = @_;
    return $self->_getter_setter('file_type', $value);
}
378 | |
# Private getter / setter for the 'sam_header' parameter.
# fetch_input only sets it when the file type is 'sam'.
sub _sam_header {
    my ($self, $value) = @_;
    return $self->_getter_setter('sam_header', $value);
}
383 | |
# Private getter / setter for the 'work_dir' parameter (the work folder).
sub _work_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('work_dir', $value);
}
388 | |
# Private getter / setter for the 'output_dir' parameter (the output folder).
# fetch_input stores <output_dir>/<experiment_name> here.
sub _output_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('output_dir', $value);
}
393 | |
# Private getter / setter for the 'bin_dir' parameter (the folder with the
# funcgen binaries, required by fetch_input).
sub _bin_dir {
    my ($self, $value) = @_;
    return $self->_getter_setter('bin_dir', $value);
}
398 | |
399 1; |