Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/ResultSet.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet | |
3 # | |
4 | |
5 =head1 LICENSE | |
6 | |
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
8 Genome Research Limited. All rights reserved. | |
9 | |
10 This software is distributed under a modified Apache license. | |
11 For license details, please see | |
12 | |
13 http://www.ensembl.org/info/about/code_licence.html | |
14 | |
15 =head1 CONTACT | |
16 | |
17 Please email comments or questions to the public Ensembl | |
18 developers list at <ensembl-dev@ebi.ac.uk>. | |
19 | |
20 Questions may also be sent to the Ensembl help desk at | |
21 <helpdesk@ensembl.org>. | |
22 | |
23 | |
24 =head1 NAME | |
25 | |
26 Bio::EnsEMBL::Funcgen::ResultSet - A module to represent ResultSet. | |
27 | |
28 | |
29 =head1 SYNOPSIS | |
30 | |
31 use Bio::EnsEMBL::Funcgen::ResultSet; | |
32 | |
33 my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new( | |
34 -dbid => $dbid, | |
35 -analysis => $analysis, | |
36 -table_name => 'experimental_chip', | |
37 -table_id => $ec_id, | |
38 ); | |
39 | |
40 | |
41 | |
42 =head1 DESCRIPTION | |
43 | |
44 A ResultSet object provides access to a set of raw results from an Experiment. A set will be one or more | |
45 contiguous chips to be treated as one set, with the same analysis. Duplicate sets will form a separate | |
46 result set, as will the same raw data analysed or normalised in a different manner. | |
47 | |
48 =cut | |
49 | |
50 #To do | |
51 #Change add_table_id to add_ExperimentalChip_Channel? | |
52 | |
53 | |
54 use strict; | |
55 use warnings; | |
56 | |
57 package Bio::EnsEMBL::Funcgen::ResultSet; | |
58 | |
59 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
60 use Bio::EnsEMBL::Utils::Exception qw( throw deprecate); | |
61 use Bio::EnsEMBL::Funcgen::Set; | |
62 | |
63 use vars qw(@ISA); | |
64 @ISA = qw(Bio::EnsEMBL::Funcgen::Set); | |
65 | |
66 | |
67 =head2 new | |
68 | |
69 Arg [-ANALYSIS] : | |
70 | |
71 | |
72 | |
73 Example : my $feature = Bio::EnsEMBL::Funcgen::ResultSet->new( | |
74 -dbid => $dbid, | |
75 -analysis => $analysis, | |
76 -table_name => 'experimental_chip', | |
77 -table_id => $ec_id, | |
78 -result_feature_set => 1, | |
79 ); | |
80 Description: Constructor for ResultSet objects. | |
81 Returntype : Bio::EnsEMBL::Funcgen::ResultSet | |
82 Exceptions : Throws if no -table_name defined | |
83 Caller : General | |
84 Status : At risk | |
85 | |
86 =cut | |
87 | |
# Constructor. The generic Set attributes are handled by the parent class
# constructor (which is always told that the feature class is 'result');
# the ResultSet specific arguments are then applied through their accessors.
#   -TABLE_NAME         : mandatory, throws when missing or false
#   -TABLE_ID           : optional, registered via add_table_id
#   -RESULT_FEATURE_SET : optional flag
#   -DBFILE_DATA_DIR    : optional data directory
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_, ('-feature_class' => 'result'));

  my @arg_names = ('TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'DBFILE_DATA_DIR');
  my ($table_name, $table_id, $rf_set, $dbfile_data_dir) = rearrange(\@arg_names, @_);

  # Maps table_id => result_set_input_id, see add_table_id
  $self->{'table_id_hash'} = {};

  # The analysis is not validated here as the adaptor store method tests it
  throw("Need to pass the following arg:\t-table_name") if ! $table_name;

  # TODO: Is some control needed over creating objects with an existing dbID?
  # Someone could create one with a duplicate dbID and link incorrect data to
  # a pre-existing set. We also need to verify that each table_name/id in the
  # set is from the same experiment.
  $self->table_name($table_name);

  # Optional attributes are only set when a true value was passed
  if($table_id){
    $self->add_table_id($table_id);
  }

  if($rf_set){
    $self->result_feature_set($rf_set);
  }

  if($dbfile_data_dir){
    $self->dbfile_data_dir($dbfile_data_dir);
  }

  return $self;
}
115 | |
116 | |
117 #These are CollectionContainer? methods | |
118 #For a core track these would probably be in the Analysis | |
119 #All other collection methods are in ResultFeatureAdaptor(and parents) | |
120 | |
121 =head2 get_dbfile_path_by_window_size | |
122 | |
123 Arg[1] : int - window size | |
124 Arg[2] : OPTIONAL Bio::EnsEMBL::Slice Used when generating individual seq_region Collections | |
125 Example : my $filepath = $rset->get_dbfile_path_by_window_size($wsize); | |
126 Description: Generates the default dbfile path for a given ResultSet and window_size | |
127 Returntype : string | |
128 Exceptions : Throws if Slice is not valid | |
129 Caller : general | |
130 Status : At risk | |
131 | |
132 =cut | |
133 | |
# Builds the default collection file path for this ResultSet:
#   <dbfile_data_dir>/result_features.<name>.<window_size>[.<seq_region_name>].col
# Arg[1] : window size
# Arg[2] : OPTIONAL Bio::EnsEMBL::Slice, its seq_region_name is appended to
#          the window size component of the path
# Throws if a true second argument is passed which is not a Slice object.
sub get_dbfile_path_by_window_size{
  my ($self, $window_size, $slice) = @_;

  if($slice){
    # Loaded locally so this method does not rely on a file level import
    require Scalar::Util;

    # blessed() guards the isa call: invoking a method on an unblessed
    # reference would die with a raw perl error rather than the throw below
    if(! (Scalar::Util::blessed($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
      throw('You must provide a valid Bio::EnsEMBL::Slice');
    }

    $window_size .= '.'.$slice->seq_region_name;
  }

  return $self->dbfile_data_dir.'/result_features.'.$self->name.'.'.$window_size.'.col';
}
148 | |
149 | |
150 =head2 dbfile_data_dir | |
151 | |
152 Arg[1] : OPTIONAL string - data directory for this ResultSet | |
153 Example : my $dbfile_data_dir = $self->dbfile_data_dir; | |
154 Description: Getter/Setter for the root dbfile data directory for this ResultSet | |
155 Returntype : string | |
156 Exceptions : None | |
157 Caller : self | |
158 Status : at risk | |
159 | |
160 =cut | |
161 | |
162 | |
163 | |
# Getter/setter for the root dbfile data directory of this ResultSet.
# The stored value is only replaced when a defined argument is passed.
sub dbfile_data_dir{
  my $self    = shift;
  my $new_dir = shift;

  if(defined $new_dir){
    $self->{'dbfile_data_dir'} = $new_dir;
  }

  return $self->{'dbfile_data_dir'};
}
171 | |
172 | |
173 | |
174 =head2 result_feature_set | |
175 | |
176 Arg [1] : optional - boolean 0 or 1. | |
177 Example : if($rset->result_feature_set){ ...use result_feature table ...}; | |
178 Description: Getter and setter for the result_feature_set attribute. | |
179 Returntype : boolean | |
180 Exceptions : None | |
181 Caller : General | |
182 Status : At Risk | |
183 | |
184 =cut | |
185 | |
186 | |
# Getter/setter for the result_feature_set flag.
# Any argument passed, including undef or 0, overwrites the stored value.
sub result_feature_set{
  my ($self, @args) = @_;

  if(@args){
    $self->{'result_feature_set'} = $args[0];
  }

  return $self->{'result_feature_set'};
}
193 | |
194 | |
195 =head2 table_name | |
196 | |
197 Arg [1] : (optional) string - table_name (experimental_chip, channel or input_set) | |
198 Example : $result_set->table_name('experimental_chip'); | |
199 Description: Getter and setter for the table_name for this ResultSet. | |
200 Returntype : string | |
201 Exceptions : Throws if a different table_name has already been set | |
202 Caller : General | |
203 Status : At Risk | |
204 | |
205 =cut | |
206 | |
207 | |
# Getter/setter for the table name (type) of this ResultSet.
# Once a true table name is stored, passing a different one throws, as the
# table types of a ResultSet cannot be mixed.
sub table_name{
  my ($self, @args) = @_;

  if(@args){
    my $new_name = $args[0];
    my $current  = $self->{'table_name'};

    if($current && ($current ne $new_name)){
      throw("Cannot mix table name/types of a ResultSet");
    }

    $self->{'table_name'} = $new_name;
  }

  return $self->{'table_name'};
}
222 | |
223 | |
224 | |
225 =head2 add_table_id | |
226 | |
227 Example : $result_set->add_table_id($ec_id, $cc_id); | |
228 Description: Caches table_id result_set_input_id to the ResultSet. In the case of an | |
229 array ResultSet, the unique result_set_input_id is used to key into the | |
230 result table, it also reduces redundancy and enable mapping of results to chips | |
231 rather than just the ResultSet. This enables result retrieval | |
232 based on chips in the same set which have a differing status. | |
233 In the case of a sequencing ResultSet, this simply refers to the InputSet ids. | |
234 Returntype : None | |
235 Exceptions : Throws if no table_id defined | |
236 Caller : General | |
237 Status : At Risk | |
238 | |
239 =cut | |
240 | |
# Caches a table_id together with its (optional) result_set_input_id.
# Throws if no table_id is passed, or if the table_id already has a defined
# result_set_input_id. A table_id registered without one may be given one
# by a later call.
sub add_table_id {
  my ($self, $table_id, $cc_id) = @_;

  throw("Need to pass a table_id") if ! defined $table_id;

  # defined() is false for absent keys too, so no separate exists test is needed
  if(defined $self->{'table_id_hash'}->{$table_id}){
    throw("You are attempting to redefine a result_set_input_id which is already defined");
  }

  $self->{'table_id_hash'}->{$table_id} = $cc_id;

  return;
}
258 | |
259 | |
260 =head2 table_ids | |
261 | |
262 Example : my @table_ids = @{$result_set->table_ids()}; | |
263 Description: Getter for the table ids (experimental_chip, channel or input_set dbIDs) for this ResultSet. | |
264 Returntype : arrayref | |
265 Exceptions : None | |
266 Caller : General | |
267 Status : At Risk | |
268 | |
269 =cut | |
270 | |
# Returns a new arrayref of all table ids cached for this ResultSet,
# i.e. the keys of the table_id => result_set_input_id cache (unordered).
sub table_ids {
  my ($self) = @_;

  my @ids = keys %{ $self->{'table_id_hash'} };

  return \@ids;
}
276 | |
277 | |
# Deprecated alias for result_set_input_ids.
# Issues a deprecation message and then delegates to the new method.
sub chip_channel_ids {
  my ($self) = @_;

  deprecate('ResultSet::chip_channel_ids is deprecated, please use result_set_input_ids');

  return $self->result_set_input_ids();
}
285 | |
286 =head2 result_set_input_ids | |
287 | |
288 Example : my @rset_rsi_ids = @{$result_set->result_set_input_ids()}; | |
289 Description: Getter for the input ids for this ResultSet. | |
290 Returntype : arrayref | |
291 Exceptions : None | |
292 Caller : General | |
293 Status : At Risk | |
294 | |
295 =cut | |
296 | |
297 | |
# Returns a new arrayref of the result_set_input_ids cached for this
# ResultSet, i.e. the values of the table_id => result_set_input_id cache.
# Entries added without a result_set_input_id appear as undef.
sub result_set_input_ids {
  my ($self) = @_;

  my @rsi_ids = values %{ $self->{'table_id_hash'} };

  return \@rsi_ids;
}
303 | |
304 | |
305 | |
306 | |
307 =head2 contains | |
308 | |
309 Example : if($result_set->contains($chip_or_channel)){...do some chip or channel operations here...}; | |
310 Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet | |
311 Returntype : boolean | |
312 Exceptions : warns if ResultSet table name is not of argument type | |
313 Caller : General | |
314 Status : At Risk | |
315 | |
316 =cut | |
317 | |
318 | |
# Returns 1 if the dbID of the given object is cached in this ResultSet,
# 0 otherwise. The table name of the object is taken from the first entry
# returned by its adaptor's _tables method; if it differs from the table
# name of this ResultSet a warning is issued and 0 is returned.
sub contains{
  my ($self, $chip_channel) = @_;

  my @tables       = $chip_channel->adaptor->_tables();
  my ($table_name) = @{ $tables[0] };

  if($table_name ne $self->table_name()){
    warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");
    return 0;
  }

  return (exists $self->{'table_id_hash'}->{$chip_channel->dbID()}) ? 1 : 0;
}
334 | |
335 =head2 get_result_set_input_id | |
336 | |
337 Arg [1] : int - dbID (experimental_chip, channel or input_set) | |
338 Example : $result_set->get_result_set_input_id($ec_id); | |
339 Description: Retrieves a result_set_input_id from the cache given a dbID | |
340 Returntype : int | |
341 Exceptions : none | |
342 Caller : General | |
343 Status : At Risk | |
344 | |
345 =cut | |
346 | |
# Looks up the result_set_input_id cached for the given table dbID.
# Returns undef when the dbID is not cached.
sub get_result_set_input_id{
  my $self     = shift;
  my $table_id = shift;

  # An absent key reads as undef, so no explicit exists test is required
  return $self->{'table_id_hash'}->{$table_id};
}
352 | |
353 | |
# Deprecated alias for get_result_set_input_id.
# Arg[1] : dbID to look up
# Issues a deprecation message and returns whatever get_result_set_input_id
# returns for the given dbID.
sub get_chip_channel_id{
  my ($self, $table_id) = @_;

  # Message corrected: it previously named a non-existent method
  # (get_chip_channel_ids) and contained typos
  deprecate('ResultSet::get_chip_channel_id is deprecated, please use get_result_set_input_id');
  return $self->get_result_set_input_id($table_id);
}
360 | |
361 | |
362 | |
363 =head2 get_InputSets | |
364 | |
365 Example : my @input_sets = @{$result_set->get_InputSets()}; | |
366 Description: Retrieves the InputSets for this ResultSet, fetching and caching them on first use | |
367 Returntype : Listref of InputSet objects | |
368 Exceptions : warns if not an input_set ResultSet | |
369 Caller : General | |
370 Status : At Risk | |
371 | |
372 =cut | |
373 | |
# Returns a listref of the InputSets whose dbIDs are cached as table ids of
# this ResultSet. The objects are fetched through the InputSetAdaptor on the
# first call and cached on the object.
# Warns and returns nothing if this is not an input_set ResultSet.
sub get_InputSets{
  my $self = shift;

  if($self->table_name ne 'input_set'){
    warn 'Cannot get_InputSets for an array based ResultSet';
    return;
  }

  if(! defined $self->{'input_sets'}){
    my $is_adaptor = $self->adaptor->db->get_InputSetAdaptor();

    # Always cache an arrayref, even when there are no table ids. Otherwise
    # callers dereferencing the result would die on undef, and the adaptor
    # would be fetched again on every call.
    my @input_sets = map { $is_adaptor->fetch_by_dbID($_) } @{$self->table_ids()};

    $self->{'input_sets'} = \@input_sets;
  }

  return $self->{'input_sets'};
}
394 | |
395 | |
396 =head2 get_ExperimentalChips | |
397 | |
398 Example : my @ecs = @{$result_set->get_ExperimentalChips()}; | |
399 Description: Retrieves the ExperimentalChips for this ResultSet, fetching and caching them on first use | |
400 Returntype : Listref of ExperimentalChip objects | |
401 Exceptions : warns if called on an input_set ResultSet | |
402 Caller : General | |
403 Status : At Risk | |
404 | |
405 =cut | |
406 | |
# Returns a listref of the ExperimentalChips behind this ResultSet. They are
# fetched on the first call and cached on the object.
#  - experimental_chip ResultSet: one fetch per cached table id
#  - any other non input_set ResultSet: the table ids are treated as Channel
#    dbIDs and the distinct parent ExperimentalChips are returned (unordered)
# Warns and returns nothing for an input_set ResultSet.
sub get_ExperimentalChips{
  my $self = shift;

  if($self->table_name eq 'input_set'){
    warn 'Cannot get_ExperimentalChips for an InputSet ResultSet';
    return;
  }

  if(! defined $self->{'experimental_chips'}){
    my $db         = $self->adaptor->db;
    my $ec_adaptor = $db->get_ExperimentalChipAdaptor();
    my @echips;

    if($self->table_name() eq "experimental_chip"){
      #should this be hashed on chip_channel_id?
      @echips = map { $ec_adaptor->fetch_by_dbID($_) } @{$self->table_ids()};
    }else{
      # Keyed on experimental_chip_id so each chip is only fetched once
      my %echip_by_id;
      my $chan_adaptor = $db->get_ChannelAdaptor();

      foreach my $chan_id(@{$self->table_ids()}){
        my $chan  = $chan_adaptor->fetch_by_dbID($chan_id);
        my $ec_id = $chan->experimental_chip_id;
        $echip_by_id{$ec_id} ||= $ec_adaptor->fetch_by_dbID($ec_id);
      }

      @echips = values %echip_by_id;
    }

    # Always cache an arrayref so both branches behave alike: previously the
    # experimental_chip branch left the cache undef when there were no ids
    $self->{'experimental_chips'} = \@echips;
  }

  return $self->{'experimental_chips'};
}
442 | |
443 | |
444 | |
445 =head2 get_replicate_set_by_result_set_input_id | |
446 | |
447 Arg[0] : int - chip_channel_id | |
448 Example : my $rep_set_name = $result_set->get_replicate_set_by_result_set_input_id($cc_id); | |
449 Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip | |
450 Returntype : String - replicate set name | |
451 Exceptions : | |
452 Caller : General | |
453 Status : At Risk - implement for Channels? | |
454 | |
455 =cut | |
456 | |
457 #Where is this used? | |
458 | |
# Returns the replicate set name for the given result_set_input_id, or undef
# if there is none.
# Arg[1] : result_set_input_id (formerly chip_channel_id)
# On the first call a cache of result_set_input_id => replicate is built
# from the ExperimentalChips of this ResultSet.
sub get_replicate_set_by_result_set_input_id{
  my ($self, $cc_id) = @_;

  if(! defined $self->{'_replicate_cache'}){
    my %replicate_of;

    # get_ExperimentalChips returns nothing for an input_set ResultSet, so
    # fall back to an empty list rather than dereferencing undef
    foreach my $ec (@{ $self->get_ExperimentalChips() || [] }){
      my $rsi_id = $self->get_result_set_input_id($ec->dbID());

      # Chips without a cached input id cannot be keyed, skip them rather
      # than storing an entry under an undef key
      next if ! defined $rsi_id;

      $replicate_of{$rsi_id} = $ec->replicate();
    }

    # Stored even when empty so the cache is only generated once
    $self->{'_replicate_cache'} = \%replicate_of;
  }

  #warn here of absent replicate info?

  return (defined $cc_id) ? $self->{'_replicate_cache'}{$cc_id} : undef;
}
481 | |
# Deprecated alias for get_replicate_set_by_result_set_input_id.
# Issues a deprecation message and then delegates with the same id.
sub get_replicate_set_by_chip_channel_id{
  my $self  = shift;
  my $cc_id = shift;

  deprecate('Please use get_replicate_set_by_result_set_input_id instead');

  return $self->get_replicate_set_by_result_set_input_id($cc_id);
}
488 | |
489 | |
490 =head2 get_displayable_ResultFeatures_by_Slice | |
491 | |
492 Arg[1] : Bio::EnsEMBL::Slice | |
493 Arg[2] : Boolean - with probe flag, will nest Probe object in ResultFeature | |
494 Example : my @results = @{$ResultSet->get_displayable_ResultFeatures_by_Slice($slice)}; | |
495 Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet | |
496 Returntype : Arrayref of ResultFeatures | |
497 Exceptions : None | |
498 Caller : General | |
499 Status : At Risk | |
500 | |
501 =cut | |
502 | |
503 | |
# Wrapper which calls fetch_ResultFeatures_by_Slice_ResultSet on the adaptor
# of this ResultSet with the status fixed to 'DISPLAYABLE'.
# Args: slice, with_probe flag, max bins, window size, constraint. All are
# passed through unchanged, with this ResultSet inserted after the slice.
# NOTE(review): get_ResultFeatures_by_Slice goes via the ResultFeatureAdaptor
# instead - confirm that the adaptor method used here is still supported.
sub get_displayable_ResultFeatures_by_Slice{
  my $self = shift;
  my ($slice, $with_probe, $max_bins, $window_size, $constraint) = @_;

  my @fetch_args = ($slice, $self, 'DISPLAYABLE', $with_probe, $max_bins, $window_size, $constraint);

  return $self->adaptor->fetch_ResultFeatures_by_Slice_ResultSet(@fetch_args);
}
508 | |
509 | |
510 | |
511 | |
512 =head2 get_ResultFeatures_by_Slice | |
513 | |
514 Arg[1] : Bio::EnsEMBL::Slice | |
515 Arg[2] : string - Status name e.g. 'DISPLAYABLE' | |
516 Arg[3] : Boolean - with probe flag, will nest Probe object in ResultFeature | |
517 Arg[4] : int - Max bins i.e. pixel width of display | |
518 Arg[5] : int - window_size | |
519 Arg[6] : string - constraint | |
520 Example : my @rfs_with_probe = @{$ResultSet->get_ResultFeatures_by_Slice($slice, undef, 1)}; | |
521 Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet | |
522 Returntype : Arrayref of ResultFeatures | |
523 Exceptions : None | |
524 Caller : General | |
525 Status : At Risk | |
526 | |
527 =cut | |
528 | |
# Wrapper for fetch_all_by_Slice_ResultSet on the ResultFeatureAdaptor which
# is obtained from the db of this ResultSet's adaptor.
# Args: slice, status name, with_probe flag, max bins, window size,
# constraint. All are passed through unchanged, with this ResultSet inserted
# after the slice.
sub get_ResultFeatures_by_Slice{
  my $self = shift;
  my ($slice, $status, $with_probe, $max_bins, $window_size, $constraint) = @_;

  my $rf_adaptor = $self->adaptor->db->get_ResultFeatureAdaptor;

  return $rf_adaptor->fetch_all_by_Slice_ResultSet(
    $slice, $self, $status, $with_probe, $max_bins, $window_size, $constraint
  );
}
533 | |
534 | |
535 | |
536 #Floats unpack inaccurately so need 3 sigfiging | |
537 #This should match the format in which they are originally stored | |
538 #This is dependant on ResultSet type i.e. reads or intensity? | |
539 #No format for reads! | |
540 #Should this be set in the ResultSet instead? | |
541 #It may be more efficient for the caller to test for format first rather than blindly printf'ing | |
542 #even if there is no format? | |
543 #This needs setting in new, so we don't have to eval for every score. | |
544 | |
# Returns the sprintf format used for scores: three decimal places.
# The value is fixed and any arguments are ignored.
sub score_format{
  my $format = '%.3f';

  return $format;
}
548 | |
549 | |
550 | |
551 | |
552 =head2 log_label | |
553 | |
554 Example : print $rset->log_label(); | |
555 Description: Get a string of the unique key fields for logging purposes | |
556 Returntype : string | |
557 Exceptions : None | |
558 Caller : General | |
559 Status : At Risk | |
560 | |
561 =cut | |
562 | |
# Returns a string for logging purposes in the form:
#   <name>:<analysis logic_name>:<feature type name>:<cell type name>
# Placeholder text is used where the FeatureType or CellType is not defined.
sub log_label {
  my $self = shift;

  # Each accessor is called once and the result reused
  my $ftype = $self->feature_type();
  my $ctype = $self->cell_type();

  my $ftype_label = (defined $ftype) ? $ftype->name : 'Unknown FeatureType';

  # Spelling of the placeholder corrected (was 'Uknown CellType')
  my $ctype_label = (defined $ctype) ? $ctype->name : 'Unknown CellType';

  return join(':', $self->name, $self->analysis->logic_name, $ftype_label, $ctype_label);
}
582 | |
583 | |
584 | |
585 1; |