comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/ResultSet.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet
3 #
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <ensembl-dev@ebi.ac.uk>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23
24 =head1 NAME
25
26 Bio::EnsEMBL::Funcgen::ResultSet - A module to represent ResultSet.
27
28
29 =head1 SYNOPSIS
30
31 use Bio::EnsEMBL::Funcgen::ResultSet;
32
33 my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
34 -dbid => $dbid,
35 -analysis => $analysis,
36 -table_name => 'experimental_chip',
37 -table_id => $ec_id,
38 );
39
40
41
42 =head1 DESCRIPTION
43
44 A ResultSet object provides access to a set of raw results from an Experiment. A set will be one or more
45 contiguous chips to be treated as one set, with the same analysis. Duplicate sets will form a separate
46 result set, as will the same raw data analysed or normalised in a different manner.
47
48 =cut
49
50 #To do
51 #Change add_table_id to add_ExperimentalChip_Channel?
52
53
54 use strict;
55 use warnings;
56
57 package Bio::EnsEMBL::Funcgen::ResultSet;
58
59 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
60 use Bio::EnsEMBL::Utils::Exception qw( throw deprecate);
61 use Bio::EnsEMBL::Funcgen::Set;
62
63 use vars qw(@ISA);
64 @ISA = qw(Bio::EnsEMBL::Funcgen::Set);
65
66
67 =head2 new
68
69 Arg [-ANALYSIS] :
70
71
72
73 Example : my $feature = Bio::EnsEMBL::Funcgen::ResultSet->new(
74 -dbid => $dbid,
75 -analysis => $analysis,
76 -table_name => 'experimental_chip',
77 -table_id => $ec_id,
78 -result_feature_set => 1,
79 );
80 Description: Constructor for ResultSet objects.
81 Returntype : Bio::EnsEMBL::Funcgen::ResultSet
82 Exceptions : Throws if no -table_name defined
83 Caller : General
84 Status : At risk
85
86 =cut
87
sub new {
  # Constructor: builds the parent Set object (forcing the 'result' feature
  # class) and then records the ResultSet specific attributes.
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # The remaining arguments are forwarded untouched to the parent constructor.
  my $self = $class->SUPER::new(@_, '-feature_class' => 'result');

  my ($table_name, $table_id, $rf_set, $dbfile_data_dir) = rearrange(
    [ 'TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'DBFILE_DATA_DIR' ],
    @_
  );

  # Cache mapping table ids to result_set_input ids, filled by add_table_id.
  $self->{'table_id_hash'} = {};

  # NOTE: the analysis argument may not need to be mandatory here, as it is
  # tested in the adaptor store method.
  throw("Need to pass the following arg:\t-table_name") if ! $table_name;

  # TODO: do we need some control over creating new objects with a dbID and
  # then adding/storing/updating linked data? There is potential for someone
  # to create one from new using a duplicate dbID and link incorrect data to
  # a pre-existing set. We also need to verify that each table_name/id in the
  # set is from the same experiment.
  $self->table_name($table_name);

  # Optional attributes are only set when a true value was supplied.
  if($table_id){
    $self->add_table_id($table_id);
  }

  if($rf_set){
    $self->result_feature_set($rf_set);
  }

  if($dbfile_data_dir){
    $self->dbfile_data_dir($dbfile_data_dir);
  }

  return $self;
}
115
116
117 #These are CollectionContainer? methods
118 #For a core track the would probably be in the Analysis
119 #All other collection methods are in ResultFeatureAdaptor(and parents)
120
121 =head2 get_dbfile_path_by_window_size
122
123 Arg[1] : int - window size
124 Arg[2] : OPTIONAL Bio::EnsEMBL::Slice Used when generating individual seq_region Collections
125 Example : my $filepath = $rset->get_dbfile_path_by_window_size($wsize);
126 Description: Generates the default dbfile path for a given ResultSet and window_size
127 Returntype : string
128 Exceptions : Throws if Slice is not valid
129 Caller : general
130 Status : At risk
131
132 =cut
133
sub get_dbfile_path_by_window_size{
  # Builds the default collection file path for this ResultSet:
  #   <dbfile_data_dir>/result_features.<name>.<window_size>[.<seq_region_name>].col
  # The seq_region_name suffix is only added when a Slice is given.
  my ($self, $window_size, $slice) = @_;

  if($slice){
    my $is_slice = ref($slice) && $slice->isa("Bio::EnsEMBL::Slice");

    if(! $is_slice){
      throw('You must provide a valid Bio::EnsEMBL::Slice');
    }

    $window_size = $window_size.'.'.$slice->seq_region_name;
  }

  # Fetch the path components in scalar context before assembling the path.
  my $data_dir = $self->dbfile_data_dir;
  my $set_name = $self->name;

  return $data_dir.'/result_features.'.$set_name.'.'.$window_size.'.col';
}
148
149
150 =head2 dbfile_data_dir
151
152 Arg[1] : OPTIONAL string - data directory for this ResultSet
153 Example : my $dbfile_data_dir = $self->dbfile_data_dir;
154 Description: Getter/Setter for the root dbfile data directory for this ResultSet
155 Returntype : string
156 Exceptions : None
157 Caller : self
158 Status : at risk
159
160 =cut
161
162
163
sub dbfile_data_dir{
  # Getter/setter for the root dbfile data directory.
  # The stored value is only replaced when a defined argument is passed.
  my $self     = shift;
  my $data_dir = shift;

  if(defined $data_dir){
    $self->{'dbfile_data_dir'} = $data_dir;
  }

  return $self->{'dbfile_data_dir'};
}
171
172
173
174 =head2 result_feature_set
175
176 Arg [1] : optional - boolean 0 or 1.
177 Example : if($rset->result_feature_set){ ...use result_feature table ...};
178 Description: Getter and setter for the result_feature_set attribute.
179 Returntype : boolean
180 Exceptions : None
181 Caller : General
182 Status : At Risk
183
184 =cut
185
186
sub result_feature_set{
  # Getter/setter for the result_feature_set flag.
  # Any argument, including undef or 0, overwrites the stored value.
  my ($self, @args) = @_;

  if(scalar(@args)){
    $self->{'result_feature_set'} = $args[0];
  }

  return $self->{'result_feature_set'};
}
193
194
195 =head2 table_name
196
197 Arg [1] : (optional) string - table_name (experimental_chip, channel or input_set)
198 Example : $result_set->table_name('experimental_chip');
199 Description: Getter and setter for the table_name for this ResultSet.
200 Returntype : string
201 Exceptions : Throws if the table_name is already set to a different value
202 Caller : General
203 Status : At Risk
204
205 =cut
206
207
sub table_name{
  # Getter/setter for the table name of this ResultSet.
  # Once a true table name is stored it cannot be changed to a different one.
  my ($self, @args) = @_;

  if(scalar(@args)){
    my $new_name = $args[0];
    my $current  = $self->{'table_name'};

    throw("Cannot mix table name/types of a ResultSet")
      if $current && ($current ne $new_name);

    $self->{'table_name'} = $new_name;
  }

  return $self->{'table_name'};
}
222
223
224
225 =head2 add_table_id
226
227 Example : $result_set->add_table_id($ec_id, $cc_id);
228 Description: Caches table_id result_set_input_id to the ResultSet. In the case of an
229 array ResultSet, the unique result_set_input_id is used to key into the
230 result table, it also reduces redundancy and enable mapping of results to chips
231 rather than just the ResultSet. This enables result retrieval
232 based on chips in the same set which have a differing status.
233 In the case of a sequencing ResultSet, this simply refers to the InputSet ids.
234 Returntype : None
235 Exceptions : Throws if no table_id defined
236 Caller : General
237 Status : At Risk
238
239 =cut
240
sub add_table_id {
  # Records a table id in the cache together with its (optional)
  # result_set_input id. An id whose cached value is already defined
  # cannot be redefined.
  my ($self, $table_id, $cc_id) = @_;

  throw("Need to pass a table_id") unless defined $table_id;

  # Create the cache on demand so the lookup below always has a hash.
  my $id_cache = ($self->{'table_id_hash'} ||= {});

  if(defined $id_cache->{$table_id}){
    throw("You are attempting to redefine a result_set_input_id which is already defined");
  }

  $id_cache->{$table_id} = $cc_id;

  return;
}
258
259
260 =head2 table_ids
261
262 Example : my @table_ids = @{$result_set->table_ids()};
263 Description: Getter for the table ids (experimental_chip, channel or input_set dbIDs) cached in this ResultSet.
264 Returntype : arrayref
265 Exceptions : None
266 Caller : General
267 Status : At Risk
268
269 =cut
270
sub table_ids {
  # Returns a new arrayref holding the keys of the table id cache
  # (in hash order, i.e. unsorted).
  my ($self) = @_;

  my @ids = keys %{$self->{'table_id_hash'}};

  return \@ids;
}
276
277
sub chip_channel_ids {
  # Deprecated alias: emits a deprecation notice and delegates to
  # result_set_input_ids.
  my ($self) = @_;

  deprecate('ResultSet::chip_channel_ids is deprecated, please use result_set_input_ids');

  return $self->result_set_input_ids();
}
285
286 =head2 result_set_input_ids
287
288 Example : my @rset_rsi_ids = @{$result_set->result_set_input_ids()};
289 Description: Getter for the input ids for this ResultSet.
290 Returntype : arrayref
291 Exceptions : None
292 Caller : General
293 Status : At Risk
294
295 =cut
296
297
sub result_set_input_ids {
  # Returns a new arrayref holding the values of the table id cache
  # (in hash order; entries added without an input id appear as undef).
  my ($self) = @_;

  my @input_ids = values %{$self->{'table_id_hash'}};

  return \@input_ids;
}
303
304
305
306
307 =head2 contains
308
309 Example : if($result_set->contains($chip_or_channel)){...do some chip or channel operations here...};
310 Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet
311 Returntype : boolean
312 Exceptions : warns if ResultSet table name is not of argument type
313 Caller : General
314 Status : At Risk
315
316 =cut
317
318
sub contains{
  # Returns 1 if the dbID of the given object is a key of the table id cache,
  # 0 otherwise. The object's table name (first entry returned by its
  # adaptor's _tables method) must match this ResultSet's table name; a
  # mismatch only warns and yields 0.
  my ($self, $chip_channel) = @_;

  my @tables       = $chip_channel->adaptor->_tables();
  my ($table_name) = @{$tables[0]};

  if($table_name ne $self->table_name()){
    warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");
    return 0;
  }

  return (exists $self->{'table_id_hash'}->{$chip_channel->dbID()}) ? 1 : 0;
}
334
335 =head2 get_result_set_input_id
336
337 Arg [1] : int - dbID (experimental_chip, channel or input_set)
338 Example : $result_set->get_result_set_input_id($ec_id);
339 Description: Retrieves a result_set_input_id from the cache given a dbID
340 Returntype : int
341 Exceptions : none
342 Caller : General
343 Status : At Risk
344
345 =cut
346
sub get_result_set_input_id{
  # Looks up the cached result_set_input id for the given table id.
  # Yields undef when the table id is not in the cache.
  my ($self, $table_id) = @_;

  if(exists $self->{'table_id_hash'}->{$table_id}){
    return $self->{'table_id_hash'}->{$table_id};
  }

  # Explicit undef (rather than a bare return) so that list context callers
  # still receive a single element.
  return undef;
}
352
353
sub get_chip_channel_id{
  # Deprecated alias for get_result_set_input_id.
  # Arg [1]    : table id (dbID) to look up
  # Returntype : whatever get_result_set_input_id returns for that id
  my ($self, $table_id) = @_;

  # The notice names this method and its replacement (the previous text named
  # a non-existent method and contained typos).
  deprecate('ResultSet::get_chip_channel_id is deprecated, please use get_result_set_input_id');

  return $self->get_result_set_input_id($table_id);
}
360
361
362
363 =head2 get_InputSets
364
365 Example : my @input_sets = @{$result_set->get_InputSets()};
366 Description: Retrieves the InputSets for this ResultSet, fetching them by dbID on first call and caching them
367 Returntype : Listref of InputSet objects
368 Exceptions : warns if not an input_set ResultSet
369 Caller : General
370 Status : At Risk
371
372 =cut
373
sub get_InputSets{
  # Returns a listref of the objects fetched from the InputSetAdaptor for
  # every table id of this ResultSet. The list is built on first call and
  # cached in the object.
  # Returntype : listref (empty when the ResultSet has no table ids);
  #              nothing if this is not an input_set ResultSet
  # Exceptions : warns if this is not an input_set ResultSet
  my $self = shift;

  if($self->table_name ne 'input_set'){
    warn 'Cannot get_InputSets for an array based ResultSet';
    return;
  }

  if(! defined $self->{'input_sets'}){
    my $is_adaptor = $self->adaptor->db->get_InputSetAdaptor();

    # Build the complete list before caching it, so that a failing fetch
    # never leaves a truncated list behind, and so that a ResultSet without
    # ids yields (and caches) an empty listref instead of undef.
    my @input_sets = map { $is_adaptor->fetch_by_dbID($_) } @{$self->table_ids()};

    $self->{'input_sets'} = \@input_sets;
  }

  return $self->{'input_sets'};
}
394
395
396 =head2 get_ExperimentalChips
397
398 Example : my @ecs = @{$result_set->get_ExperimentalChips()};
399 Description: Retrieves the ExperimentalChips for this ResultSet, either directly or via its Channels, and caches them
400 Returntype : Listref of ExperimentalChip object
401 Exceptions : warns if this is an input_set ResultSet
402 Caller : General
403 Status : At Risk
404
405 =cut
406
sub get_ExperimentalChips{
  # Returns a listref of the chips fetched from the ExperimentalChipAdaptor
  # for this ResultSet. The list is built on first call and cached.
  #  - experimental_chip ResultSet: one fetch per table id.
  #  - any other (non input_set) ResultSet: table ids are treated as channel
  #    ids and the distinct chips of those channels are returned.
  # Returntype : listref (empty when the ResultSet has no table ids);
  #              nothing if this is an input_set ResultSet
  # Exceptions : warns if this is an input_set ResultSet
  my $self = shift;

  if($self->table_name eq 'input_set'){
    warn 'Cannot get_ExperimentalChips for an InputSet ResultSet';
    return;
  }

  if(! defined $self->{'experimental_chips'}){
    my $db         = $self->adaptor->db;
    my $ec_adaptor = $db->get_ExperimentalChipAdaptor();
    my @echips;

    if($self->table_name() eq "experimental_chip"){
      # TODO: should this be hashed on the result_set_input id?
      @echips = map { $ec_adaptor->fetch_by_dbID($_) } @{$self->table_ids()};
    }else{
      my %echip_for;
      my $chan_adaptor = $db->get_ChannelAdaptor();

      # Several channels can share a chip, so fetch each chip only once.
      foreach my $chan_id (@{$self->table_ids()}){
        my $ec_id = $chan_adaptor->fetch_by_dbID($chan_id)->experimental_chip_id;
        $echip_for{$ec_id} ||= $ec_adaptor->fetch_by_dbID($ec_id);
      }

      @echips = values %echip_for;
    }

    # Cache only the complete list. Both branches now yield a listref even
    # when there are no ids (previously the experimental_chip branch left
    # the cache, and hence the return value, undefined).
    $self->{'experimental_chips'} = \@echips;
  }

  return $self->{'experimental_chips'};
}
442
443
444
445 =head2 get_replicate_set_by_result_set_input_id
446
447 Arg[0] : int - chip_channel_id
448 Example : my $rep_set_name = $result_set->get_replicate_set_by_result_set_input_id($cc_id);
449 Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip
450 Returntype : String - replicate set name
451 Exceptions :
452 Caller : General
453 Status : At Risk - implement for Channels?
454
455 =cut
456
457 #Where is this used?
458
sub get_replicate_set_by_result_set_input_id{
  # Returns the replicate value of the chip whose result_set_input id is
  # $cc_id, or undef when there is no such entry.
  # Arg [1]    : result_set_input id
  # Returntype : whatever the chip's replicate method returned, or undef
  #
  # The lookup table is built once per object from get_ExperimentalChips and
  # get_result_set_input_id, then cached.
  my ($self, $cc_id) = @_;

  if(! defined $self->{'_replicate_cache'}){
    my %replicate_for;

    # get_ExperimentalChips may return nothing (e.g. for an input_set
    # ResultSet), so fall back to an empty list rather than dereferencing
    # undef.
    foreach my $ec (@{ $self->get_ExperimentalChips() || [] }){
      my $rsi_id = $self->get_result_set_input_id($ec->dbID());

      # Chips without a cached input id cannot be keyed; skip them.
      next if ! defined $rsi_id;

      $replicate_for{$rsi_id} = $ec->replicate();
    }

    # Store the table even when it is empty so it is not rebuilt on every call.
    $self->{'_replicate_cache'} = \%replicate_for;
  }

  # TODO: warn here of absent replicate info?
  return (defined $cc_id && exists $self->{'_replicate_cache'}{$cc_id})
    ? $self->{'_replicate_cache'}{$cc_id}
    : undef;
}
481
sub get_replicate_set_by_chip_channel_id{
  # Deprecated alias: emits a deprecation notice and delegates to
  # get_replicate_set_by_result_set_input_id with the same id.
  my $self  = shift;
  my $cc_id = shift;

  deprecate('Please use get_replicate_set_by_result_set_input_id instead');

  return $self->get_replicate_set_by_result_set_input_id($cc_id);
}
488
489
490 =head2 get_displayable_ResultFeatures_by_Slice
491
492 Arg[1] : Bio::EnsEMBL::Slice
493 Arg[2] : Boolean - with probe flag, will nest Probe object in ResultFeature
494 Example : my @results = @{$ResultSet->get_displayable_ResultFeatures_by_Slice($slice)};
495 Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
496 Returntype : Arrayref of ResultFeatures
497 Exceptions : None
498 Caller : General
499 Status : At Risk
500
501 =cut
502
503
sub get_displayable_ResultFeatures_by_Slice{
  # Convenience wrapper: forwards to this object's adaptor with the status
  # fixed to 'DISPLAYABLE'.
  # NOTE(review): this calls fetch_ResultFeatures_by_Slice_ResultSet on
  # $self->adaptor, whereas get_ResultFeatures_by_Slice goes through the
  # ResultFeatureAdaptor's fetch_all_by_Slice_ResultSet - confirm the adaptor
  # still provides this method.
  my ($self, $slice, $with_probe, $max_bins, $window_size, $constraint) = @_;

  my $adaptor = $self->adaptor;

  return $adaptor->fetch_ResultFeatures_by_Slice_ResultSet(
    $slice, $self, 'DISPLAYABLE', $with_probe, $max_bins, $window_size, $constraint
  );
}
508
509
510
511
512 =head2 get_ResultFeatures_by_Slice
513
514 Arg[1] : Bio::EnsEMBL::Slice
515 Arg[2] : string - Status name e.g. 'DISPLAYABLE'
516 Arg[3] : Boolean - with probe flag, will nest Probe object in ResultFeature
517 Arg[4] : int - Max bins i.e. pixel width of display
518 Arg[5] : int - window_size
519 Arg[6] : string - constraint
520 Example : my @rfs_with_probe = @{$ResultSet->get_ResultFeatures_by_Slice($slice, undef, 1)};
521 Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
522 Returntype : Arrayref of ResultFeatures
523 Exceptions : None
524 Caller : General
525 Status : At Risk
526
527 =cut
528
sub get_ResultFeatures_by_Slice{
  # Convenience wrapper: obtains the ResultFeatureAdaptor through this
  # object's adaptor/db and forwards all arguments, inserting this ResultSet
  # after the slice, to fetch_all_by_Slice_ResultSet.
  my ($self, $slice, $status, $with_probe, $max_bins, $window_size, $constraint) = @_;

  my $rf_adaptor = $self->adaptor->db->get_ResultFeatureAdaptor;

  return $rf_adaptor->fetch_all_by_Slice_ResultSet(
    $slice, $self, $status, $with_probe, $max_bins, $window_size, $constraint
  );
}
533
534
535
536 #Floats unpack inaccurately so need 3 sigfiging
537 #This should match the format in which they are originally stored
538 #This is dependant on ResultSet type i.e. reads or intensity?
539 #No format for reads!
540 #Should this be set in the ResultSet instead?
541 #It may be more efficient for the caller to test for format first rather than blindly printf'ing
542 #even if there is no format?
543 #This needs setting in new, so we don't have to eval for every score.
544
sub score_format{
  # Fixed sprintf format (three decimal places) for scores.
  my $format = '%.3f';

  return $format;
}
548
549
550
551
552 =head2 log_label
553
554 Example : print $rset->log_label();
555 Description: Get a string of the unique key fields for logging purposes
556 Returntype : string
557 Exceptions : None
558 Caller : General
559 Status : At Risk
560
561 =cut
562
sub log_label {
  # Builds a colon separated label for logging:
  #   <name>:<analysis logic_name>:<feature type name>:<cell type name>
  # A missing feature type or cell type is replaced by an 'Unknown ...'
  # placeholder.
  # Returntype : string
  my $self = shift;

  my $feature_type = $self->feature_type();
  my $ftype_label  = (defined $feature_type) ? $feature_type->name : 'Unknown FeatureType';

  my $cell_type   = $self->cell_type();
  # Placeholder spelling corrected (was "Uknown CellType").
  my $ctype_label = (defined $cell_type) ? $cell_type->name : 'Unknown CellType';

  my $set_name   = $self->name;
  my $logic_name = $self->analysis->logic_name;

  return join(':', $set_name, $logic_name, $ftype_label, $ctype_label);
}
582
583
584
585 1;