0
|
1 #
|
|
2 # Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet
|
|
3 #
|
|
4
|
|
5 =head1 LICENSE
|
|
6
|
|
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
8 Genome Research Limited. All rights reserved.
|
|
9
|
|
10 This software is distributed under a modified Apache license.
|
|
11 For license details, please see
|
|
12
|
|
13 http://www.ensembl.org/info/about/code_licence.html
|
|
14
|
|
15 =head1 CONTACT
|
|
16
|
|
17 Please email comments or questions to the public Ensembl
|
|
18 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
19
|
|
20 Questions may also be sent to the Ensembl help desk at
|
|
21 <helpdesk@ensembl.org>.
|
|
22
|
|
23
|
|
24 =head1 NAME
|
|
25
|
|
Bio::EnsEMBL::Funcgen::ResultSet - A module to represent ResultSet.
|
|
27
|
|
28
|
|
29 =head1 SYNOPSIS
|
|
30
|
|
31 use Bio::EnsEMBL::Funcgen::ResultSet;
|
|
32
|
|
33 my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
|
|
34 -dbid => $dbid,
|
|
35 -analysis => $analysis,
|
|
36 -table_name => 'experimental_chip',
|
|
37 -table_id => $ec_id,
|
|
38 );
|
|
39
|
|
40
|
|
41
|
|
42 =head1 DESCRIPTION
|
|
43
|
|
A ResultSet object provides access to a set of raw results from an Experiment. A set will be one or more
contiguous chips to be treated as one set, with the same analysis. Duplicate sets will form a separate
result set, as will the same raw data analysed or normalised in a different manner.
|
|
47
|
|
48 =cut
|
|
49
|
|
50 #To do
|
|
51 #Change add_table_id to add_ExperimentalChip_Channel?
|
|
52
|
|
53
|
|
54 use strict;
|
|
55 use warnings;
|
|
56
|
|
57 package Bio::EnsEMBL::Funcgen::ResultSet;
|
|
58
|
|
59 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
60 use Bio::EnsEMBL::Utils::Exception qw( throw deprecate);
|
|
61 use Bio::EnsEMBL::Funcgen::Set;
|
|
62
|
|
63 use vars qw(@ISA);
|
|
64 @ISA = qw(Bio::EnsEMBL::Funcgen::Set);
|
|
65
|
|
66
|
|
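=head2 new

  Arg [-TABLE_NAME]         : string - mandatory, table the ids belong to (experimental_chip, channel or input_set)
  Arg [-TABLE_ID]           : optional int - dbID of a row in that table, registered via add_table_id
  Arg [-RESULT_FEATURE_SET] : optional boolean - sets the result_feature_set attribute
  Arg [-DBFILE_DATA_DIR]    : optional string - sets the dbfile_data_dir attribute
  Arg [-ANALYSIS]           : passed on, with all other arguments, to the Bio::EnsEMBL::Funcgen::Set constructor
  Example    : my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
                                                                      -dbid               => $dbid,
                                                                      -analysis           => $analysis,
                                                                      -table_name         => 'experimental_chip',
                                                                      -table_id           => $ec_id,
                                                                      -result_feature_set => 1,
                                                                     );
  Description: Constructor for ResultSet objects.
  Returntype : Bio::EnsEMBL::Funcgen::ResultSet
  Exceptions : Throws if no -table_name is passed
  Caller     : General
  Status     : At risk

=cut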
|
|
87
|
|
# Constructor. Builds the parent Set object (forcing feature_class to
# 'result') and then applies the ResultSet specific arguments.
sub new {
  my ($caller, @args) = @_;
  my $class = ref($caller) || $caller;

  # All caller arguments are handed to the parent constructor unchanged.
  my $self = $class->SUPER::new(@args, ('-feature_class' => 'result'));

  my ($table_name, $table_id, $rf_set, $dbfile_data_dir) = rearrange(
    ['TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'DBFILE_DATA_DIR'],
    @args
  );

  # Maps table_id => result_set_input_id, filled by add_table_id.
  $self->{'table_id_hash'} = {};

  # Maybe we don't need the analysis args as mandatory, as we're testing
  # in the adaptor store method.
  throw("Need to pass the following arg:\t-table_name") if ! $table_name;

  # Open questions from the original author:
  #  - Do we need some control over creating new objects with a dbID, adding
  #    result_groups/feature_sets and then storing/updating them?
  #  - There is potential for someone to create one from new using a duplicate
  #    dbID and then link incorrect data to a pre-existing ResultGroup.
  #  - We need to verify that each table_name/id in the set is from the same
  #    experiment.
  $self->table_name($table_name);

  if ($table_id) {
    $self->add_table_id($table_id);
  }

  if ($rf_set) {
    $self->result_feature_set($rf_set);
  }

  if ($dbfile_data_dir) {
    $self->dbfile_data_dir($dbfile_data_dir);
  }

  return $self;
}
|
|
115
|
|
116
|
|
#These are CollectionContainer? methods
#For a core track these would probably be in the Analysis
#All other collection methods are in ResultFeatureAdaptor (and parents)
|
|
120
|
|
=head2 get_dbfile_path_by_window_size

  Arg[1]     : int - window size
  Arg[2]     : OPTIONAL Bio::EnsEMBL::Slice Used when generating individual seq_region Collections
  Example    : my $filepath = $rset->get_dbfile_path_by_window_size($wsize);
  Description: Generates the default dbfile path for this ResultSet and the given window_size
  Returntype : string
  Exceptions : Throws if Slice is not valid
  Caller     : general
  Status     : At risk

=cut
|
|
133
|
|
# Builds the default collection file path for this ResultSet:
#   <dbfile_data_dir>/result_features.<name>.<window_size>[.<seq_region_name>].col
# The seq_region_name part is only added when a Slice is passed.
# Throws if a true value which is not a Bio::EnsEMBL::Slice is passed as the slice.
sub get_dbfile_path_by_window_size{
  my ($self, $window_size, $slice) = @_;

  if($slice){
    require Scalar::Util;

    # blessed() guards the isa call: calling ->isa on an unblessed reference
    # (e.g. a plain hashref) would die with a raw Perl error rather than
    # reaching the intended throw.
    if(! (Scalar::Util::blessed($slice) && $slice->isa('Bio::EnsEMBL::Slice'))){
      throw('You must provide a valid Bio::EnsEMBL::Slice');
    }

    $window_size .= '.'.$slice->seq_region_name;
  }

  return $self->dbfile_data_dir.'/result_features.'.$self->name.'.'.$window_size.'.col';
}
|
|
148
|
|
149
|
|
150 =head2 dbfile_data_dir
|
|
151
|
|
152 Arg[1] : OPTIONAL string - data directory for this ResultSet
|
|
153 Example : my $dbfile_data_dir = $self->dbfile_data_dir;
|
|
154 Description: Getter/Setter for the root dbfile data directory for this ResultSet
|
|
155 Returntype : string
|
|
156 Exceptions : None
|
|
157 Caller : self
|
|
158 Status : at risk
|
|
159
|
|
160 =cut
|
|
161
|
|
162
|
|
163
|
|
# Getter/setter for the dbfile data directory.
# The stored value is only replaced when a defined argument is passed.
sub dbfile_data_dir{
  my $self     = shift;
  my $data_dir = shift;

  if(defined $data_dir){
    $self->{'dbfile_data_dir'} = $data_dir;
  }

  return $self->{'dbfile_data_dir'};
}
|
|
171
|
|
172
|
|
173
|
|
174 =head2 result_feature_set
|
|
175
|
|
176 Arg [1] : optional - boolean 0 or 1.
|
|
177 Example : if($rset->result_feature_set){ ...use result_feature table ...};
|
|
178 Description: Getter and setter for the result_feature_set attribute.
|
|
179 Returntype : boolean
|
|
180 Exceptions : None
|
|
181 Caller : General
|
|
182 Status : At Risk
|
|
183
|
|
184 =cut
|
|
185
|
|
186
|
|
# Getter/setter for the result_feature_set flag.
# Any passed argument (including undef) overwrites the stored value.
sub result_feature_set{
  my ($self, @args) = @_;

  if(@args){
    $self->{'result_feature_set'} = $args[0];
  }

  return $self->{'result_feature_set'};
}
|
|
193
|
|
194
|
|
=head2 table_name

  Arg [1]    : (optional) string - table_name (experimental_chip, channel or input_set)
  Example    : $result_set->table_name('experimental_chip');
  Description: Getter and setter for the table_name for this ResultSet.
  Returntype : string
  Exceptions : Throws if a table name is passed which differs from the one already set
  Caller     : General
  Status     : At Risk

=cut
|
|
206
|
|
207
|
|
# Getter/setter for the table name of this ResultSet.
# Once a true table name is set it cannot be changed to a different value.
sub table_name{
  my ($self, @args) = @_;

  if(@args){
    my $current = $self->{'table_name'};

    if($current && ($current ne $args[0])){
      throw("Cannot mix table name/types of a ResultSet");
    }

    $self->{'table_name'} = $args[0];
  }

  return $self->{'table_name'};
}
|
|
222
|
|
223
|
|
224
|
|
=head2 add_table_id

  Arg [1]    : int - table_id (dbID in the table given by table_name)
  Arg [2]    : (optional) int - result_set_input_id
  Example    : $result_set->add_table_id($ec_id, $cc_id);
  Description: Caches a table_id and its result_set_input_id in the ResultSet. In the case of an
               array ResultSet, the unique result_set_input_id is used to key into the
               result table; it also reduces redundancy and enables mapping of results to chips
               rather than just the ResultSet. This enables result retrieval
               based on chips in the same set which have a differing status.
               In the case of a sequencing ResultSet, this simply refers to the InputSet ids.
  Returntype : None
  Exceptions : Throws if no table_id defined
               Throws if the table_id already has a defined result_set_input_id
  Caller     : General
  Status     : At Risk

=cut
|
|
240
|
|
# Registers a table_id together with its (optional) result_set_input_id.
# A table_id which already has a defined result_set_input_id cannot be
# redefined; one cached with an undefined value can be.
sub add_table_id {
  my ($self, $table_id, $cc_id) = @_;

  if(defined $table_id){

    # defined() on a missing key is false, so this covers exists + defined.
    if(defined $self->{'table_id_hash'}->{$table_id}){
      throw("You are attempting to redefine a result_set_input_id which is already defined");
    }

    $self->{'table_id_hash'}->{$table_id} = $cc_id;
  }
  else{
    throw("Need to pass a table_id");
  }

  return;
}
|
|
258
|
|
259
|
|
=head2 table_ids

  Example    : my @table_ids = @{$result_set->table_ids()};
  Description: Getter for the table ids (dbIDs in the table given by table_name) cached for this ResultSet.
  Returntype : arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut
|
|
270
|
|
# Returns a new arrayref of all cached table ids (the keys of the
# table_id => result_set_input_id cache), in no particular order.
sub table_ids {
  my ($self) = @_;

  my @ids = keys %{$self->{'table_id_hash'}};

  return \@ids;
}
|
|
276
|
|
277
|
|
# Deprecated alias for result_set_input_ids; emits a deprecation notice.
sub chip_channel_ids {
  my ($self) = @_;

  deprecate('ResultSet::chip_channel_ids is deprecated, please use result_set_input_ids');

  return $self->result_set_input_ids();
}
|
|
285
|
|
286 =head2 result_set_input_ids
|
|
287
|
|
288 Example : my @rset_rsi_ids = @{$result_set->result_set_input_ids()};
|
|
289 Description: Getter for the input ids for this ResultSet.
|
|
290 Returntype : arrayref
|
|
291 Exceptions : None
|
|
292 Caller : General
|
|
293 Status : At Risk
|
|
294
|
|
295 =cut
|
|
296
|
|
297
|
|
# Returns a new arrayref of all cached result_set_input_ids (the values of
# the table_id => result_set_input_id cache), in no particular order.
# Entries added without a result_set_input_id appear as undef.
sub result_set_input_ids {
  my ($self) = @_;

  my @rsi_ids = values %{$self->{'table_id_hash'}};

  return \@rsi_ids;
}
|
|
303
|
|
304
|
|
305
|
|
306
|
|
=head2 contains

  Arg [1]    : Channel or ExperimentalChip object
  Example    : if($result_set->contains($chip_or_channel)){...do some chip or channel operations here...};
  Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet
  Returntype : boolean
  Exceptions : warns if ResultSet table name is not of argument type
  Caller     : General
  Status     : At Risk

=cut
|
|
317
|
|
318
|
|
# Returns 1 if the dbID of the given object is a cached table id of this
# ResultSet, 0 otherwise. The object's table name is taken from the first
# entry returned by its adaptor's _tables method; if that does not match
# this ResultSet's table name a warning is issued and 0 is returned.
sub contains{
  my ($self, $chip_channel) = @_;

  my @tables     = $chip_channel->adaptor->_tables();
  my $table_name = (@{$tables[0]})[0];

  if($table_name eq $self->table_name()){
    return (exists $self->{'table_id_hash'}->{$chip_channel->dbID()}) ? 1 : 0;
  }

  warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");

  return 0;
}
|
|
334
|
|
335 =head2 get_result_set_input_id
|
|
336
|
|
337 Arg [1] : int - dbID (experimental_chip, channel or input_set)
|
|
338 Example : $result_set->get_result_set_input_id($ec_id);
|
|
339 Description: Retrieves a result_set_input_id from the cache given a dbID
|
|
340 Returntype : int
|
|
341 Exceptions : none
|
|
342 Caller : General
|
|
343 Status : At Risk
|
|
344
|
|
345 =cut
|
|
346
|
|
# Looks up the result_set_input_id cached for the given table id.
# Returns undef when the table id has not been added to this ResultSet.
sub get_result_set_input_id{
  my ($self, $table_id) = @_;

  if(exists $self->{'table_id_hash'}->{$table_id}){
    return $self->{'table_id_hash'}->{$table_id};
  }

  # Explicit undef (not a bare return) so list context callers still
  # receive exactly one element.
  return undef;
}
|
|
352
|
|
353
|
|
# Deprecated alias for get_result_set_input_id; emits a deprecation notice
# and returns the result_set_input_id cached for the given table id.
sub get_chip_channel_id{
  my ($self, $table_id) = @_;

  # Message corrected: it previously named a different method
  # (get_chip_channel_ids) and was misspelt.
  deprecate('ResultSet::get_chip_channel_id is deprecated, please use get_result_set_input_id');
  return $self->get_result_set_input_id($table_id);
}
|
|
360
|
|
361
|
|
362
|
|
=head2 get_InputSets

  Example    : my @input_sets = @{$result_set->get_InputSets()};
  Description: Retrieves the InputSets whose dbIDs are cached as table ids of this ResultSet
  Returntype : Listref of InputSet objects
  Exceptions : warns and returns nothing if this is not an input_set ResultSet
  Caller     : General
  Status     : At Risk

=cut
|
|
373
|
|
# Returns a listref of the InputSets whose dbIDs are cached as table ids of
# this ResultSet, fetching them via the InputSetAdaptor on first use.
# Warns and returns nothing if the table name is not 'input_set'.
sub get_InputSets{
  my $self = shift;

  if($self->table_name ne 'input_set'){
    warn 'Cannot get_InputSets for an array based ResultSet';
    return;
  }

  if(! defined $self->{'input_sets'}){
    my $is_adaptor = $self->adaptor->db->get_InputSetAdaptor();

    # Build the list locally and cache it in one step, so that:
    #  - a ResultSet with no table ids yields (and caches) an empty listref
    #    rather than undef, which callers would fail to dereference;
    #  - a fetch which dies part way through does not leave a partial cache.
    my @input_sets;

    foreach my $is_id(@{$self->table_ids()}){
      push @input_sets, $is_adaptor->fetch_by_dbID($is_id);
    }

    $self->{'input_sets'} = \@input_sets;
  }

  return $self->{'input_sets'};
}
|
|
394
|
|
395
|
|
=head2 get_ExperimentalChips

  Example    : my @ecs = @{$result_set->get_ExperimentalChips()};
  Description: Retrieves the ExperimentalChips for this ResultSet, either directly from the
               cached table ids or, for a channel ResultSet, via the experimental_chip_id
               of each Channel
  Returntype : Listref of ExperimentalChip objects
  Exceptions : warns and returns nothing if this is an input_set ResultSet
  Caller     : General
  Status     : At Risk

=cut
|
|
406
|
|
# Returns a listref of the ExperimentalChips for this ResultSet, fetching
# and caching them on first use.
#  - experimental_chip ResultSet: one fetch per cached table id.
#  - any other non input_set ResultSet (i.e. channel): each Channel is fetched
#    and its ExperimentalChip retrieved once per distinct experimental_chip_id.
# Warns and returns nothing for an input_set ResultSet.
# Throws if a Channel cannot be fetched for a cached table id.
sub get_ExperimentalChips{
  my $self = shift;

  if($self->table_name eq 'input_set'){
    warn 'Cannot get_ExperimentalChips for an InputSet ResultSet';
    return;
  }

  if(! defined $self->{'experimental_chips'}){
    my $ec_adaptor = $self->adaptor->db->get_ExperimentalChipAdaptor();

    # Built locally and cached in one step: both branches then yield a
    # listref (possibly empty) and a failure part way through does not
    # leave a partial cache behind.
    my @echips;

    if($self->table_name() eq "experimental_chip"){

      foreach my $ec_id(@{$self->table_ids()}){
        push @echips, $ec_adaptor->fetch_by_dbID($ec_id);
        #should this be hashed on chip_channel_id?
      }
    }else{
      my %echips_by_id;
      my $chan_adaptor = $self->adaptor->db->get_ChannelAdaptor();

      foreach my $chan_id(@{$self->table_ids()}){
        my $chan = $chan_adaptor->fetch_by_dbID($chan_id);

        # Fail with a meaningful message instead of a method call on undef.
        if(! defined $chan){
          throw("Failed to fetch Channel with dbID $chan_id");
        }

        my $ec_id = $chan->experimental_chip_id;
        $echips_by_id{$ec_id} ||= $ec_adaptor->fetch_by_dbID($ec_id);
      }

      @echips = values %echips_by_id;
    }

    $self->{'experimental_chips'} = \@echips;
  }

  return $self->{'experimental_chips'};
}
|
|
442
|
|
443
|
|
444
|
|
=head2 get_replicate_set_by_result_set_input_id

  Arg[1]     : int - result_set_input_id
  Example    : my $rep_set_name = $result_set->get_replicate_set_by_result_set_input_id($rsi_id);
  Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip
  Returntype : String - replicate set name
  Exceptions : None
  Caller     : General
  Status     : At Risk - implement for Channels?

=cut
|
|
456
|
|
457 #Where is this used?
|
|
458
|
|
# Returns the replicate value of the ExperimentalChip which corresponds to
# the given result_set_input_id, or undef if there is none.
# On first use a cache of result_set_input_id => replicate is built from
# get_ExperimentalChips.
sub get_replicate_set_by_result_set_input_id{
  my ($self, $cc_id) = @_;

  if( ! defined $self->{'_replicate_cache'}){

    warn "Generating replicate cache!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";

    # Always store a hashref, even when empty, so the cache is generated
    # (and the warning above issued) only once.
    my %replicate_cache;

    # get_ExperimentalChips returns nothing for input_set ResultSets;
    # fall back to an empty list rather than dereferencing undef.
    foreach my $ec (@{ $self->get_ExperimentalChips() || [] }){
      $replicate_cache{$self->get_result_set_input_id($ec->dbID())} = $ec->replicate();
    }

    $self->{'_replicate_cache'} = \%replicate_cache;
  }

  #warn here of absent replicate info?

  return (exists $self->{'_replicate_cache'}{$cc_id}) ? $self->{'_replicate_cache'}{$cc_id} : undef;
}
|
|
481
|
|
# Deprecated alias for get_replicate_set_by_result_set_input_id;
# emits a deprecation notice.
sub get_replicate_set_by_chip_channel_id{
  my $self  = shift;
  my $cc_id = shift;

  deprecate('Please use get_replicate_set_by_result_set_input_id instead');

  return $self->get_replicate_set_by_result_set_input_id($cc_id);
}
|
|
488
|
|
489
|
|
=head2 get_displayable_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Arg[3]     : int - Max bins i.e. pixel width of display
  Arg[4]     : int - window_size
  Arg[5]     : string - constraint
  Example    : my @results = @{$ResultSet->get_displayable_ResultFeatures_by_Slice($slice)};
  Description: Simple wrapper method which fetches ResultFeatures with the 'DISPLAYABLE' status
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut
|
|
502
|
|
503
|
|
# Fetches ResultFeatures for the given Slice using the fixed status
# 'DISPLAYABLE'; all other arguments are passed through unchanged.
# NOTE(review): this calls fetch_ResultFeatures_by_Slice_ResultSet on this
# object's own adaptor, whereas get_ResultFeatures_by_Slice goes through the
# ResultFeatureAdaptor's fetch_all_by_Slice_ResultSet - confirm that the
# adaptor method still exists and that the difference is intended.
sub get_displayable_ResultFeatures_by_Slice{
  my $self = shift;
  my ($slice, $with_probe, $max_bins, $window_size, $constraint) = @_;

  return $self->adaptor->fetch_ResultFeatures_by_Slice_ResultSet(
    $slice, $self, 'DISPLAYABLE', $with_probe, $max_bins, $window_size, $constraint
  );
}
|
|
508
|
|
509
|
|
510
|
|
511
|
|
=head2 get_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : string - Status name e.g. 'DISPLAYABLE'
  Arg[3]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Arg[4]     : int - Max bins i.e. pixel width of display
  Arg[5]     : int - window_size
  Arg[6]     : string - constraint
  Example    : my @rfs_with_probe = @{$ResultSet->get_ResultFeatures_by_Slice($slice, undef, 1)};
  Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut
|
|
528
|
|
# Fetches ResultFeatures for the given Slice and this ResultSet through the
# ResultFeatureAdaptor; all arguments are passed through unchanged.
sub get_ResultFeatures_by_Slice{
  my $self = shift;
  my ($slice, $status, $with_probe, $max_bins, $window_size, $constraint) = @_;

  my $rf_adaptor = $self->adaptor->db->get_ResultFeatureAdaptor;

  return $rf_adaptor->fetch_all_by_Slice_ResultSet(
    $slice, $self, $status, $with_probe, $max_bins, $window_size, $constraint
  );
}
|
|
533
|
|
534
|
|
535
|
|
#Floats unpack inaccurately so need rounding to 3 decimal places
#This should match the format in which they are originally stored
#This is dependent on ResultSet type i.e. reads or intensity?
#No format for reads!
#Should this be set in the ResultSet instead?
#It may be more efficient for the caller to test for format first rather than blindly printf'ing
#even if there is no format?
#This needs setting in new, so we don't have to eval for every score.
|
|
544
|
|
# Returns the sprintf format used for scores: three decimal places.
sub score_format{
  my $format = '%.3f';

  return $format;
}
|
|
548
|
|
549
|
|
550
|
|
551
|
|
552 =head2 log_label
|
|
553
|
|
554 Example : print $rset->log_label();
|
|
555 Description: Get a string of the unique key fields for logging purposes
|
|
556 Returntype : string
|
|
557 Exceptions : None
|
|
558 Caller : General
|
|
559 Status : At Risk
|
|
560
|
|
561 =cut
|
|
562
|
|
# Builds a label for logging of the form
#   <name>:<analysis logic_name>:<feature type name>:<cell type name>
# Placeholder text is used when the feature type or cell type is not defined.
sub log_label {
  my $self = shift;

  # Call each accessor once and reuse the result.
  my $feature_type = $self->feature_type();
  my $cell_type    = $self->cell_type();

  my $label = (defined $feature_type) ? $feature_type->name.":" : "Unknown FeatureType:";

  # Placeholder spelling corrected from "Uknown" to match the FeatureType one.
  $label .= (defined $cell_type) ? $cell_type->name : "Unknown CellType";

  return $self->name.":".$self->analysis->logic_name.":".$label;
}
|
|
582
|
|
583
|
|
584
|
|
585 1;
|