comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/DataSet.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 #
2 # Ensembl module for Bio::EnsEMBL::Funcgen::DataSet
3 #
4 # You may distribute this module under the same terms as Perl itself
5
6
7 =head1 LICENSE
8
9 Copyright (c) 1999-2011 The European Bioinformatics Institute and
10 Genome Research Limited. All rights reserved.
11
12 This software is distributed under a modified Apache license.
13 For license details, please see
14
15 http://www.ensembl.org/info/about/code_licence.html
16
17 =head1 CONTACT
18
19 Please email comments or questions to the public Ensembl
20 developers list at <ensembl-dev@ebi.ac.uk>.
21
22 Questions may also be sent to the Ensembl help desk at
23 <helpdesk@ensembl.org>.
24
25
26 =head1 NAME
27
28 Bio::EnsEMBL::Funcgen::DataSet - A module to represent DataSet object.
29
30
31 =head1 SYNOPSIS
32
33 use Bio::EnsEMBL::Funcgen::DataSet;
34
35 my $data_set = Bio::EnsEMBL::Funcgen::DataSet->new(
36 -DBID => $dbID,
37 -ADAPTOR => $self,
38 -SUPPORTING_SETS => [$rset],
39 -FEATURE_SET => $fset,
40 -DISPLAYABLE => 1,
41 -NAME => 'DATASET1',
42 );
43
44
45
46 =head1 DESCRIPTION
47
48 A DataSet object provides access to either or both raw results and AnnotatedFeatures
49 for a given experiment within a Slice, associated with set wide experimental meta data.
50 This was aimed primarily at easing access to data via the web API by creating
51 a wrapper class with convenience methods. The focus of this class is to contain raw and
52 associated processed/analysed data to be displayed as a set within the browser i.e. an
53 experiment may have different cell lines, features or time points, these would require different DataSets.
54 # However a DataSet may contain mixed data types i.e. promoter & histone???? No give separate sets?
55 May have duplicates for raw data but only one predicted features track??
56 The data in this class is kept as lightweight as possible with data being loaded dynamically.
57
58
59 =cut
60
61 use strict;
62 use warnings;
63
64 package Bio::EnsEMBL::Funcgen::DataSet;
65
66 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
67 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate);
68 use Bio::EnsEMBL::Funcgen::Storable;
69
70 use vars qw(@ISA);
71 @ISA = qw(Bio::EnsEMBL::Funcgen::Storable);
72 #Should not be a Set as is sufficiently different
73
74
75 =head2 new
76
77
78
79 Example : my $dset = Bio::EnsEMBL::Funcgen::DataSet->new(
80 -SUPPORTING_SETS => [$fset1, $fset2],
81 -FEATURE_SET => $fset,
82 -DISPLAYABLE => 1,
83 -NAME => 'DATASET1',
84 );
85
86 Description: Constructor for DataSet objects.
87 Returntype : Bio::EnsEMBL::Funcgen::DataSet
88 Exceptions : Throws if a dbID is passed and the caller is not the DataSetAdaptor
89 Caller : General
90 Status : At risk
91
92 =cut
93
# Constructor.
# Base attributes are handled by the parent class constructor; the
# -FEATURE_SET, -SUPPORTING_SETS and -NAME arguments are applied here.
# Throws if the object already has a dbID and the calling package is not
# the DataSetAdaptor.
sub new {
    my ($caller, @args) = @_;

    my $class = ref($caller) || $caller;
    my $self  = $class->SUPER::new(@args);

    # Do we need to add $fg_ids to this? Currently maintaining one
    # feature_group focus (combi exps?).
    my ($product_fset, $supporting_sets, $set_name) =
        rearrange([qw(FEATURE_SET SUPPORTING_SETS NAME)], @args);

    my ($calling_package) = caller();

    # Design notes:
    #  - A combined feature set can derive from more than one experiment,
    #    so tracking goes back to the result group rather than to
    #    experiment ids; no EXPERIMENT_ID(S) argument is taken here.
    #  - Someone could create an object from new() with a duplicate dbID
    #    and link incorrect data to pre-existing DB records, hence the
    #    caller check below.
    if ($self->dbID()) {
        if ($calling_package ne "Bio::EnsEMBL::Funcgen::DBSQL::DataSetAdaptor") {
            throw('You must use the DataSetAdaptor to generate DataSets with dbID i.e. from the DB,'
                  . ' as this module accomodates updating which may cause incorrect data if the object'
                  . ' is not generated from the DB');
        }
    }

    $self->{'supporting_sets'} ||= {};

    # There is deliberately no "must specify at least one set" check:
    # DataSets may be generated without feature sets.
    if ($supporting_sets) {
        $self->add_supporting_sets($supporting_sets);
    }

    if ($product_fset) {
        $self->product_FeatureSet($product_fset);
    }

    if ($set_name) {
        $self->name($set_name);
    }

    return $self;
}
148
149
150
151
152
153
154
155 #methods
156 #set wide display label(predicted_feature) + more wordy label for wiggle tracks?
157 #defined by experiment type i.e. time course would require timepoint in display label
158 #deal with this dynamically or have display_label in table
159 #Need call on type, or fetch all would
160
161 #_get_ec_ids or contigsets?
162 #this should now be an intrinsic part of this class/adaptor
163
164 #cell line
165 #feature_type
166 #displayable...should have one for the whole set and one for each raw and predicted?
167
168 #have analysis as arg? Or do we get all analysis sets?
169 #we need to be able to set analyses for DataSets dynamically from DB
170 #pick up all DataSets
171 #displayable field in DataSets also?
172
173 #If we have mixed types in the same experiment then we could get promoter features and histone wiggle tracks displayed togeter
174 #Not v.good for display purposes? We may want to separate the promoter and histone tracks, or we may want ll the experiment data together but of mixed types.
175 #We need to be able to pull back the experiment type for each set, therefore this needs setting on an ec level, not an experiment level.
176 #This is also v.reliant on putting contig set info in place, otherwise we may get mixed chip types in same set.
177
178 #get_raw_analysis_name
179 #get_predicted_feature_analysis_name
180 #set ResultFeatures and AnnotatedFeatures in hash keyed by analysis_name?
181
182 #Need to change to simple accessor
183 #or should we maintain to provide explicit method for delineating between parent and supporting FeatureSets?
184 #yes, and sub the feature_type/cell_type checks
185
186
187 =head2 product_FeatureSet
188
189 Arg [1] : (optional) Bio::EnsEMBL::Funcgen::FeatureSet
190 Example : $data_set->product_FeatureSet($fset);
191 Description: Getter and setter for the main feature_set attribute for this DataSet.
192 Returntype : Bio::EnsEMBL::Funcgen::FeatureSet
193 Exceptions : Throws if not a valid FeatureSet or if main feature_set has already been set.
194 Caller : General
195 Status : At Risk - change to get_product_FeatureSet
196
197 =cut
198
# Getter/setter for the main (product) FeatureSet of this DataSet.
#   Arg [1]   : (optional) Bio::EnsEMBL::Funcgen::FeatureSet
#   Returns   : the stored FeatureSet, or undef if none has been set
#   Throws    : if the argument is not a FeatureSet, or if a product
#               FeatureSet has already been set (it can only be set once)
sub product_FeatureSet {
    my ($self, $fset) = @_;

    if ($fset) {

        if (! (ref($fset) && $fset->isa('Bio::EnsEMBL::Funcgen::FeatureSet'))) {
            throw('Need to pass a valid Bio::EnsEMBL::Funcgen::FeatureSet');
        }

        if (defined $self->{'feature_set'}) {
            # Point the user at the method that actually exists in this class.
            throw('The main feature_set has already been set for this DataSet, '
                  . 'maybe you want add_supporting_sets?');
        }

        # Validate first, so a type mismatch leaves feature_set unset.
        $self->_validate_and_set_types($fset);
        $self->{'feature_set'} = $fset;
    }

    return $self->{'feature_set'};
}
219
220
221 =head2 add_supporting_sets
222
223 Arg [1] : Arrayref of stored Bio::EnsEMBL::Funcgen::Set objects (e.g. Feature/ResultSets)
224 Example : $dset->add_supporting_sets([$rset]);
225 Description: Adds Result/FeatureSets to the DataSet
226 Returntype : none
227 Exceptions : Throws if set not valid for supporting_set type of DataSet
228 Throws if supporting_sets is not an array ref
229 Caller : General
230 Status : At Risk
231
232 =cut
233
234
# Adds supporting sets to this DataSet, grouped by the dbID of each set's
# analysis.
#   Arg [1]   : ARRAY ref of stored Bio::EnsEMBL::Funcgen::Set objects whose
#               set_type is not 'data'
#   Returns   : nothing
#   Throws    : if the argument is not an ARRAY ref, if an element is not a
#               valid stored Set, or if type validation fails
sub add_supporting_sets {
    my ($self, $sets) = @_;

    if (ref($sets) ne 'ARRAY') {
        # Avoid an "uninitialized value" warning when nothing was passed.
        throw("Supporting sets need to be a reference to an ARRAY:\t"
              . (defined $sets ? $sets : 'undef'));
    }

    foreach my $set (@$sets) {

        # The set type cannot be 'data' at present, as DataSet does not
        # inherit from Set.pm.
        if (!(ref($set) && $set->isa('Bio::EnsEMBL::Funcgen::Set') && $set->set_type ne 'data' && $set->dbID)) {
            throw("Need to pass a valid stored Bio::EnsEMBL::Funcgen::Set which is not a DataSet:\t$set");
        }

        # Only validate result type data, as compound analyses
        # (e.g. RegulatoryFeatures) can have various cell/feature types.
        $self->_validate_and_set_types($set) if $set->set_type() ne 'feature';

        # Look the analysis dbID up once and initialise the bucket with a
        # real array ref ('||= ()' only ever stored undef).
        my $analysis_id = $set->analysis->dbID();
        $self->{'supporting_sets'}->{$analysis_id} ||= [];
        push @{ $self->{'supporting_sets'}->{$analysis_id} }, $set;
    }

    return;
}
275
276
277 =head2 _validate_and_set_types
278
279 Arg [1] : Bio::EnsEMBL::Feature/ResultSet object
280 Example : $dset->_validate_and_set_types($rset);
281 Description: Validates and sets DataSet cell and feature types
282 Returntype : none
283 Exceptions : Throws if types not valid
284 Caller : General
285 Status : At Risk
286
287 =cut
288
289
# Validates the feature_type and cell_type of a set against those already
# recorded for this DataSet, recording them if the DataSet has none yet.
#   Arg [1]   : set object holding 'feature_type' and 'cell_type' entries
#               whose values provide a name() method
#   Returns   : nothing
#   Throws    : if the set lacks a type the DataSet already has, or if the
#               type names differ
#
# This reads the hash entries directly rather than going through
# accessors. It restricts all validated sets to one cell and one feature
# type; callers skip validation for 'feature' supporting sets.
sub _validate_and_set_types {
    my ($self, $set) = @_;

    for my $type ('feature_type', 'cell_type') {

        if (! defined $self->{$type}) {
            # First set seen for this type: adopt it.
            $self->{$type} = $set->{$type};
            next;
        }

        my $dset_name = $self->{$type}->name();

        if (! defined $set->{$type}) {
            # Previously this died calling name() on an undefined value.
            throw(ref($set)." has no $type defined but DataSet $type is ($dset_name)");
        }

        my $set_name = $set->{$type}->name();

        if ($set_name ne $dset_name) {
            throw(ref($set)." $type(".$set_name.
                  ") does not match DataSet $type(".$dset_name.")");
        }
    }

    return;
}
320
321
322
323 =head2 get_supporting_sets_by_Analysis
324
325 Arg [1] : Bio::EnsEMBL::Analysis
326 Arg [2] : (optional) status - e.g 'DISPLAYABLE'
327 Example : my @anal_sets = @{$data_set->get_supporting_sets_by_Analysis($analysis)};
328 Description: Getter for the SupportingSet objects of a given Analysis.
329 Returntype : ARRAYREF
330 Exceptions : Throws if arg is not a valid stored Bio::EnsEMBL::Analysis
331 Caller : General
332 Status : At Risk
333
334 =cut
335
# Returns the supporting sets registered under the given analysis.
#   Arg [1]   : stored Bio::EnsEMBL::Analysis (must have a dbID)
#   Arg [2]   : (optional) status name, e.g. 'DISPLAYABLE'; when given, only
#               sets for which has_status() is true are returned
#   Returns   : ARRAY ref (a new array holding the stored set objects)
#   Throws    : if arg 1 is not a valid stored Bio::EnsEMBL::Analysis
sub get_supporting_sets_by_Analysis {
    my ($self, $analysis, $status) = @_;

    if (! (ref($analysis) && $analysis->isa('Bio::EnsEMBL::Analysis') && $analysis->dbID())) {
        throw('Need to pass a valid stored Bio::EnsEMBL::Analysis');
    }

    # Fall back to an empty list rather than dereferencing a missing entry,
    # which would autovivify an empty bucket for an unknown analysis.
    my $analysis_sets = $self->{'supporting_sets'}->{$analysis->dbID()} || [];

    # Could we have >1 set with the same analysis? The bucket is a list,
    # so all of them are returned.
    my @rsets = grep { (! defined $status) || $_->has_status($status) } @{$analysis_sets};

    return \@rsets;
}
383
384
385
386 =head2 get_supporting_sets
387
388 Arg [1] : (optional) status - e.g 'DISPLAYABLE'
389 Example : my @status_sets = @{$data_set->get_supporting_sets($status)};
390 Description: Getter for the ResultSets for this DataSet.
391 Returntype : Arrayref
392 Exceptions : None
393 Caller : General
394 Status : At Risk
395
396 =cut
397
# Returns the supporting sets of this DataSet across all analyses.
#   Arg [1]   : (optional) status name, e.g. 'DISPLAYABLE'
#   Arg [2]   : (optional) set type: 'result', 'feature' or 'input'
#   Returns   : ARRAY ref of the sets passing both optional filters
#   Throws    : if an invalid set type is specified
sub get_supporting_sets {
    my ($self, $status, $set_type) = @_;

    # Validate the requested set type, if any.
    if ($set_type) {
        my $is_known_type = ($set_type eq 'result'
                             || $set_type eq 'feature'
                             || $set_type eq 'input');

        if (! $is_known_type) {
            throw("You have specified an invalid supporting set type:\t$set_type");
        }
    }

    my @ssets;

    for my $analysis_id (keys %{ $self->{'supporting_sets'} }) {

      SET:
        for my $candidate (@{ $self->{'supporting_sets'}->{$analysis_id} }) {
            next SET if defined($status)   && ! $candidate->has_status($status);
            next SET if defined($set_type) && $candidate->set_type ne $set_type;

            push @ssets, $candidate;
        }
    }

    return \@ssets;
}
435
436
437
438
439 =head2 get_displayable_supporting_sets
440
441 Example : my @displayable_ssets = @{$data_set->get_displayable_supporting_sets()};
442 Description: Convenience method for web display
443 Returntype : Arrayref
444 Exceptions : None
445 Caller : General
446 Status : At Risk
447
448 =cut
449
# Convenience wrapper: the supporting sets having the 'DISPLAYABLE' status,
# optionally restricted to one set type.
#   Arg [1]   : (optional) set type, passed through to get_supporting_sets
#   Returns   : whatever get_supporting_sets returns (an ARRAY ref)
sub get_displayable_supporting_sets {
    my $self     = shift;
    my $set_type = shift;

    return $self->get_supporting_sets('DISPLAYABLE', $set_type);
}
455
456
457
458 =head2 get_displayable_product_FeatureSet
459
460 Example : my $fset = $data_set->get_displayable_product_FeatureSet();
461 Description: Convenience method for web display
462 Returntype : Bio::EnsEMBL::Funcgen::FeatureSet
463 Exceptions : None
464 Caller : General
465 Status : At Risk
466
467 =cut
468
# Convenience method for web display.
#   Returns   : the product FeatureSet if it has the 'DISPLAYABLE' status,
#               otherwise undef; also undef when this DataSet has no
#               product FeatureSet
sub get_displayable_product_FeatureSet {
    my ($self) = @_;

    my $fset = $self->product_FeatureSet();

    # A DataSet may legitimately have no product FeatureSet; do not call
    # has_status on an undefined value in that case.
    my $is_displayable = defined($fset) && $fset->has_status('DISPLAYABLE');

    # Explicit undef (not a bare return) keeps the original behaviour of
    # yielding a single value in list context.
    return $is_displayable ? $fset : undef;
}
474
475
476
477
478
479 =head2 name
480
481 Example : $dset->name('DATASET1');
482 Description: Getter/Setter for the name of this DataSet.
483 Returntype : string
484 Exceptions : None
485 Caller : General
486 Status : At Risk
487
488 =cut
489
# Getter/setter for the name of this DataSet.
#   Arg [1]   : (optional) new name; stored whenever an argument is passed,
#               even if it is undef
#   Returns   : the current name
sub name {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'name'} = $new_value[0];
    }

    return $self->{'name'};
}
497
498
499
500
501 #The following attributes are generated dynamically from the consituent Result/FeatureSets
502
503 =head2 cell_type
504
505 Example : my $dset_ctype_name = $dset->cell_type->name();
506 Description: Getter for the cell_type for this DataSet.
507 Returntype : Bio::EnsEMBL::Funcgen::CellType
508 Exceptions : None
509 Caller : General
510 Status : At Risk
511
512 =cut
513
# Getter for the cell_type entry of this DataSet (read-only here).
sub cell_type {
    my ($self) = @_;

    return $self->{'cell_type'};
}
519
520 =head2 feature_type
521
522 Example : my $dset_ftype_name = $dset->feature_type->name();
523 Description: Getter for the feature_type for this DataSet.
524 Returntype : Bio::EnsEMBL::Funcgen::FeatureType
525 Exceptions : None
526 Caller : General
527 Status : At Risk
528
529 =cut
530
# Getter for the feature_type entry of this DataSet (read-only here).
sub feature_type {
    my ($self) = @_;

    return $self->{'feature_type'};
}
536
537
538
539
540
541 =head2 display_label
542
543 Example : print $rset->display_label();
544 Description: Getter for the display_label attribute for this DataSet.
545 This is more appropriate for the predicted_features of the set.
546 Use the individual display_labels for each raw result set.
547 Returntype : str
548 Exceptions : None
549 Caller : General
550 Status : At Risk
551
552 =cut
553
# Getter for the display label of this DataSet; built on first use and
# cached in $self->{'display_label'}.
#   Returns   : 'Regulatory Features' when the product FeatureSet's feature
#               type class is 'Regulatory Feature'; otherwise
#               "<feature type name> - <cell type label> Enriched Sites",
#               where the cell type label is the first true value of
#               display_label(), description() and name()
#
# The label is assembled in a local variable and cached only once it is
# complete, so a failure while resolving the cell type cannot leave a
# truncated label behind for later calls.
sub display_label {
    my ($self) = @_;

    # Add display label in table?

    if (! $self->{'display_label'}) {

        my $fset = $self->product_FeatureSet;
        my $label;

        if ($fset && ($fset->feature_type->class() eq 'Regulatory Feature')) {
            $label = 'Regulatory Features';
        }
        else {
            # NOTE(review): this still requires feature_type and cell_type
            # to be set on the DataSet.
            my $ftype_name  = $self->feature_type->name();
            my $ctype       = $self->cell_type;
            my $ctype_label = $ctype->display_label()
                           || $ctype->description()
                           || $ctype->name();

            $label = $ftype_name . ' - ' . $ctype_label . ' Enriched Sites';
        }

        $self->{'display_label'} = $label;
    }

    return $self->{'display_label'};
}
580
581
582 #sub get_type_config{
583 # my ($self) = @_;
584 #
585 # if (! defined $self->{type_config}){
586 # $self->{type_config} = $self->adaptor->fetch_type_config_by_DataSet($self);
587 # }
588 #
589 # return $self->{type_config};
590 #}
591
592
593
594 1;
595