comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/InputSet.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 #
2 # Ensembl module for Bio::EnsEMBL::Funcgen::InputSet
3 #
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <ensembl-dev@ebi.ac.uk>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23 =head1 NAME
24
25 Bio::EnsEMBL::Funcgen::InputSet - A module to represent an InputSet object.
26
27
28 =head1 SYNOPSIS
29
30 use Bio::EnsEMBL::Funcgen::InputSet;
31
32 #Create an InputSet
33
34 my $inp_set = Bio::EnsEMBL::Funcgen::InputSet->new
35 (
36 -DBID => $dbID,
37 -ADAPTOR => $self,
38 -EXPERIMENT => $exp,
39 -FEATURE_TYPE => $ftype,
40 -CELL_TYPE => $ctype,
41 -FORMAT => 'READ_FORMAT',
42 -VENDOR => 'SOLEXA',
43 -NAME => 'ExpSet1',
44 -REPLICATE => 1,
45 );
46
47 # Add some InputSubsets
48
49 $inp_set->add_new_subset($subset_name, $input_subset);
50
51
52
53
54 =head1 DESCRIPTION
55
56 An InputSet object provides a generic container for any non-array based feature import,
57 allowing tracking of file import via the status table and integration into Data and FeatureSets to
58 provide traceability to the source experiment from a given FeatureSet.
59
60 =cut
61
62 use strict;
63 use warnings;
64
65 package Bio::EnsEMBL::Funcgen::InputSet;
66
67 use Bio::EnsEMBL::Funcgen::InputSubset;
68 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
69 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate);
70 use Bio::EnsEMBL::Funcgen::Set;
71 use Bio::EnsEMBL::Analysis;
72
73 use vars qw(@ISA);
74 @ISA = qw(Bio::EnsEMBL::Funcgen::Set);
75
76
77 =head2 new
78
79
80
81 Example : my $eset = Bio::EnsEMBL::Funcgen::InputSet->new(
82 -EXPERIMENT => $exp,
83 -FEATURE_TYPE => $ftype,
84 -CELL_TYPE => $ctype,
85 -FORMAT => 'READ_FORMAT',
86 -VENDOR => 'SOLEXA',
87 -NAME => 'ExpSet1',
88 -ANALYSIS => $anal,
89 -FEATURE_CLASS => 'annotated',
90 );
91
92 Do we want to define subsets like this or are we more likely to add them one by one?
93
94 Description: Constructor for InputSet objects.
95 Returntype : Bio::EnsEMBL::Funcgen::InputSet
96 Exceptions : Throws if no Experiment defined
97 Throws if CellType or FeatureType are not valid or stored
98 Caller : General
99 Status : At risk
100
101 =cut
102
sub new {
    # Constructor. Builds the object through the parent constructor, then
    # validates and stores the InputSet specific attributes.
    #
    # Named args read here: -EXPERIMENT, -FORMAT, -VENDOR, -REPLICATE.
    # The full argument list is also passed unchanged to SUPER::new.
    #
    # Returns the new object.
    # Throws if -EXPERIMENT is not a stored Experiment, if feature_type or
    # cell_type is undefined, if feature_class is not one of the supported
    # classes, or if feature_class is 'result' and -FORMAT is not 'SEQUENCING'.
    my $caller = shift;
    my $class  = ref($caller) || $caller;

    #Add set_type here to overwrite default ref parsing in Set::set_type
    #This needs to stay like this until we patch the DB
    my $self = $class->SUPER::new(@_);

    my ($exp, $format, $vendor, $rep)
        = rearrange(['EXPERIMENT', 'FORMAT', 'VENDOR', 'REPLICATE'], @_);

    if (! (ref $exp && $exp->isa('Bio::EnsEMBL::Funcgen::Experiment') && $exp->dbID())) {
        throw('Must specify a valid stored Bio::EnsEMBL::Funcgen::Experiment');
    }

    #These are set in Set, just validate here
    throw('Must provide a FeatureType') if ! defined $self->feature_type;
    throw('Must provide a CellType')    if ! defined $self->cell_type;

    my $type = $self->feature_class;

    #Need to move these types to config
    my @valid_classes = ('annotated', 'result', 'segmentation');

    # Compare with eq rather than interpolating $type into a pattern: a
    # pattern would treat regex metacharacters in $type as wildcards and let
    # values such as 'annotate.' through.
    if (! ($type && grep { $_ eq $type } @valid_classes)) {
        throw("You must define a valid InputSet feature_class e.g. 'annotated' or 'result'");
    }

    # defined() guard: a missing -FORMAT still throws, but without an
    # 'uninitialized value' warning from the string comparison.
    if ($type eq 'result' && ! (defined $format && $format eq 'SEQUENCING')) {
        throw('InputSet does not yet support a result type InputSet which does not have the \'SEQUENCING\' format');
    }

    #if(! defined $self->analysis){
    ##default analysis hack for v47
    ##Set directly to avoid dbID boolean check
    #This is to support supporting_set cache in data_set?
    # NOTE: the defined check above is disabled, so the analysis is always
    # replaced by this default 'external' analysis.
    $self->{'analysis'} = Bio::EnsEMBL::Analysis->new
        (-logic_name => 'external',
         -id         => 0,#??someone needs to rewrite analysis
        );

    #Change to direct setting for speed
    $self->{format}     = $format;
    $self->{vendor}     = $vendor;
    $self->{replicate}  = $rep;
    $self->{experiment} = $exp;
    $self->{subsets}    = {};

    return $self;
}
157
158
159 =head2 add_new_subset
160
161 Arg [1] : string - sub set name e.g. the file name (not path as we're restricted to 30 chars)
162 Arg [2] : Bio::EnsEMBL::Funcgen::InputSubset - optional
163 If not defined will create a sparse InputSubset based on the name
164 Example : $expset->add_new_subset($ss_name, $exp_subset);
165 Description: Adds input_subset
166 Returntype : Bio::EnsEMBL::Funcgen::InputSubset
167 Exceptions : Throws if set is already present
168 Throws if InputSubset is not valid or stored
169 Caller : General
170 Status : At Risk
171
172 =cut
173
174 #Do we still use the optional subset function?
175
sub add_new_subset {
    # Registers an InputSubset under $ss_name and returns it.
    # If no InputSubset is passed, a new one is created from the name and
    # this InputSet. Throws on a missing name, a duplicate name, or a passed
    # InputSubset that is not a stored InputSubset.
    my ($self, $ss_name, $subset) = @_;

    #Need to test $ss_name here
    # ref(\$ss_name) is 'SCALAR' only for a plain (non-reference) value
    if (! $ss_name || ref(\$ss_name) ne 'SCALAR') {
        throw('You must pass a InputSubset name');
    }

    if ($self->get_subset_by_name($ss_name)) {
        throw("Subset $ss_name is already present in this InputSet, maybe you need to alter the filename?");
    }

    if (! defined $subset) {
        # No subset supplied: build one from the name
        $subset = Bio::EnsEMBL::Funcgen::InputSubset->new(
            -name      => $ss_name,
            -input_set => $self,
        );
    }
    elsif (! (ref($subset)
              && $subset->isa('Bio::EnsEMBL::Funcgen::InputSubset')
              && $subset->dbID())) {
        throw('InputSubsets must be valid and stored');
    }

    $self->{subsets}{$ss_name} = $subset;

    return $subset;
}
206
207
208 =head2 get_Experiment
209
210 Example : my $exp = $exp_set->get_Experiment();
211 Description: Getter for the Experiment of this InputSet.
212 Returntype : Bio::EnsEMBL::Funcgen::Experiment
213 Exceptions : None
214 Caller : General
215 Status : At Risk
216
217 =cut
218
sub get_Experiment {
    # Returns whatever is stored in the 'experiment' slot of this object.
    my ($self) = @_;
    return $self->{experiment};
}
220
221
222 =head2 get_InputSubsets
223
224 Example : my @subsets = @{$exp_set->get_InputSubsets()};
225 Description: Getter for the InputSubsets for this InputSet.
226 Returntype : Arrayref
227 Exceptions : None
228 Caller : General
229 Status : At Risk
230
231 =cut
232
sub get_InputSubsets {
    # Returns a new array ref holding every value of the 'subsets' hash.
    # The order is whatever order the hash yields.
    my $self = shift;

    my @subsets = values %{ $self->{'subsets'} };

    return \@subsets;
}
238
239
240
241
242 =head2 get_subset_by_name
243
244 Example : my $subsets = $exp_set->get_subset_by_name('subset1');
245 Description: Getter for the subset of a given name for this InputSet.
246 Returntype : Bio::EnsEMBL::Funcgen::InputSubset
247 Exceptions : None
248 Caller : General
249 Status : At Risk
250
251 =cut
252
sub get_subset_by_name {
    # Returns the value stored under $name in the 'subsets' hash, or undef
    # when no such key exists.
    my $self = shift;
    my $name = shift;

    if (exists $self->{'subsets'}{$name}) {
        return $self->{'subsets'}{$name};
    }

    # Explicit undef (not a bare return) so list-context callers still
    # receive exactly one element.
    return undef;
}
257
258
259 =head2 get_subset_names
260
261 Example : my @subset_names = @{$exp_set->get_subset_names()};
262 Description: Getter for the subset names for this InputSet.
263 Returntype : Arrayref
264 Exceptions : None
265 Caller : General
266 Status : At Risk
267
268 =cut
269
sub get_subset_names {
    # Returns a new array ref holding every key of the 'subsets' hash.
    # The order is whatever order the hash yields.
    my $self = shift;

    my @names = keys %{ $self->{'subsets'} };

    return \@names;
}
274
275
276
277
278 =head2 vendor
279
280 Arg[1] : String - vendor e.g. ILLUMINA
281 Example : my $iset_vendor = $iset->vendor;
282 Description: Getter for the vendor attribute of this InputSet.
283 Returntype : String
284 Exceptions : None
285 Caller : General
286 Status : At Risk
287
288 =cut
289
sub vendor {
    # Read-only accessor for the 'vendor' slot; any extra arguments are ignored.
    my ($self) = @_;
    return $self->{vendor};
}
291
292
293 =head2 format
294
295 Arg[1] : string - format i.e. product type/format
296 Example : my $iset_format = $iset->format;
297 Description: Getter for the format attribute of this InputSet.
298 Returntype : String
299 Exceptions : None
300 Caller : General
301 Status : At Risk
302
303 =cut
304
sub format {
    # Read-only accessor for the 'format' slot; any extra arguments are ignored.
    my ($self) = @_;
    return $self->{format};
}
306
307
308 =head2 replicate
309
310 Arg[1] : Integer - replicate 0 = merged or NA, >0 refers to individual replicate
311 Example : if($iset->replicate){ #Do something replicate specific in here }
312 Description: Getter for the replicate attribute of this InputSet.
313 Returntype : Integer
314 Exceptions : None
315 Caller : General
316 Status : At Risk
317
318 =cut
319
sub replicate {
    # Read-only accessor for the 'replicate' slot; any extra arguments are ignored.
    my ($self) = @_;
    return $self->{replicate};
}
321
322
323
324 =head2 source_info
325
326 Example : my $source_info = $input_set->source_info;
327 Description: Getter for the experiment source info i.e. [ [ $label, $url ], ... ]
328 Returntype : Listref
329 Exceptions : None
330 Caller : General
331 Status : At risk
332
333 =cut
334
335 #Currently handling redundant/absent InputSubset data
336
sub source_info {
    # Returns an array ref of [ $label, $link ] pairs describing where the
    # InputSubsets of this InputSet came from. The result is built on the
    # first call and cached in $self->{source_info}.
    #
    # Per InputSubset:
    #   - if archive_id is defined:  [ archive_id, undef ]
    #   - else, if the experimental group is a project:
    #                                [ group name, display_url || group url ]
    #   - otherwise the subset contributes nothing.
    # Duplicate archive ids / links are collapsed to a single entry.
    my $self = shift;

    if (! defined $self->{source_info}) {
        #could have data_url as highest priority here
        #but we need to ensure removal when adding archive ids
        #so we link to the archive and not the old data url

        my $exp_group = $self->get_Experiment->experimental_group;
        my %source_info; #Handles redundant InputSubsets
        my ($proj_name, $proj_link);

        if ($exp_group->is_project) {
            $proj_name = $exp_group->name;
            $proj_link = $exp_group->url;
        }

        foreach my $isset (@{ $self->get_InputSubsets }) {
            my $archive_id = $isset->archive_id;

            if (defined $archive_id) {

                if (! exists $source_info{$archive_id}) {
                    #source_link is undef here as archive_id overrides display url
                    #undef links will automatically go to the SRA
                    $source_info{$archive_id} = [$archive_id, undef];
                }
            }
            elsif (defined $proj_name) {
                my $source_link = $isset->display_url || $proj_link;

                # Both display_url and the project url may be undefined.
                # Use '' as the hash key in that case (what Perl would do
                # anyway) so no 'uninitialized value' warning is raised;
                # the stored link itself stays undef.
                my $link_key = defined $source_link ? $source_link : '';

                if (! exists $source_info{$link_key}) {
                    $source_info{$link_key} = [$proj_name, $source_link];
                }
            }
        }

        $self->{source_info} = [values %source_info];
    }

    return $self->{source_info};
}
380
381
382
383 1;
384