Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/InputSet.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # | |
| 2 # Ensembl module for Bio::EnsEMBL::Funcgen::InputSet | |
| 3 # | |
| 4 | |
| 5 =head1 LICENSE | |
| 6 | |
| 7 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
| 8 Genome Research Limited. All rights reserved. | |
| 9 | |
| 10 This software is distributed under a modified Apache license. | |
| 11 For license details, please see | |
| 12 | |
| 13 http://www.ensembl.org/info/about/code_licence.html | |
| 14 | |
| 15 =head1 CONTACT | |
| 16 | |
| 17 Please email comments or questions to the public Ensembl | |
| 18 developers list at <ensembl-dev@ebi.ac.uk>. | |
| 19 | |
| 20 Questions may also be sent to the Ensembl help desk at | |
| 21 <helpdesk@ensembl.org>. | |
| 22 | |
| 23 =head1 NAME | |
| 24 | |
| 25 Bio::EnsEMBL::Funcgen::InputSet - A module to represent an InputSet object. | |
| 26 | |
| 27 | |
| 28 =head1 SYNOPSIS | |
| 29 | |
| 30 use Bio::EnsEMBL::Funcgen::InputSet; | |
| 31 | |
| 32 #Create an InputSet | |
| 33 | |
| 34 my $inp_set = Bio::EnsEMBL::Funcgen::InputSet->new | |
| 35 ( | |
| 36 -DBID => $dbID, | |
| 37 -ADAPTOR => $self, | |
| 38 -EXPERIMENT => $exp, | |
| 39 -FEATURE_TYPE => $ftype, | |
| 40 -CELL_TYPE => $ctype, | |
| 41 -FORMAT => 'READ_FORMAT', | |
| 42 -VENDOR => 'SOLEXA', | |
| 43 -NAME => 'ExpSet1', | |
| 44 -REPLICATE => 1, | |
| 45 ); | |
| 46 | |
| 47 # Add some InputSubsets | |
| 48 | |
| 49 $inp_set->add_new_subset($subset_name, $input_subset); | |
| 50 | |
| 51 | |
| 52 | |
| 53 | |
| 54 =head1 DESCRIPTION | |
| 55 | |
| 56 An InputSet object provides a generic container for any non-array based feature import, | |
| 57 allowing tracking of file import via the status table and integration into Data and FeatureSets to | |
| 58 provide traceability to the source experiment from a given FeatureSet. | |
| 59 | |
| 60 =cut | |
| 61 | |
| 62 use strict; | |
| 63 use warnings; | |
| 64 | |
| 65 package Bio::EnsEMBL::Funcgen::InputSet; | |
| 66 | |
| 67 use Bio::EnsEMBL::Funcgen::InputSubset; | |
| 68 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
| 69 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate); | |
| 70 use Bio::EnsEMBL::Funcgen::Set; | |
| 71 use Bio::EnsEMBL::Analysis; | |
| 72 | |
| 73 use vars qw(@ISA); | |
| 74 @ISA = qw(Bio::EnsEMBL::Funcgen::Set); | |
| 75 | |
| 76 | |
| 77 =head2 new | |
| 78 | |
| 79 | |
| 80 | |
| 81 Example : my $eset = Bio::EnsEMBL::Funcgen::InputSet->new( | |
| 82 -EXPERIMENT => $exp, | |
| 83 -FEATURE_TYPE => $ftype, | |
| 84 -CELL_TYPE => $ctype, | |
| 85 -FORMAT => 'READ_FORMAT', | |
| 86 -VENDOR => 'SOLEXA', | |
| 87 -NAME => 'ExpSet1', | |
| 88 -ANALYSIS => $anal, | |
| 89 -FEATURE_CLASS => 'annotated', | |
| 90 ); | |
| 91 | |
| 92 Do we want to define subsets like this or are we more likely to add them one by one? | |
| 93 | |
| 94 Description: Constructor for InputSet objects. | |
| 95 Returntype : Bio::EnsEMBL::Funcgen::InputSet | |
| 96 Exceptions : Throws if no Experiment defined | |
| 97 Throws if CellType or FeatureType are not valid or stored | |
| 98 Caller : General | |
| 99 Status : At risk | |
| 100 | |
| 101 =cut | |
| 102 | |
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  #Add set_type here to overwrite default ref parsing in Set::set_type
  #This need to stay like this until we patch the DB
  my $self = $class->SUPER::new(@_);

  # Only the InputSet specific arguments are extracted here; the remaining
  # arguments are left to the superclass constructor called above.
  my ($exp, $format, $vendor, $rep)
    = rearrange(['EXPERIMENT', 'FORMAT', 'VENDOR', 'REPLICATE'], @_);

  # The Experiment must be an object of the right class with a dbID, i.e. stored
  if (! (ref $exp && $exp->isa('Bio::EnsEMBL::Funcgen::Experiment') && $exp->dbID())){
    throw('Must specify a valid stored Bio::EnsEMBL::Funcgen::Experiment');
  }

  #These are set in Set, just validate here
  throw ('Must provide a FeatureType') if(! defined $self->feature_type);
  throw ('Must provide a CellType')    if(! defined $self->cell_type);

  my $type = $self->feature_class;

  #Need to move these types to config
  # Compare by exact string lookup. Interpolating $type into a regex would
  # treat metacharacters in the caller's value as pattern syntax and would
  # also accept a trailing newline.
  my %valid_class = map { $_ => 1 } ('annotated', 'result', 'segmentation');

  if(! ($type && $valid_class{$type})){
    throw("You must define a valid InputSet feature_class e.g. 'annotated' or 'result'");
  }

  # A missing -FORMAT is rejected for result sets in the same way as a wrong
  # one; the defined test avoids an uninitialized value warning in that case.
  if(($type eq 'result') &&
     (! defined $format || $format ne 'SEQUENCING')){
    throw('InputSet does not yet support a result type InputSet which does not have the \'SEQUENCING\' format');
  }

  #if(! defined $self->analysis){
  ##default analysis hack for v47
  ##Set directly to avoid dbID boolean check
  #This is to support supporting_set cache in data_set?
  # NOTE(review): the guarding 'if' above is commented out, so this always
  # replaces whatever is in $self->{'analysis'} at this point, including the
  # effect of any -ANALYSIS argument - confirm this is still intended.
  $self->{'analysis'} = Bio::EnsEMBL::Analysis->new
    (-logic_name => 'external',
     -id         => 0,#??someone needs to rewrite analysis
    );

  #Change to direct setting for speed
  $self->{format}     = $format;
  $self->{vendor}     = $vendor;
  $self->{replicate}  = $rep;
  $self->{experiment} = $exp;
  $self->{subsets}    = {};   # subset name => InputSubset, see add_new_subset

  return $self;
}
| 157 | |
| 158 | |
| 159 =head2 add_new_subset | |
| 160 | |
| 161 Arg [1] : string - sub set name e.g. the file name (not path as we're restricted to 30 chars) | |
| 162 Arg [2] : Bio::EnsEMBL::Funcgen::InputSubset - optional | |
| 163 If not defined will create a sparse InputSubset based on the name | |
| 164 Example : $expset->add_new_subset($ss_name, $exp_subset); | |
| 165 Description: Adds input_subset | |
| 166 Returntype : Bio::EnsEMBL::Funcgen::InputSubset - the subset which was added | |
| 167 Exceptions : Throws if set is already present | |
| 168 Throws if InputSubset is not valid or stored | |
| 169 Caller : General | |
| 170 Status : At Risk | |
| 171 | |
| 172 =cut | |
| 173 | |
| 174 #Do we still use the optional subset function? | |
| 175 | |
sub add_new_subset {
  my ($self, $ss_name, $exp_sset) = @_;

  # The name has to be a true, plain scalar: taking a reference to a value
  # which is itself a reference gives 'REF' rather than 'SCALAR'.
  throw('You must pass a InputSubset name')
    unless $ss_name && ref(\$ss_name) eq 'SCALAR';

  # Names are unique within an InputSet
  throw("Subset $ss_name is already present in this InputSet, maybe you need to alter the filename?")
    if $self->get_subset_by_name($ss_name);

  if(! defined $exp_sset){
    # No subset supplied, so create a sparse one from the name alone
    $exp_sset = Bio::EnsEMBL::Funcgen::InputSubset->new(
                                                        -name      => $ss_name,
                                                        -input_set => $self,
                                                       );
  }
  elsif(! (ref($exp_sset) && $exp_sset->isa('Bio::EnsEMBL::Funcgen::InputSubset') && $exp_sset->dbID())){
    # A supplied subset must be of the right class and have a dbID
    throw('InputSubsets must be valid and stored');
  }

  $self->{subsets}{$ss_name} = $exp_sset;

  return $exp_sset;
}
| 206 | |
| 207 | |
| 208 =head2 get_Experiment | |
| 209 | |
| 210 Example : my $exp = $exp_set->get_Experiment(); | |
| 211 Description: Getter for the Experiment of this InputSet. | |
| 212 Returntype : Bio::EnsEMBL::Funcgen::Experiment | |
| 213 Exceptions : None | |
| 214 Caller : General | |
| 215 Status : At Risk | |
| 216 | |
| 217 =cut | |
| 218 | |
sub get_Experiment{
  my ($self) = @_;

  # Read-only: the experiment is assigned once, in new
  return $self->{experiment};
}
| 220 | |
| 221 | |
| 222 =head2 get_InputSubsets | |
| 223 | |
| 224 Example : my @subsets = @{$exp_set->get_InputSubsets()}; | |
| 225 Description: Getter for the InputSubsets for this InputSet. | |
| 226 Returntype : Arrayref | |
| 227 Exceptions : None | |
| 228 Caller : General | |
| 229 Status : At Risk | |
| 230 | |
| 231 =cut | |
| 232 | |
sub get_InputSubsets{
  my $self = shift;

  # Subsets are held in a hash keyed on name, so the order of the
  # returned list is not defined.
  my @subsets = values %{$self->{'subsets'}};

  return \@subsets;
}
| 238 | |
| 239 | |
| 240 | |
| 241 | |
| 242 =head2 get_subset_by_name | |
| 243 | |
| 244 Example : my $subset = $exp_set->get_subset_by_name('subset1'); | |
| 245 Description: Getter for the subset of a given name for this InputSet. | |
| 246 Returntype : Bio::EnsEMBL::Funcgen::InputSubset, or undef if no subset has the given name | |
| 247 Exceptions : None | |
| 248 Caller : General | |
| 249 Status : At Risk | |
| 250 | |
| 251 =cut | |
| 252 | |
sub get_subset_by_name{
  my ($self, $name) = @_;

  if(exists $self->{'subsets'}{$name}){
    return $self->{'subsets'}{$name};
  }

  # An explicit undef (rather than a bare return) is given back for an
  # unknown name, so list context callers still receive one element.
  return undef;
}
| 257 | |
| 258 | |
| 259 =head2 get_subset_names | |
| 260 | |
| 261 Example : my @subset_names = @{$exp_set->get_subset_names()}; | |
| 262 Description: Getter for the subset names for this InputSet. | |
| 263 Returntype : Arrayref | |
| 264 Exceptions : None | |
| 265 Caller : General | |
| 266 Status : At Risk | |
| 267 | |
| 268 =cut | |
| 269 | |
sub get_subset_names{
  my $self = shift;

  # The names are the keys of the subset hash, so their order is not defined
  my @names = keys %{$self->{'subsets'}};

  return \@names;
}
| 274 | |
| 275 | |
| 276 | |
| 277 | |
| 278 =head2 vendor | |
| 279 | |
| 280 Args : None - the vendor (e.g. ILLUMINA) is set via -VENDOR in new | |
| 281 Example : my $iset_vendor = $iset->vendor; | |
| 282 Description: Getter for the vendor attribute of this InputSet. | |
| 283 Returntype : String | |
| 284 Exceptions : None | |
| 285 Caller : General | |
| 286 Status : At Risk | |
| 287 | |
| 288 =cut | |
| 289 | |
sub vendor {
  my ($self) = @_;

  # Read-only: the value is taken from -VENDOR in new
  return $self->{vendor};
}
| 291 | |
| 292 | |
| 293 =head2 format | |
| 294 | |
| 295 Args : None - the format (i.e. product type/format) is set via -FORMAT in new | |
| 296 Example : my $iset_format = $iset->format; | |
| 297 Description: Getter for the format attribute of this InputSet. | |
| 298 Returntype : String | |
| 299 Exceptions : None | |
| 300 Caller : General | |
| 301 Status : At Risk | |
| 302 | |
| 303 =cut | |
| 304 | |
sub format {
  my ($self) = @_;

  # Read-only: the value is taken from -FORMAT in new
  return $self->{format};
}
| 306 | |
| 307 | |
| 308 =head2 replicate | |
| 309 | |
| 310 Args : None - replicate is set via -REPLICATE in new; 0 = merged or NA, >0 refers to individual replicate | |
| 311 Example : if($iset->replicate){ #Do something replicate specific in here } | |
| 312 Description: Getter for the replicate attribute of this InputSet. | |
| 313 Returntype : Integer | |
| 314 Exceptions : None | |
| 315 Caller : General | |
| 316 Status : At Risk | |
| 317 | |
| 318 =cut | |
| 319 | |
sub replicate {
  my ($self) = @_;

  # Read-only: the value is taken from -REPLICATE in new
  return $self->{replicate};
}
| 321 | |
| 322 | |
| 323 | |
| 324 =head2 source_info | |
| 325 | |
| 326 Example : my $source_info = $input_set->source_info; | |
| 327 Description: Getter for the experiment source info i.e. [ $label, $url ] | |
| 328 Returntype : Listref | |
| 329 Exceptions : None | |
| 330 Caller : General | |
| 331 Status : At risk | |
| 332 | |
| 333 =cut | |
| 334 | |
| 335 #Currently handling redundant/absent InputSubset data | |
| 336 | |
sub source_info{
  my $self = shift;

  # The list is built on the first call and cached on the object
  return $self->{source_info} if defined $self->{source_info};

  #could have data_url as highest priority here
  #but we need to ensure removal when adding archive ids
  #so we link to the archive and not the old data url

  my $exp_group = $self->get_Experiment->experimental_group;
  my ($proj_name, $proj_link);

  # A project name/link is only available when the group is a project
  if($exp_group->is_project){
    $proj_name = $exp_group->name;
    $proj_link = $exp_group->url;
  }

  # Keyed on archive id or link, so redundant InputSubsets collapse to
  # a single [ $label, $url ] entry.
  my %source_info;

  foreach my $isset(@{$self->get_InputSubsets}){
    my $archive_id = $isset->archive_id;

    if(defined $archive_id){
      #the link is undef here as archive_id overrides display url
      #undef links will automatically go to the SRA
      $source_info{$archive_id} ||= [$archive_id, undef];
    }
    elsif(defined $proj_name){
      my $source_link = $isset->display_url || $proj_link;

      # Both urls may be absent; use an explicit empty key rather than undef,
      # which would raise an uninitialized value warning as a hash key.
      my $key = defined $source_link ? $source_link : '';

      $source_info{$key} ||= [$proj_name, $source_link];
    }
    # Subsets with no archive id in a non-project group contribute nothing
  }

  # Sort on the key so the order is stable between runs; the order of
  # 'values' on a hash is not defined.
  $self->{source_info} = [ map { $source_info{$_} } sort keys %source_info ];

  return $self->{source_info};
}
| 380 | |
| 381 | |
| 382 | |
| 383 1; | |
| 384 |
