Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Funcgen/InputSet.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line source
#
# Ensembl module for Bio::EnsEMBL::Funcgen::InputSet
#

=head1 LICENSE

  Copyright (c) 1999-2011 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Funcgen::InputSet - A module to represent InputSet object.

=head1 SYNOPSIS

  use Bio::EnsEMBL::Funcgen::InputSet;

  #Create an InputSet

  my $inp_set = Bio::EnsEMBL::Funcgen::InputSet->new
                 (
                  -DBID         => $dbID,
                  -ADAPTOR      => $self,
                  -EXPERIMENT   => $exp,
                  -FEATURE_TYPE => $ftype,
                  -CELL_TYPE    => $ctype,
                  -FORMAT       => 'READ_FORMAT',
                  -VENDOR       => 'SOLEXA',
                  -NAME         => 'ExpSet1',
                  -REPLICATE    => 1,
                 );

  # Add some InputSubsets

  $inp_set->add_new_subset($subset_name, $input_subset);

=head1 DESCRIPTION

An InputSet object provides a generic container for any non-array based feature import,
allowing tracking of file import via the status table and integration into Data and FeatureSets to
provide traceability to the source experiment from a given FeatureSet.

=cut

use strict;
use warnings;

package Bio::EnsEMBL::Funcgen::InputSet;

use Scalar::Util qw( blessed );

use Bio::EnsEMBL::Funcgen::InputSubset;
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate);
use Bio::EnsEMBL::Funcgen::Set;
use Bio::EnsEMBL::Analysis;

our @ISA = qw(Bio::EnsEMBL::Funcgen::Set);

#Need to move these types to config
#Exact-match lookup: the feature_class must be one of these strings verbatim
my %VALID_FEATURE_CLASS = map { $_ => 1 } qw( annotated result segmentation );


=head2 new

  Example    : my $eset = Bio::EnsEMBL::Funcgen::InputSet->new(
                                                               -EXPERIMENT    => $exp,
                                                               -FEATURE_TYPE  => $ftype,
                                                               -CELL_TYPE     => $ctype,
                                                               -FORMAT        => 'READ_FORMAT',
                                                               -VENDOR        => 'SOLEXA',
                                                               -NAME          => 'ExpSet1',
                                                               -ANALYSIS      => $anal,
                                                               -FEATURE_CLASS => 'annotated',
                                                              );

  Do we want to define subsets like this or are we more likely to add them one by one?

  Description: Constructor for InputSet objects.
               NOTE: the analysis attribute is currently always reset to a
               placeholder Bio::EnsEMBL::Analysis with logic_name 'external',
               replacing whatever the parent constructor stored.
  Returntype : Bio::EnsEMBL::Funcgen::InputSet
  Exceptions : Throws if no Experiment defined
               Throws if CellType or FeatureType are not valid or stored
               Throws if feature_class is not 'annotated', 'result' or 'segmentation'
               Throws if feature_class is 'result' and format is not 'SEQUENCING'
  Caller     : General
  Status     : At risk

=cut

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  #Add set_type here to overwrite default ref parsing in Set::set_type
  #This need to stay like this until we patch the DB
  my $self = $class->SUPER::new(@_);

  my ($exp, $format, $vendor, $rep)
    = rearrange(['EXPERIMENT', 'FORMAT', 'VENDOR', 'REPLICATE'], @_);

  #blessed() guards the isa call, so an unblessed ref (e.g. a plain hashref)
  #raises the intended exception rather than a method-call failure
  if (! (blessed($exp)
         && $exp->isa('Bio::EnsEMBL::Funcgen::Experiment')
         && $exp->dbID())) {
    throw('Must specify a valid stored Bio::EnsEMBL::Funcgen::Experiment');
  }

  #These are set in Set, just validate here
  throw('Must provide a FeatureType') if (! defined $self->feature_type);
  throw('Must provide a CellType')    if (! defined $self->cell_type);

  my $type = $self->feature_class;

  #Compare literally; interpolating $type into a regex would let values such
  #as '.*' (or a trailing newline) pass validation
  if (! (defined $type && $VALID_FEATURE_CLASS{$type})) {
    throw("You must define a valid InputSet feature_class e.g. 'annotated' or 'result'");
  }

  #An undefined format is treated as "not SEQUENCING" without warning
  if (($type eq 'result') && ! (defined $format && $format eq 'SEQUENCING')) {
    throw('InputSet does not yet support a result type InputSet which does not have the \'SEQUENCING\' format');
  }

  #if(! defined $self->analysis){
  ##default analysis hack for v47
  ##Set directly to avoid dbID boolean check
  #This is to support supporting_set cache in data_set?
  $self->{'analysis'} = Bio::EnsEMBL::Analysis->new
    (
     -logic_name => 'external',
     -id         => 0,#??someone needs to rewrite analysis
    );

  #Change to direct setting for speed
  $self->{format}     = $format;
  $self->{vendor}     = $vendor;
  $self->{replicate}  = $rep;
  $self->{experiment} = $exp;
  $self->{subsets}    = {};

  return $self;
}


=head2 add_new_subset

  Arg [1]    : string - sub set name e.g. the file name (not path as we're restricted to 30 chars)
  Arg [2]    : Bio::EnsEMBL::Funcgen::InputSubset - optional
               If not defined will create a sparse InputSubset based on the name
  Example    : $expset->add_new_subset($ss_name, $exp_subset);
  Description: Adds input_subset
  Returntype : Bio::EnsEMBL::Funcgen::InputSubset - the subset which was added
  Exceptions : Throws if the name is not a true, non-reference scalar
               Throws if set is already present
               Throws if InputSubset is not valid or stored
  Caller     : General
  Status     : At Risk

=cut

#Do we still use the optional subset function?

sub add_new_subset {
  my ($self, $ss_name, $exp_sset) = @_;

  #Need to test $ss_name here
  if (! ($ss_name && ref(\$ss_name) eq 'SCALAR')) {#ref($exp_sset) would be 'REF'
    throw('You must pass a InputSubset name');
  }

  if ($self->get_subset_by_name($ss_name)) {
    throw("Subset $ss_name is already present in this InputSet, maybe you need to alter the filename?");
  }

  if (defined $exp_sset) {

    #blessed() guards the isa call against unblessed references
    if (! (blessed($exp_sset)
           && $exp_sset->isa('Bio::EnsEMBL::Funcgen::InputSubset')
           && $exp_sset->dbID())) {
      throw('InputSubsets must be valid and stored');
    }
  }
  else {
    #No subset supplied, so build a sparse one from the name alone
    $exp_sset = Bio::EnsEMBL::Funcgen::InputSubset->new
      (
       -name      => $ss_name,
       -input_set => $self,
      );
  }

  $self->{subsets}{$ss_name} = $exp_sset;

  return $self->{subsets}{$ss_name};
}


=head2 get_Experiment

  Example    : my $exp = $exp_set->get_Experiment();
  Description: Getter for the Experiment of this InputSet.
  Returntype : Bio::EnsEMBL::Funcgen::Experiment
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_Experiment{ return $_[0]->{experiment}; }


=head2 get_InputSubsets

  Example    : my @subsets = @{$exp_set->get_InputSubsets()};
  Description: Getter for the InputSubsets for this InputSet.
  Returntype : Arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_InputSubsets{
  my ($self) = shift;

  return [ values %{$self->{'subsets'}} ];
}


=head2 get_subset_by_name

  Example    : my $subsets = $exp_set->get_subset_by_name('subset1');
  Description: Getter for the subset of a given name for this InputSet.
  Returntype : Bio::EnsEMBL::Funcgen::InputSubset, or undef if no subset has that name
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_subset_by_name{
  my ($self, $name) = @_;

  return (exists $self->{'subsets'}{$name}) ? $self->{'subsets'}{$name} : undef;
}


=head2 get_subset_names

  Example    : my @subset_names = @{$exp_set->get_subset_names()};
  Description: Getter for the subset names for this InputSet.
  Returntype : Arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_subset_names{
  my ($self) = shift;

  return [ keys %{$self->{'subsets'}} ];
}


=head2 vendor

  Args       : None
  Example    : my $iset_vendor = $iset->vendor;
  Description: Getter for the vendor attribute of this InputSet.
  Returntype : String - vendor e.g. ILLUMINA
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub vendor { return $_[0]->{vendor}; }


=head2 format

  Args       : None
  Example    : my $iset_format = $iset->format;
  Description: Getter for the format attribute of this InputSet.
  Returntype : String - format i.e. product type/format
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub format { return $_[0]->{format}; }


=head2 replicate

  Args       : None
  Example    : if($iset->replicate){ #Do something replicate specific in here }
  Description: Getter for the replicate attribute of this InputSet.
  Returntype : Integer - replicate
               0 = merged or NA, >0 refers to individual replicate
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub replicate { return $_[0]->{replicate}; }


=head2 source_info

  Example    : my $source_info = $input_set->source_info;
  Description: Getter for the experiment source info i.e. [ $label, $url ]
               One pair is returned per distinct archive ID or project link
               found among the InputSubsets. The result is cached on the
               object after the first call.
  Returntype : Listref
  Exceptions : None
  Caller     : General
  Status     : At risk

=cut

#Currently handling redundant/absent InputSubset data

sub source_info{
  my $self = shift;

  if (! defined $self->{source_info}) {
    #could have data_url as highest priority here
    #but we need to ensure removal when adding archive ids
    #so we link to the archive and not the old data url

    my $exp_group = $self->get_Experiment->experimental_group;
    my %source_info; #Handles redundant InputSubsets
    my ($proj_name, $proj_link);

    if ($exp_group->is_project) {
      $proj_name = $exp_group->name;
      $proj_link = $exp_group->url;
    }

    foreach my $isset (@{$self->get_InputSubsets}) {
      my $archive_id = $isset->archive_id;

      if (defined $archive_id) {

        if (! exists $source_info{$archive_id}) {
          $source_info{$archive_id} = [$archive_id, undef];
          #source_link is undef here as archive_id overrides display url
          #undef links will automatically go to the SRA
        }
      }
      elsif (defined $proj_name) {
        #$source_label = $self->experimental_group->name;
        my $source_link = $isset->display_url || $proj_link;

        #An absent link is keyed on '' explicitly; using undef as a hash key
        #resolves to the same key but raises an uninitialized-value warning
        my $link_key = defined $source_link ? $source_link : '';

        if (! exists $source_info{$link_key}) {
          $source_info{$link_key} = [$proj_name, $source_link];
        }
      }
    }

    $self->{source_info} = [values %source_info];
  }

  return $self->{source_info};
}


1;