Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Funcgen/ResultSet.pm @ 3:d30fa12e4cc5 default tip
Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author | devteam <devteam@galaxyproject.org> |
---|---|
date | Mon, 13 Jan 2014 10:38:30 -0500 |
parents | 1f6dce3d34e0 |
children |
line wrap: on
line source
#
# Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet
#

=head1 LICENSE

  Copyright (c) 1999-2011 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Funcgen::ResultSet - A module to represent ResultSet.

=head1 SYNOPSIS

  use Bio::EnsEMBL::Funcgen::ResultSet;

  my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
      -dbid       => $dbid,
      -analysis   => $analysis,
      -table_name => 'experimental_chip',
      -table_id   => $ec_id,
  );

=head1 DESCRIPTION

A ResultSet object provides access to a set of raw results from an Experiment.
A set will be one or more contiguous chips to be treated as one set, with the
same analysis. Duplicate sets will form a separate result set, as will the same
raw data analysed or normalised in a different manner.

=cut

# To do
# Change add_table_id to add_ExperimentalChip_Channel?

use strict;
use warnings;

package Bio::EnsEMBL::Funcgen::ResultSet;

use Scalar::Util qw( blessed );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw deprecate );
use Bio::EnsEMBL::Funcgen::Set;

use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Funcgen::Set);


=head2 new

  Arg [-TABLE_NAME]         : string - mandatory, the table the ids refer to
  Arg [-TABLE_ID]           : (optional) int - first table id to register
  Arg [-RESULT_FEATURE_SET] : (optional) boolean
  Arg [-DBFILE_DATA_DIR]    : (optional) string - root data directory
  Other args are passed through to the Bio::EnsEMBL::Funcgen::Set constructor.
  Example    : my $feature = Bio::EnsEMBL::Funcgen::ResultSet->new(
                   -dbid               => $dbid,
                   -analysis           => $analysis,
                   -table_name         => 'experimental_chip',
                   -table_id           => $ec_id,
                   -result_feature_set => 1,
               );
  Description: Constructor for ResultSet objects.
  Returntype : Bio::EnsEMBL::Funcgen::ResultSet
  Exceptions : Throws if no -table_name is defined
  Caller     : General
  Status     : At risk

=cut

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # The parent constructor sees every argument plus a fixed feature class.
  my $self = $class->SUPER::new(@_, ('-feature_class' => 'result'));

  my ($table_name, $table_id, $rf_set, $dbfile_data_dir)
    = rearrange(['TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'DBFILE_DATA_DIR'], @_);

  $self->{'table_id_hash'} = {};

  #maybe don't need the analysis args as mandatory as we're testing in the adaptor store method
  if (! $table_name){
    throw("Need to pass the following arg:\t-table_name");
  }

  #do we need some control of creating new objects with dbID and adding result_groups/feature_sets and then storing/updating them
  #potential for someone to create one from new using a duplicate dbID and then linking incorrect data to a pre-existing ResultGroup
  #we need to verify that each table_name/id in the set is from the same experiment
  $self->table_name($table_name);
  $self->add_table_id($table_id)            if $table_id;
  $self->result_feature_set($rf_set)        if $rf_set;
  $self->dbfile_data_dir($dbfile_data_dir)  if $dbfile_data_dir;

  return $self;
}


#These are CollectionContainer? methods
#For a core track they would probably be in the Analysis
#All other collection methods are in ResultFeatureAdaptor(and parents)


=head2 get_dbfile_path_by_window_size

  Arg[1]     : int - window size
  Arg[2]     : OPTIONAL Bio::EnsEMBL::Slice
               Used when generating individual seq_region Collections
  Example    : my $filepath = $rset->get_dbfile_path_by_window_size($wsize);
  Description: Generates the default dbfile path for this ResultSet and
               the given window_size
  Returntype : string
  Exceptions : Throws if Slice is not valid
  Caller     : general
  Status     : At risk

=cut

sub get_dbfile_path_by_window_size{
  my ($self, $window_size, $slice) = @_;

  if($slice){

    # blessed() first, so an unblessed reference is reported via throw
    # instead of dying inside the isa call.
    if(! (blessed($slice) && $slice->isa("Bio::EnsEMBL::Slice"))){
      throw('You must provide a valid Bio::EnsEMBL::Slice');
    }

    $window_size .= '.'.$slice->seq_region_name;
  }

  return $self->dbfile_data_dir.'/result_features.'.$self->name.'.'.$window_size.'.col';
}


=head2 dbfile_data_dir

  Arg[1]     : OPTIONAL string - data directory for this ResultSet
  Example    : my $dbfile_data_dir = $self->dbfile_data_dir;
  Description: Getter/Setter for the root dbfile data directory for this ResultSet
  Returntype : string
  Exceptions : None
  Caller     : self
  Status     : at risk

=cut

sub dbfile_data_dir{
  my ($self, $data_dir) = @_;

  $self->{'dbfile_data_dir'} = $data_dir if defined $data_dir;

  return $self->{'dbfile_data_dir'};
}


=head2 result_feature_set

  Arg [1]    : optional - boolean 0 or 1.
  Example    : if($rset->result_feature_set){ ...use result_feature table ...};
  Description: Getter and setter for the result_feature_set attribute.
  Returntype : boolean
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub result_feature_set{
  my $self = shift;

  $self->{'result_feature_set'} = shift if @_;

  return $self->{'result_feature_set'};
}


=head2 table_name

  Arg [1]    : (optional) string - table_name (experimental_chip, channel or input_set)
  Example    : $result_set->table_name('experimental_chip');
  Description: Getter and setter for the table_name for this ResultSet.
               Once set, the table name cannot be changed to a different value.
  Returntype : string
  Exceptions : Throws if a different table name has already been set
  Caller     : General
  Status     : At Risk

=cut

sub table_name{
  my $self = shift;

  if (@_){

    if($self->{'table_name'} && ($self->{'table_name'} ne $_[0])){
      throw("Cannot mix table name/types of a ResultSet");
    }

    $self->{'table_name'} = $_[0];
  }

  return $self->{'table_name'};
}


=head2 add_table_id

  Arg [1]    : int - table id (dbID in the table given by table_name)
  Arg [2]    : (optional) int - result_set_input_id
  Example    : $result_set->add_table_id($ec_id, $cc_id);
  Description: Caches table_id result_set_input_id to the ResultSet. In the case of an
               array ResultSet, the unique result_set_input_id is used to key into the
               result table, it also reduces redundancy and enable mapping of results to chips
               rather than just the ResultSet.  This enables result retrieval
               based on chips in the same set which have a differing status.
               In the case of a sequencing ResultSet, this simply refers to the InputSet ids.
  Returntype : None
  Exceptions : Throws if no table_id defined
               Throws if the table_id already has a defined result_set_input_id
  Caller     : General
  Status     : At Risk

=cut

sub add_table_id {
  my ($self, $table_id, $cc_id) = @_;

  if (! defined $table_id){
    throw("Need to pass a table_id");
  }

  # A table_id registered without a result_set_input_id (undef value) may
  # still be filled in later; only a defined value is protected.
  if(defined $self->{'table_id_hash'}->{$table_id}){
    throw("You are attempting to redefine a result_set_input_id which is already defined");
  }

  $self->{'table_id_hash'}->{$table_id} = $cc_id;

  return;
}


=head2 table_ids

  Example    : my @table_ids = @{$result_set->table_ids()};
  Description: Getter for the table ids registered with this ResultSet.
  Returntype : arrayref of table ids (in no particular order)
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub table_ids {
  my $self = shift;

  return [ keys %{$self->{'table_id_hash'}} ];
}


# Deprecated alias for result_set_input_ids.
sub chip_channel_ids {
  my $self = shift;

  deprecate('ResultSet::chip_channel_ids is deprecated, please use result_set_input_ids');

  return $self->result_set_input_ids;
}


=head2 result_set_input_ids

  Example    : my @rset_rsi_ids = @{$result_set->result_set_input_ids()};
  Description: Getter for the input ids for this ResultSet.
  Returntype : arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub result_set_input_ids {
  my $self = shift;

  return [ values %{$self->{'table_id_hash'}} ];
}


=head2 contains

  Arg [1]    : stored object whose adaptor table is compared with table_name
               (e.g. Channel or ExperimentalChip)
  Example    : if($result_set->contains($chip_or_channel)){...do some chip or channel operations here...};
  Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet
  Returntype : boolean
  Exceptions : Throws if the argument is not an object with an adaptor
               Warns if ResultSet table name is not of argument type
  Caller     : General
  Status     : At Risk

=cut

sub contains{
  my ($self, $chip_channel) = @_;

  # The table name is read from the object's adaptor, so an unstored or
  # non-object argument cannot be tested.
  if(! (blessed($chip_channel) &&
        $chip_channel->can('adaptor') &&
        $chip_channel->adaptor)){
    throw('Need to pass a stored object (with an adaptor) to test ResultSet membership');
  }

  my $contains     = 0;
  my @tables       = $chip_channel->adaptor->_tables();
  my ($table_name) = @{$tables[0]};

  if($table_name ne $self->table_name()){
    warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");
  }
  elsif(exists $self->{'table_id_hash'}->{$chip_channel->dbID()}){
    $contains = 1;
  }

  return $contains;
}


=head2 get_result_set_input_id

  Arg [1]    : int - dbID (experimental_chip, channel or input_set)
  Example    : $result_set->get_result_set_input_id($ec_id);
  Description: Retrieves a result_set_input_id from the cache given a dbID
  Returntype : int, or undef if the dbID is not registered
  Exceptions : none
  Caller     : General
  Status     : At Risk

=cut

sub get_result_set_input_id{
  my ($self, $table_id) = @_;

  # A plain read does not create the key, so a missing id yields undef.
  return $self->{'table_id_hash'}->{$table_id};
}


# Deprecated alias for get_result_set_input_id.
sub get_chip_channel_id{
  my ($self, $table_id) = @_;

  deprecate('ResultSet::get_chip_channel_id is deprecated, please use get_result_set_input_id');

  return $self->get_result_set_input_id($table_id);
}


=head2 get_InputSets

  Example    : my @input_sets = @{$result_set->get_InputSets()};
  Description: Retrieves the InputSets for the table ids of this ResultSet,
               fetching them through the InputSetAdaptor on first call and
               caching the list.
  Returntype : Listref of InputSet objects
  Exceptions : Warns and returns nothing if this is not an input_set ResultSet
  Caller     : General
  Status     : At Risk

=cut

sub get_InputSets{
  my $self = shift;

  if($self->table_name ne 'input_set'){
    warn 'Cannot get_InputSets for an array based ResultSet';
    return;
  }

  if(! defined $self->{'input_sets'}){
    # Start from an empty list so a set with no table ids still returns a
    # listref and is not looked up again on every call.
    $self->{'input_sets'} = [];
    my $is_adaptor = $self->adaptor->db->get_InputSetAdaptor();

    foreach my $is_id(@{$self->table_ids()}){
      push @{$self->{'input_sets'}}, $is_adaptor->fetch_by_dbID($is_id);
    }
  }

  return $self->{'input_sets'};
}


=head2 get_ExperimentalChips

  Example    : my @ecs = @{$result_set->get_ExperimentalChips()};
  Description: Retrieves the ExperimentalChips for this ResultSet and caches
               them. For an experimental_chip ResultSet the table ids are
               fetched directly; otherwise the table ids are treated as
               Channel ids and the distinct parent chips are returned.
  Returntype : Listref of ExperimentalChip objects
  Exceptions : Warns and returns nothing for an input_set ResultSet
  Caller     : General
  Status     : At Risk

=cut

sub get_ExperimentalChips{
  my $self = shift;

  if($self->table_name eq 'input_set'){
    warn 'Cannot get_ExperimentalChips for an InputSet ResultSet';
    return;
  }

  if(! defined $self->{'experimental_chips'}){
    # Start from an empty list so a set with no table ids still returns a
    # listref and is not looked up again on every call.
    $self->{'experimental_chips'} = [];
    my $ec_adaptor = $self->adaptor->db->get_ExperimentalChipAdaptor();

    if($self->table_name() eq "experimental_chip"){

      foreach my $ec_id(@{$self->table_ids()}){
        push @{$self->{'experimental_chips'}}, $ec_adaptor->fetch_by_dbID($ec_id);
        #should this be hashed on chip_channel_id?
      }
    }
    else{
      # Several channels can share a chip, so collect chips keyed on their
      # dbID and fetch each one only once.
      my %echips;
      my $chan_adaptor = $self->adaptor->db->get_ChannelAdaptor();

      foreach my $chan_id(@{$self->table_ids()}){
        my $chan = $chan_adaptor->fetch_by_dbID($chan_id);
        $echips{$chan->experimental_chip_id} ||= $ec_adaptor->fetch_by_dbID($chan->experimental_chip_id);
      }

      $self->{'experimental_chips'} = [ values %echips ];
    }
  }

  return $self->{'experimental_chips'};
}


=head2 get_replicate_set_by_result_set_input_id

  Arg[1]     : int - result_set_input_id
  Example    : my $rep_set_name = $result_set->get_replicate_set_by_result_set_input_id($cc_id);
  Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip
  Returntype : String - replicate set name, or undef if none is cached for the id
  Exceptions : None
  Caller     : General
  Status     : At Risk - implement for Channels?

=cut

#Where is this used?

sub get_replicate_set_by_result_set_input_id{
  my ($self, $cc_id) = @_;

  if(! defined $self->{'_replicate_cache'}){
    warn "Generating replicate cache!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";

    # Define the cache even when no chips are found, so it is built once.
    $self->{'_replicate_cache'} = {};

    # get_ExperimentalChips returns nothing for an input_set ResultSet;
    # fall back to an empty list rather than dereferencing undef.
    foreach my $ec (@{ $self->get_ExperimentalChips() || [] }){
      $self->{'_replicate_cache'}{$self->get_result_set_input_id($ec->dbID())} = $ec->replicate();
    }
  }

  #warn here of absent replicate info?

  return $self->{'_replicate_cache'}{$cc_id};
}


# Deprecated alias for get_replicate_set_by_result_set_input_id.
sub get_replicate_set_by_chip_channel_id{
  my ($self, $cc_id) = @_;

  deprecate('Please use get_replicate_set_by_result_set_input_id instead');

  return $self->get_replicate_set_by_result_set_input_id($cc_id);
}


=head2 get_displayable_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Arg[3]     : int - Max bins i.e. pixel width of display
  Arg[4]     : int - window_size
  Arg[5]     : string - constraint
  Example    : my @results = @{$ResultSet->get_displayable_ResultFeatures_by_Slice($slice)};
  Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
               with the status fixed to 'DISPLAYABLE'
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_displayable_ResultFeatures_by_Slice{
  my ($self, $slice, $with_probe, $max_bins, $window_size, $constraint) = @_;

  # Go through get_ResultFeatures_by_Slice so both wrappers use the same
  # ResultFeatureAdaptor call and forward the same arguments.
  return $self->get_ResultFeatures_by_Slice($slice, 'DISPLAYABLE', $with_probe, $max_bins, $window_size, $constraint);
}


=head2 get_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : string - Status name e.g. 'DISPLAYABLE'
  Arg[3]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Arg[4]     : int - Max bins i.e. pixel width of display
  Arg[5]     : int - window_size
  Arg[6]     : string - constraint
  Example    : my @rfs_with_probe = @{$ResultSet->get_ResultFeatures_by_Slice($slice, undef, 1)};
  Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_ResultFeatures_by_Slice{
  my ($self, $slice, $status, $with_probe, $max_bins, $window_size, $constraint) = @_;

  return $self->adaptor->db->get_ResultFeatureAdaptor->fetch_all_by_Slice_ResultSet($slice, $self, $status, $with_probe, $max_bins, $window_size, $constraint);
}


#Floats unpack inaccurately so need 3 sigfiging
#This should match the format in which they are originally stored
#This is dependent on ResultSet type i.e. reads or intensity?
#No format for reads!
#Should this be set in the ResultSet instead?
#It may be more efficient for the caller to test for format first rather than blindly printf'ing
#even if there is no format?
#This needs setting in new, so we don't have to eval for every score.

# Returns the printf format used for scores (currently fixed).
sub score_format{
  return '%.3f';
}


=head2 log_label

  Example    : print $rset->log_label();
  Description: Get a string of the unique key fields for logging purposes,
               in the form name:logic_name:feature_type:cell_type
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub log_label {
  my $self = shift;

  my $label = (defined $self->feature_type())
    ? $self->feature_type->name.":"
    : "Unknown FeatureType:";

  # NOTE(review): the "Uknown" spelling is kept as is, in case existing log
  # output is matched on this label - confirm before correcting.
  $label .= (defined $self->cell_type())
    ? $self->cell_type->name
    : "Uknown CellType";

  return $self->name.":".$self->analysis->logic_name.":".$label;
}

1;