diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/ResultSet.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/ResultSet.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,585 @@
+#
+# Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet
+#
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+
+=head1 NAME
+
+Bio::EnsEMBL::ResultSet - A module to represent ResultSet.
+ 
+
+=head1 SYNOPSIS
+
+use Bio::EnsEMBL::Funcgen::ResultSet;
+
+my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
+                                                       -dbid        => $dbid,
+                                                       -analysis    => $analysis,
+                                                       -table_name  => 'experimental_chip',
+                                                       -table_id    => $ec_id,
+); 
+
+
+
+=head1 DESCRIPTION
+
+A ResultSet object provides access to a set raw results from an Experiment. A set will be one or more 
+contiguous chips to be treated as one set, with the same analysis. Duplicate sets will form a separate
+result set, as will the same raw data analysed or normalised in a different manner.
+
+=cut
+
+#To do
+#Change add_table_id to add_ExperimentalChip_Channel?
+
+
+use strict;
+use warnings;
+
+package Bio::EnsEMBL::Funcgen::ResultSet;
+
+use Bio::EnsEMBL::Utils::Argument qw( rearrange );
+use Bio::EnsEMBL::Utils::Exception qw( throw deprecate);
+use Bio::EnsEMBL::Funcgen::Set;
+
+use vars qw(@ISA);
+@ISA = qw(Bio::EnsEMBL::Funcgen::Set);
+
+
+=head2 new
+
+  Arg [-ANALYSIS] :
+
+
+
+  Example    : my $feature = Bio::EnsEMBL::Funcgen::ResultSet->new(
+                                                                   -dbid        => $dbid,
+                                                                   -analysis    => $analysis,
+                                                                   -table_name  => 'experimental_chip',
+                                                                   -table_id       => $ec_id,
+                                                                   -result_feature_set => 1,
+			                                          ); 
+  Description: Constructor for ResultSet objects.
+  Returntype : Bio::EnsEMBL::Funcgen::ResultSet
+  Exceptions : Throws if no experiment_id defined
+  Caller     : General
+  Status     : At risk
+
+=cut
+
+sub new {
+  my $caller = shift;
+	
+  my $class = ref($caller) || $caller;
+	
+  my $self = $class->SUPER::new(@_, ('-feature_class' => 'result'));
+	
+  my ($table_name, $table_id, $rf_set, $dbfile_data_dir)
+    = rearrange(['TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'DBFILE_DATA_DIR'], @_);
+
+  $self->{'table_id_hash'} = {};
+
+  #maybe don't need tha analysis args as mandatory as we're testing in the adaptor store method
+  if (! $table_name){
+    throw("Need to pass the following arg:\t-table_name");
+  }
+  
+  #do we need some control of creating new objects with dbID and adding result_groups/feature_sets and them storing/updating them
+  #potential for someone to create one from new using a duplicate dbID and then linking incorrect data to a pre-existing ResultGroup
+  #we need to verify that each table_name/id in the set is from the same experiment
+   $self->table_name($table_name);
+  $self->add_table_id($table_id) if $table_id;
+  $self->result_feature_set($rf_set) if $rf_set;
+  $self->dbfile_data_dir($dbfile_data_dir) if $dbfile_data_dir;
+
+  return $self;
+}
+
+
+#These are CollectionContainer? methods
+#For a core track the would probably be in the Analysis
+#All other collection methods are in ResultFeatureAdaptor(and parents)
+
+=head2 get_dbfile_path_by_window_size
+
+  Arg[1]     : int - window size
+  Arg[2]     : OPTIONAL Bio::EnsEMBL::Slice Used when generating individual seq_region Collections
+  Example    : my $filepath = $self->get_dbfile_path_by_ResultSet_window_size($rset, $wsize);
+  Description: Generates the default dbfile path for a given ResultSet and window_size
+  Returntype : string
+  Exceptions : Throws if Slice is not valid
+  Caller     : general
+  Status     : At risk
+
+=cut
+
+sub get_dbfile_path_by_window_size{
+  my ($self, $window_size, $slice) = @_;
+
+  if($slice){
+   
+	if(! (ref($slice) && $slice->isa("Bio::EnsEMBL::Slice"))){
+	  throw('You must provide a valid Bio::EnsEMBL::Slice');
+	}
+
+	$window_size .= '.'.$slice->seq_region_name;
+  }
+
+  return $self->dbfile_data_dir.'/result_features.'.$self->name.'.'.$window_size.'.col';
+}
+
+
+=head2 dbfile_data_dir
+
+  Arg[1]     : OPTIONAL string - data directory for this ResultSet
+  Example    : my $dbfile_data_dir = $self->dbfile_data_dir;
+  Description: Getter/Setter for the root dbfile data directory for this ResultSet
+  Returntype : string
+  Exceptions : None
+  Caller     : self
+  Status     : at risk
+
+=cut
+
+
+
+sub dbfile_data_dir{
+  my ($self, $data_dir) = @_;
+
+  $self->{'dbfile_data_dir'} = $data_dir if defined $data_dir;
+
+  return $self->{'dbfile_data_dir'};
+}
+
+
+
+=head2 result_feature_set
+
+  Arg [1]    : optional - boolean 0 or 1.
+  Example    : if($rset->result_feature_set){ ...use result_feature table ...};
+  Description: Getter and setter for the result_feature_set attribute.
+  Returntype : boolean
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+
+sub result_feature_set{
+  my $self = shift;
+
+  $self->{'result_feature_set'} = shift if @_;;
+  return $self->{'result_feature_set'};
+}
+
+
+=head2 table_name
+
+  Arg [1]    : (optional) string - table_name (experimental_chip, channel or input_set)
+  Example    : $result_set->experiment_id($exp_id);
+  Description: Getter and setter for the table_name for this ResultSet.
+  Returntype : string
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+
+sub table_name{
+    my $self = shift;
+
+    if (@_){
+      
+      if($self->{'table_name'} && ($self->{'table_name'} ne $_[0])){
+		throw("Cannot mix  table name/types of a ResultSet");
+      }
+	
+      $self->{'table_name'} = $_[0];
+    }
+
+    return $self->{'table_name'};
+}
+
+
+
+=head2 add_table_id
+
+  Example    : $result_set->add_table_id($ec_id, $cc_id);
+  Description: Caches table_id result_set_input_id to the ResultSet. In the case of an 
+               array ResultSet, the unique result_set_input_id is used to key into the 
+               result table, it also reduces redundancy and enable mapping of results to chips
+               rather than just the ResultSet.  This enables result retrieval
+               based on chips in the same set which  have a differing status.
+               In the case of a sequencing ResultSet, this simply refers to the InputSet ids.
+  Returntype : None
+  Exceptions : Throws if no table_id defined
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub add_table_id {
+  my ($self, $table_id, $cc_id) = @_;
+  
+  if (! defined $table_id){	
+    throw("Need to pass a table_id");
+  }else{
+    
+    if((exists $self->{'table_id_hash'}->{$table_id}) && (defined $self->{'table_id_hash'}->{$table_id})){
+      throw("You are attempting to redefine a result_set_input_id which is already defined");
+    }
+    
+    $self->{'table_id_hash'}->{$table_id} = $cc_id;    
+    
+  }
+
+  return;
+}
+
+
+=head2 table_ids
+
+  Example    : $result_set->feature_group_id($fg_id);
+  Description: Getter and setter for the feature_group_id for this ResultSet.
+  Returntype : int
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub table_ids {
+  my $self = shift;
+  
+  return [ keys %{$self->{'table_id_hash'}} ];
+}
+
+
+sub chip_channel_ids {
+  my $self = shift;
+
+  deprecate('ResultSet::chip_channel_ids is deprecated, please use result_set_input_ids');
+  
+  return $self->result_set_input_ids;
+}
+
+=head2 result_set_input_ids
+
+  Example    : my @rset_rsi_ids = @{$result_set->result_set_input_ids()};
+  Description: Getter for the input ids for this ResultSet.
+  Returntype : arrayref
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+
+sub result_set_input_ids {
+  my $self = shift;
+  
+  return [ values %{$self->{'table_id_hash'}} ];
+}
+
+
+
+
+=head2 contains
+
+  Example    : if($result_set->contains($chip_or_channel)){...do some chip or channel erpartions here...};
+  Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet
+  Returntype : boolean
+  Exceptions : warns if ResultSet table name is not of argument type
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+
+sub contains{
+  my ($self, $chip_channel) = @_;
+
+  my $contains = 0;
+  my @tables = $chip_channel->adaptor->_tables();
+  my ($table_name, undef) = @{$tables[0]};
+
+  if($table_name ne $self->table_name()){
+    warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");
+  }else{
+    $contains = 1 if (exists $self->{'table_id_hash'}->{$chip_channel->dbID()});
+  }
+  
+  return $contains;
+}
+
+=head2 get_result_set_input_id
+
+  Arg [1]    : int - dbID (experimental_chip, channel or input_set)
+  Example    : $result_set->get_result_set_input_id($ec_id);
+  Description: Retrieves a result_set_input_id from the cache given a dbID
+  Returntype : int
+  Exceptions : none
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub get_result_set_input_id{
+  my ($self, $table_id) = @_;
+  
+  return (exists $self->{'table_id_hash'}->{$table_id}) ?  $self->{'table_id_hash'}->{$table_id} : undef;
+}
+
+
+sub get_chip_channel_id{
+  my ($self, $table_id) = @_;
+  
+  deprecate('ResultSet::get_chip_channel_ids is dperecated, please us get_result_set_input_id');
+  return $self->get_result_set_input_id($table_id);
+}
+
+
+
+=head2 get_InputSets
+
+  Example    : my @ecs = @{$result_set->get_ExperimentalChips()};
+  Description: Retrieves a chip_channel_id from the cahce given an ExperimentalChip dbID
+  Returntype : Listref of ExperimentalChip object
+  Exceptions : warns is not an experimental_chip ResultSet
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub get_InputSets{
+  my $self = shift;
+  
+  if($self->table_name ne 'input_set'){
+	warn 'Cannot get_InputSets for an array based ResultSet';
+	return;
+  }
+
+  
+
+  if(! defined $self->{'input_sets'}){
+    my $is_adaptor = $self->adaptor->db->get_InputSetAdaptor();
+    
+	foreach my $is_id(@{$self->table_ids()}){	
+	  push @{$self->{'input_sets'}}, $is_adaptor->fetch_by_dbID($is_id);
+	}
+  }
+
+  return $self->{'input_sets'};
+}
+
+
+=head2 get_ExperimentalChips
+
+  Example    : my @ecs = @{$result_set->get_ExperimentalChips()};
+  Description: Retrieves a chip_channel_id from the cahce given an ExperimentalChip dbID
+  Returntype : Listref of ExperimentalChip object
+  Exceptions : warns is not an experimental_chip ResultSet
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub get_ExperimentalChips{
+  my $self = shift;
+  
+  if($self->table_name eq 'input_set'){
+	warn 'Cannot get_ExperimentalChips for an InputSet ResultSet';
+	return;
+  }
+
+  if(! defined $self->{'experimental_chips'}){
+    my $ec_adaptor = $self->adaptor->db->get_ExperimentalChipAdaptor();
+    
+	if($self->table_name() eq "experimental_chip"){
+
+	  foreach my $ec_id(@{$self->table_ids()}){
+		#warn "Getting ec with id $ec_id";
+		push @{$self->{'experimental_chips'}}, $ec_adaptor->fetch_by_dbID($ec_id);
+		#should this be hashed on chip_channel_id?
+	  }
+	}else{
+	  #warn("Retrieving ExperimentalChips for a Channel ResultSet");
+	  
+	  my %echips;
+	  my $chan_adaptor = $self->adaptor->db->get_ChannelAdaptor(); 
+		  
+	  foreach my $chan_id(@{$self->table_ids()}){
+		my $chan = $chan_adaptor->fetch_by_dbID($chan_id);
+		$echips{$chan->experimental_chip_id} ||= $ec_adaptor->fetch_by_dbID($chan->experimental_chip_id);
+	  }
+	  
+	  @{$self->{'experimental_chips'}} = values %echips;
+	}
+  }
+
+  return $self->{'experimental_chips'};
+}
+
+
+
+=head2 get_replicate_set_by_result_set_input_id
+
+  Arg[0]     : int - chip_channel_id
+  Example    : my $rep_set_name = $result_set->get_replicate_set_by_result_set_input_id($cc_id);
+  Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip
+  Returntype : String - replicate set name
+  Exceptions : 
+  Caller     : General
+  Status     : At Risk - implement for Channels?
+
+=cut
+
+#Where is this used?
+
+sub get_replicate_set_by_result_set_input_id{
+  my ($self, $cc_id) = @_;
+
+  if( ! defined $self->{'_replicate_cache'}){
+
+	warn "Generating replicate cache!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
+
+
+	foreach my $ec (@{$self->get_ExperimentalChips()}){
+	  
+	  $self->{'_replicate_cache'}{$self->get_result_set_input_id($ec->dbID())} = $ec->replicate();
+	  
+
+	}
+  }
+
+
+  #warn here of absent replicate info?
+
+  return (exists $self->{'_replicate_cache'}{$cc_id}) ?  $self->{'_replicate_cache'}{$cc_id} : undef;
+
+}
+
+sub get_replicate_set_by_chip_channel_id{
+  my ($self, $cc_id) = @_;
+
+  deprecate('Please use get_replicate_set_by_result_set_input_id instead');
+  return $self->get_replicate_set_by_result_set_input_id($cc_id);
+}
+
+
+=head2 get_displayable_ResultFeatures_by_Slice
+
+  Arg[1]     : Bio::EnsEMBL::Slice
+  Arg[2]     : Boolean - with probe flag, will nest Probe object in ResultFeature 
+  Example    : my @results = @{$ResultSet->get_all_displayable_ResultFeatures_by_Slice($slice)};
+  Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
+  Returntype : Arrayref of ResultFeatures
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+
+sub get_displayable_ResultFeatures_by_Slice{
+  my ($self, $slice, $with_probe, $max_bins, $window_size, $constraint) = @_;
+  return $self->adaptor->fetch_ResultFeatures_by_Slice_ResultSet($slice, $self, 'DISPLAYABLE', $with_probe, $max_bins, $window_size, $constraint);
+}
+
+
+
+
+=head2 get_ResultFeatures_by_Slice
+
+  Arg[1]     : Bio::EnsEMBL::Slice
+  Arg[2]     : string - Status name e.g. 'DISPLAYABLE'
+  Arg[3]     : Boolean - with probe flag, will nest Probe object in ResultFeature 
+  Arg[4]     : int - Max bins i.e. pixel width of display
+  Arg[5]     : int - window_size
+  Arg[6]     : string - constraint
+  Example    : my @rfs_with_rpobe = @{$ResultSet->get_all_ResultFeatures_by_Slice($slice, undef, 1)};
+  Description: Simple wrapper method for ResultFeatureAdaptor::fetch_all_by_Slice_ResultSet
+  Returntype : Arrayref of ResultFeatures
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub get_ResultFeatures_by_Slice{
+  my ($self, $slice, $status, $with_probe, $max_bins, $window_size, $constraint) = @_;
+  return $self->adaptor->db->get_ResultFeatureAdaptor->fetch_all_by_Slice_ResultSet($slice, $self, $status, $with_probe, $max_bins, $window_size, $constraint);
+}
+
+
+
+#Floats unpack inaccurately so need 3 sigfiging
+#This should match the format in which they are originally stored
+#This is dependant on ResultSet type i.e. reads or intensity?
+#No format for reads!
+#Should this be set in the ResultSet instead?
+#It may be more efficient for the caller to test for format first rather than blindly printf'ing
+#even if there is no format?
+#This needs setting in new, so we don't have to eval for every score.
+
+sub score_format{
+  return '%.3f';
+}
+
+
+
+
+=head2 log_label
+
+  Example    : print $rset->log_label();
+  Description: Get a string of the unique key fields for logging purposes
+  Returntype : string
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub log_label {
+  my $self = shift;
+
+  my $label;
+  
+  if(defined $self->feature_type()){
+	$label = $self->feature_type->name.":";
+  }else{
+	$label = "Unknown FeatureType:";
+  }
+
+  if(defined $self->cell_type()){
+	$label .= $self->cell_type->name;
+  }else{
+	$label .= "Uknown CellType";
+  }
+
+  return $self->name.":".$self->analysis->logic_name.":".$label;
+}
+
+
+
+1;