diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/MotifFeature.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/MotifFeature.pm	Fri Aug 03 10:04:48 2012 -0400
@@ -0,0 +1,360 @@
+#
+# Ensembl module for Bio::EnsEMBL::Funcgen::MotifFeature
+#
+# You may distribute this module under the same terms as Perl itself
+
+=head1 NAME
+
+Bio::EnsEMBL::MotifFeature - A module to represent a feature mapping as based
+on a binding matrix e.g position weight matrix
+
+=head1 SYNOPSIS
+
+use Bio::EnsEMBL::Funcgen::MotifFeature;
+
+my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
+	-SLICE         => $chr_1_slice,
+	-START         => 1_000_000,
+	-END           => 1_000_024,
+	-STRAND        => -1,
+    -DISPLAY_LABEL => $text,
+    -SCORE         => $score,
+    -FEATURE_TYPE  => $ftype,
+    -INTERDB_STABLE_ID    => 1,
+); 
+
+
+
+=head1 DESCRIPTION
+
+A MotifFeature object represents the genomic placement of a sequence motif.
+For example a transcription factor binding site motif associated with a
+position weight matrix. These are generally associated with AnnotatedFeatures
+of the corresponding FeatureType.
+
+
+=head1 SEE ALSO
+
+Bio::EnsEMBL::Funcgen::DBSQL::MotifFeatureAdaptor
+
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2009 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+
+=cut
+
+use strict;
+use warnings;
+
+package Bio::EnsEMBL::Funcgen::MotifFeature;
+
+use Bio::EnsEMBL::Utils::Argument qw( rearrange );
+use Bio::EnsEMBL::Utils::Exception qw( throw );
+use Bio::EnsEMBL::Feature;
+use Bio::EnsEMBL::Funcgen::Storable;
+
+
+use vars qw(@ISA);
+@ISA = qw(Bio::EnsEMBL::Feature Bio::EnsEMBL::Funcgen::Storable);
+
+
+=head2 new
+
+ 
+  Arg [-SCORE]          : (optional) int - Score assigned by analysis pipeline
+  Arg [-SLICE]          : Bio::EnsEMBL::Slice - The slice on which this feature is.
+  Arg [-BINDING_MATRIX] : Bio::EnsEMBL::Funcgen::BindingMatrix - Binding Matrix associated to this feature.
+  Arg [-START]          : int - The start coordinate of this feature relative to the start of the slice
+		                it is sitting on. Coordinates start at 1 and are inclusive.
+  Arg [-END]            : int -The end coordinate of this feature relative to the start of the slice
+	                    it is sitting on. Coordinates start at 1 and are inclusive.
+  Arg [-DISPLAY_LABEL]  : string - Display label for this feature
+  Arg [-STRAND]         : int - The orientation of this feature. Valid values are 1, -1 and 0.
+  Arg [-dbID]           : (optional) int - Internal database ID.
+  Arg [-ADAPTOR]        : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor.
+
+  Example    : my $feature = Bio::EnsEMBL::Funcgen::MotifFeature->new(
+                                									  -SLICE          => $chr_1_slice,
+								                                	  -START          => 1_000_000,
+                                									  -END            => 1_000_024,
+								                                	  -STRAND         => -1,
+                                									  -BINDING_MATRIX => $bm,
+								                                	  -DISPLAY_LABEL  => $text,
+                                  									  -SCORE          => $score,
+                                                                      -INTERDB_STABLE_ID     => 1,
+                                                                     );
+
+
+  Description: Constructor for MotifFeature objects.
+  Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
+  Exceptions : Throws if BindingMatrix not valid
+  Caller     : General
+  Status     : Medium Risk
+
+=cut
+
+sub new {
+  my $caller = shift;
+	
+  my $class = ref($caller) || $caller;
+  my $self = $class->SUPER::new(@_);
+  my $bmatrix;
+  ($self->{'score'}, $bmatrix,   $self->{'display_label'}, $self->{'interdb_stable_id'}) 
+	= rearrange(['SCORE', 'BINDING_MATRIX', 'DISPLAY_LABEL', 'INTERDB_STABLE_ID'], @_);
+    
+
+  if(! (ref($bmatrix) && $bmatrix->isa('Bio::EnsEMBL::Funcgen::BindingMatrix'))){
+	throw('You must pass be a valid Bio::EnsEMBL::Funcgen::BindingMatrix');
+  }
+
+  $self->{'binding_matrix'} = $bmatrix;
+
+  return $self;
+}
+
+=head2 new_fast
+
+  Args       : Hashref with all internal attributes set
+  Example    : none
+  Description: Quick and dirty version of new. Only works if the calling code 
+               is very disciplined.
+  Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub new_fast {
+  return bless ($_[1], $_[0]);
+}
+
+
+
+=head2 binding_matrix
+
+  Example    : my $bmatrix_name = $mfeat->binding_matrix->name;
+  Description: Getter for the BindingMatrix attribute for this feature.
+  Returntype : Bio::EnsEMBL::Funcgen::BindingMatrix
+  Exceptions : None
+  Caller     : General
+  Status     : At risk
+
+=cut
+
+sub binding_matrix{
+  return $_[0]->{'binding_matrix'};
+}
+
+=head2 score
+
+  Example    : my $score = $feature->score();
+  Description: Getter for the score attribute for this feature. 
+  Returntype : double
+  Exceptions : None
+  Caller     : General
+  Status     : Low Risk
+
+=cut
+
+sub score {
+  return $_[0]->{'score'};
+}
+
+
+=head2 display_label
+
+  Example    : my $label = $feature->display_label();
+  Description: Getter for the display label of this feature.
+  Returntype : str
+  Exceptions : None
+  Caller     : General
+  Status     : Medium Risk
+
+=cut
+
+sub display_label {
+  #If not set in new before store, a default is stored as:
+  #$mf->binding_matrix->feature_type->name.':'.$mf->binding_matrix->name();
+
+  return $_[0]->{'display_label'};
+}
+
+
+
+=head2 associated_annotated_features
+
+  Example    : my @associated_afs = @{$feature->associated_annotated_features()};
+  Description: Getter/Setter for associated AnntoatedFeatures.
+  Returntype : ARRAYREF of Bio::EnsEMBL::Funcgen:AnnotatedFeature objects
+  Exceptions : None
+  Caller     : General
+  Status     : At risk - may change to associated_transcript_factor_features
+
+=cut
+
+sub associated_annotated_features{
+  my ($self, $afs) = @_;
+  
+  #Lazy load as we don't want to have to do a join on all features when most will not have any
+
+ 
+  if(defined $afs){
+
+	if(ref($afs) eq 'ARRAY'){
+
+	  foreach my $af(@$afs){
+	
+		if( ! $af->isa('Bio::EnsEMBL::Funcgen::AnnotatedFeature') ){
+		  throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
+		}
+		#test is stored in adaptor
+	  }
+
+	  if(defined $self->{'associated_annotated_features'}){
+		warn('You are overwriting associated_annotated_features for the MotifFeature');
+		#we could simply add the new ones and make them NR.
+	  }
+
+	  $self->{'associated_annotated_features'} = $afs;
+	}
+	else{
+	  throw('You must pass and ARRAYREF of stored Bio::EnsEMBL::Funcgen::AnnotatedFeature objects');
+	}
+  }
+
+
+  if(! defined $self->{'associated_annotated_features'}){
+
+	if(defined $self->adaptor){
+	  $self->{'associated_annotated_features'} = 
+		$self->adaptor->db->get_AnnotatedFeatureAdaptor->fetch_all_by_associated_MotifFeature($self);
+	}
+  }
+  
+  #This has the potential to return undef, or an arrayref which may be empty.
+  return $self->{'associated_annotated_features'};
+}
+
+
+=head2 is_position_informative
+
+  Arg [1]    : int - 1-based position within the Motif
+  Example    : $mf->is_position_informative($pos);
+  Description: Indicates if a given position within the motif is highly informative
+  Returntype : boolean
+  Exceptions : throws if position out of bounds ( < 1 or > length of motif)
+  Caller     : General
+  Status     : At High risk
+
+=cut
+
+sub is_position_informative {
+  my ($self,$position) = (shift,shift);
+  throw "Need a position" if(!defined($position));
+  throw "Position out of bounds" if(($position<1) || ($position>$self->binding_matrix->length));
+  #if on the opposite strand, then need to reverse complement the position
+  if($self->strand < 0){ $position = $self->binding_matrix->length - $position + 1; }
+  return $self->binding_matrix->is_position_informative($position);
+}
+
+
+=head2 infer_variation_consequence
+
+  Arg [1]    : Bio::EnsEMBL::Variation::VariationFeature
+  Arg [2]    : boolean - 1 if results in linear scale (default is log scale)
+  Example    : my $vfs = $vf_adaptor->fetch_all_by_Slice($slice_adaptor->fetch_by_region('toplevel',$mf->seq_region_name,$mf->start,$mf->end,$mf->strand));
+               foreach my $vf (@{$vfs}){
+                   print $mf->infer_variation_consequence($vf)."\n";
+               }
+
+  Description: Calculates the potential influence of a given variation in a motif feature.
+               Returns a value between -100% (lost) and +100% (gain) indicating the difference 
+               in strength between the motif in the reference and after the variation.
+
+               The variation feature slice needs to be the motif feature, including the strand
+  Returntype : float
+  Exceptions : throws if argument is not a  Bio::EnsEMBL::Variation::VariationFeature
+               throws if the variation feature is not contained in the motif feature
+  Caller     : General
+  Status     : At High risk
+
+=cut
+
+sub infer_variation_consequence{
+  my ($self, $vf, $linear) = (shift, shift, shift);
+
+  if(! $vf->isa('Bio::EnsEMBL::Variation::VariationFeature')){
+    throw "We expect a Bio::EnsEMBL::Variation::VariationFeature object, not a ".$vf->class;
+  }
+
+  #See if these checks are required or if there are more efficient ways to do the checks...
+  #if(($self->slice->seq_region_name ne $vf->slice->seq_region_name) ||
+  #   ($self->slice->start != $vf->slice->start) || 
+  #   ($self->slice->end != $vf->slice->end) ){
+  #  throw "Variation and Motif are on distinct slices";
+  #}
+  #if(!(($vf->start >= $self->start) && ($vf->end <= $self->end ))){
+  #  throw "Variation should be entirely contained in the Motif";
+  #}
+
+  if( ($vf->start < 1) || ($vf->end > $self->binding_matrix->length)){ throw "Variation not entirely contained in the motif feature"; }
+
+  if(!($vf->allele_string =~ /^[ACTG]\/[ACTG]$/)){ throw "Currently only SNPs are supported"; }
+
+  my $ref_seq = $self->seq;
+
+  my $variant = $vf->allele_string;
+  $variant =~ s/^.*\///;
+  $variant =~ s/\s*$//;
+
+  my ($vf_start,$vf_end) = ($vf->start, $vf->end);
+  if($vf->strand == -1){
+    #Needed for insertions
+    $variant = reverse($variant);
+    $variant =~ tr/ACGT/TGCA/;
+  }
+  my $var_seq = substr($ref_seq,0, $vf_start - 1).$variant.substr($ref_seq, $vf_start+length($variant)-1);
+
+  my $bm = $self->{'binding_matrix'};
+  return 100 * ($bm->relative_affinity($var_seq,$linear) - $bm->relative_affinity($ref_seq,$linear));
+  
+}
+
+=head2 interdb_stable_id
+
+  Arg [1]    : (optional) int - stable_id e.g 1
+  Example    : my $idb_sid = $feature->interdb_stable_id();
+  Description: Getter for the interdb_stable_id attribute for this feature.
+               This is simply to avoid using internal db IDs for inter DB linking
+  Returntype : int
+  Exceptions : None
+  Caller     : General
+  Status     : At Risk
+
+=cut
+
+sub interdb_stable_id {
+  return $_[0]->{'interdb_stable_id'};
+}
+
+
+
+1;
+