diff variant_effect_predictor/Bio/EnsEMBL/Variation/MotifFeatureVariationAllele.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/MotifFeatureVariationAllele.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,230 @@
+=head1 LICENSE
+
+ Copyright (c) 1999-2012 The European Bioinformatics Institute and
+ Genome Research Limited.  All rights reserved.
+
+ This software is distributed under a modified Apache license.
+ For license details, please see
+
+   http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+ Please email comments or questions to the public Ensembl
+ developers list at <dev@ensembl.org>.
+
+ Questions may also be sent to the Ensembl help desk at
+ <helpdesk@ensembl.org>.
+
+=cut
+
+package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
+
+use base qw(Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele);
+
+sub new_fast {
+    my ($self, $hashref) = @_;
+    
+    # swap a motif_feature_variation argument for a variation_feature_overlap one
+
+    if ($hashref->{motif_feature_variation}) {
+        $hashref->{variation_feature_overlap} = delete $hashref->{motif_feature_variation};
+    }
+    
+    # and call the superclass
+
+    return $self->SUPER::new_fast($hashref);
+}
+
+=head2 motif_feature_variation
+
+  Description: Get/set the associated MotifFeatureVariation
+  Returntype : Bio::EnsEMBL::Variation::MotifFeatureVariation
+  Status     : At Risk
+
+=cut
+
+sub motif_feature_variation {
+    my ($self, $mfv) = shift;
+    assert_ref($mfv, 'Bio::EnsEMBL::Variation::MotifFeatureVariation') if $mfv;
+    return $self->variation_feature_overlap($mfv);
+}
+
+=head2 motif_feature
+
+  Description: Get/set the associated MotifFeature
+  Returntype : Bio::EnsEMBL::Funcgen::MotifFeature
+  Status     : At Risk
+
+=cut
+
+sub motif_feature {
+    my $self = shift;
+    return $self->motif_feature_variation->motif_feature;
+}
+
+=head2 motif_start
+
+  Description: Get the (1-based) relative position of the variation feature start in the motif
+  Returntype : integer
+  Status     : At Risk
+
+=cut
+
+sub motif_start {
+
+    my $self = shift;
+
+    my $mf = $self->motif_feature;
+    my $vf = $self->variation_feature;
+    
+    return undef unless defined $vf->seq_region_start && defined $mf->seq_region_start;
+   
+    my $mf_start = $vf->seq_region_start - $mf->seq_region_start + 1;
+
+    # adjust if the motif is on the reverse strand
+
+    $mf_start = $mf->binding_matrix->length - $mf_start + 1 if $mf->strand < 0;
+    
+    # check that we're in bounds
+
+    return undef if $mf_start > $mf->length;
+
+    return $mf_start;
+}
+
+=head2 motif_end
+
+  Description: Get the (1-based) relative position of the variation feature end in the motif
+  Returntype : integer
+  Status     : At Risk
+
+=cut
+
+sub motif_end {
+
+    my $self = shift;
+
+    my $mf = $self->motif_feature;
+    my $vf = $self->variation_feature;
+    
+    return undef unless defined $vf->seq_region_end && defined $mf->seq_region_start;
+   
+    my $mf_end = $vf->seq_region_end - $mf->seq_region_start + 1;
+
+    # adjust if the motif is on the reverse strand
+
+    $mf_end = $mf->binding_matrix->length - $mf_end + 1 if $mf->strand < 0;
+    
+    # check that we're in bounds
+
+    return undef if $mf_end < 1;
+
+    return $mf_end;
+}
+
+=head2 in_informative_position
+
+  Description: Identify if the variation feature falls in a high information position of the motif
+  Returntype : boolean
+  Status     : At Risk
+
+=cut
+
+sub in_informative_position {
+    my $self = shift;
+
+    # we can only call this for true SNPs
+
+    my $vf = $self->variation_feature;
+
+    unless (($vf->start == $vf->end) && ($self->variation_feature_seq ne '-')) {
+        return undef;
+    }
+
+    # get the 1-based position
+
+    my $start = $self->motif_start;
+
+    return undef unless defined $start && $start >= 1 && $start <= $self->motif_feature->length;
+
+    return $self->motif_feature->binding_matrix->is_position_informative($start);
+}
+
+=head2 motif_score_delta
+
+  Description: Calculate the difference in motif score between the reference and variant sequences
+  Returntype : float
+  Status     : At Risk
+
+=cut
+
+sub motif_score_delta {
+
+    my $self    = shift;
+    my $linear  = shift;
+    
+    unless ($self->{motif_score_delta}) {
+
+        my $vf = $self->motif_feature_variation->variation_feature;
+        my $mf = $self->motif_feature;
+
+        my $allele_seq      = $self->feature_seq;
+        my $ref_allele_seq  = $self->motif_feature_variation->get_reference_MotifFeatureVariationAllele->feature_seq;
+
+        if ($allele_seq eq '-' || 
+            $ref_allele_seq eq '-' || 
+            length($allele_seq) != length($ref_allele_seq)) {
+            # we can't call a score because the sequence will change length
+            return undef;
+        }
+
+        my ($mf_start, $mf_end) = ($self->motif_start, $self->motif_end);
+
+        return undef unless defined $mf_start && defined $mf_end;
+
+        my $mf_seq = $self->motif_feature_variation->_motif_feature_seq;
+        my $mf_seq_length = length($mf_seq);
+        
+        # trim allele_seq
+        if($mf_start < 1) {
+            $allele_seq = substr($allele_seq, 1 - $mf_start);
+            $mf_start = 1;
+        }
+        
+        if($mf_end > $mf_seq_length) {
+            $allele_seq = substr($allele_seq, 0, $mf_seq_length - $mf_start + 1);
+            $mf_end = $mf_seq_length;
+        }
+
+        my $var_len = length($allele_seq);
+
+        return undef if $var_len > $mf->length;
+
+        my $matrix = $mf->binding_matrix;
+
+        # get the binding affinity of the reference sequence
+        my $ref_affinity = $matrix->relative_affinity($mf_seq, $linear);
+
+        # splice in the variant sequence (0-based)
+        substr($mf_seq, $mf_start - 1, $var_len) = $allele_seq;
+        
+        # check length hasn't changed
+        return undef if length($mf_seq) != $mf_seq_length;
+
+        # and get the affinity of the variant sequence
+        my $var_affinity = $matrix->relative_affinity($mf_seq, $linear);
+
+        $self->{motif_score_delta} = ($var_affinity - $ref_affinity);
+    }
+
+    return $self->{motif_score_delta};
+}
+
+1;