diff dir_plugins/Downstream.pm @ 0:e545d0a25ffe draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:17:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dir_plugins/Downstream.pm	Mon Jul 15 05:17:17 2019 -0400
@@ -0,0 +1,137 @@
+=head1 LICENSE
+
+Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
+Copyright [2016-2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=head1 CONTACT
+
+ Ensembl <http://www.ensembl.org/info/about/contact/index.html>
+    
+=cut
+
+=head1 NAME
+
+ Downstream
+
+=head1 SYNOPSIS
+
+ mv Downstream.pm ~/.vep/Plugins
+ ./vep -i variations.vcf --plugin Downstream
+
+=head1 DESCRIPTION
+
+ This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
+ predicts the downstream effects of a frameshift variant on the protein
+ sequence of a transcript. It provides the predicted downstream protein
+ sequence (including any amino acids overlapped by the variant itself),
+ and the change in length relative to the reference protein.
+ 
+ Note that changes in splicing are not predicted - only the existing
+ translateable (i.e. spliced) sequence is used as a source of
+ translation. Any variants with a splice site consequence type are
+ ignored.
+
+=cut
+
+package Downstream;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
+use POSIX qw(ceil);
+
+use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
+
+sub version {
+    return '2.3';
+}
+
+sub feature_types {
+    return ['Transcript'];
+}
+
+sub variant_feature_types {
+    return ['VariationFeature'];
+}
+
+sub get_header_info {
+    return {
+        DownstreamProtein   => "Predicted downstream translation for frameshift mutations",
+        ProteinLengthChange => "Predicted change in protein product length",
+    };
+}
+
+sub run {
+    my ($self, $tva) = @_;
+    
+    my @ocs = @{$tva->get_all_OverlapConsequences};
+    
+    if(grep {$_->SO_term eq 'frameshift_variant'} @ocs) {
+        
+        # can't do it for splice sites
+        return {} if grep {$_->SO_term =~ /splice/} @ocs;
+        
+        my $tv = $tva->transcript_variation;
+        my $tr = $tv->transcript;
+        my $cds_seq = defined($tr->{_variation_effect_feature_cache}) ? $tr->{_variation_effect_feature_cache}->{translateable_seq} : $tr->translateable_seq;
+        
+        # get the sequence to translate
+        my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end);
+        my $is_insertion         = $tv->cds_start > $tv->cds_end ? 1 : 0;
+        my $last_complete_codon  = (ceil($low_pos / 3) - 1) * 3;
+        my $before_var_seq       = substr $cds_seq, $last_complete_codon, $low_pos - $last_complete_codon - ($is_insertion ? 0 : 1);
+        my $after_var_seq        = substr $cds_seq, $high_pos - ($is_insertion ? 1 : 0);
+        my $to_translate         = $before_var_seq.$tva->feature_seq.$after_var_seq;
+        my $three_prime_utr_seq  = $tr->three_prime_utr->seq() if ($tr->three_prime_utr);
+        $to_translate            = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq);
+        $to_translate            =~ s/\-//g;
+        
+        # create a bioperl object
+        my $codon_seq = Bio::Seq->new(
+          -seq      => $to_translate,
+          -moltype  => 'dna',
+          -alphabet => 'dna'
+        );
+        
+        # get codon table
+        my $codon_table;
+        if(defined($tr->{_variation_effect_feature_cache})) {
+            $codon_table = $tr->{_variation_effect_feature_cache}->{codon_table} || 1;
+        }
+        else {
+            my ($attrib) = @{$tr->slice->get_all_Attributes('codon_table')};
+            $codon_table = $attrib ? $attrib->value || 1 : 1;
+        }
+        
+        # translate
+        my $new_pep = $codon_seq->translate(undef, undef, undef, $codon_table)->seq();
+        $new_pep =~ s/\*.*//;
+        
+        # compare lengths
+        my $translation = defined($tr->{_variation_effect_feature_cache}) && defined($tr->{_variation_effect_feature_cache}->{peptide}) ? $tr->{_variation_effect_feature_cache}->{peptide} : $tr->translation->seq;
+        my $new_length = ($tv->translation_start < $tv->translation_end ? $tv->translation_start : $tv->translation_end) + length($new_pep);
+        
+        return {
+            DownstreamProtein   => $new_pep,
+            ProteinLengthChange => $new_length - length($translation),
+        };
+    }
+
+    return {};
+}
+
+1;
+