Mercurial > repos > dvanzessen > vep_emc
diff dir_plugins/Downstream.pm @ 3:49397129aec0 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:20:39 -0400 |
| parents | e545d0a25ffe |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dir_plugins/Downstream.pm Mon Jul 15 05:20:39 2019 -0400 @@ -0,0 +1,137 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2018] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=head1 CONTACT + + Ensembl <http://www.ensembl.org/info/about/contact/index.html> + +=cut + +=head1 NAME + + Downstream + +=head1 SYNOPSIS + + mv Downstream.pm ~/.vep/Plugins + ./vep -i variations.vcf --plugin Downstream + +=head1 DESCRIPTION + + This is a plugin for the Ensembl Variant Effect Predictor (VEP) that + predicts the downstream effects of a frameshift variant on the protein + sequence of a transcript. It provides the predicted downstream protein + sequence (including any amino acids overlapped by the variant itself), + and the change in length relative to the reference protein. + + Note that changes in splicing are not predicted - only the existing + translateable (i.e. spliced) sequence is used as a source of + translation. Any variants with a splice site consequence type are + ignored. + +=cut + +package Downstream; + +use strict; +use warnings; + +use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin; +use POSIX qw(ceil); + +use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); + +sub version { + return '2.3'; +} + +sub feature_types { + return ['Transcript']; +} + +sub variant_feature_types { + return ['VariationFeature']; +} + +sub get_header_info { + return { + DownstreamProtein => "Predicted downstream translation for frameshift mutations", + ProteinLengthChange => "Predicted change in protein product length", + }; +} + +sub run { + my ($self, $tva) = @_; + + my @ocs = @{$tva->get_all_OverlapConsequences}; + + if(grep {$_->SO_term eq 'frameshift_variant'} @ocs) { + + # can't do it for splice sites + return {} if grep {$_->SO_term =~ /splice/} @ocs; + + my $tv = $tva->transcript_variation; + my $tr = $tv->transcript; + my $cds_seq = defined($tr->{_variation_effect_feature_cache}) ? $tr->{_variation_effect_feature_cache}->{translateable_seq} : $tr->translateable_seq; + + # get the sequence to translate + my ($low_pos, $high_pos) = sort {$a <=> $b} ($tv->cds_start, $tv->cds_end); + my $is_insertion = $tv->cds_start > $tv->cds_end ? 1 : 0; + my $last_complete_codon = (ceil($low_pos / 3) - 1) * 3; + my $before_var_seq = substr $cds_seq, $last_complete_codon, $low_pos - $last_complete_codon - ($is_insertion ? 0 : 1); + my $after_var_seq = substr $cds_seq, $high_pos - ($is_insertion ? 1 : 0); + my $to_translate = $before_var_seq.$tva->feature_seq.$after_var_seq; + my $three_prime_utr_seq = $tr->three_prime_utr->seq() if ($tr->three_prime_utr); + $to_translate = $to_translate.$three_prime_utr_seq if ($three_prime_utr_seq); + $to_translate =~ s/\-//g; + + # create a bioperl object + my $codon_seq = Bio::Seq->new( + -seq => $to_translate, + -moltype => 'dna', + -alphabet => 'dna' + ); + + # get codon table + my $codon_table; + if(defined($tr->{_variation_effect_feature_cache})) { + $codon_table = $tr->{_variation_effect_feature_cache}->{codon_table} || 1; + } + else { + my ($attrib) = @{$tr->slice->get_all_Attributes('codon_table')}; + $codon_table = $attrib ? $attrib->value || 1 : 1; + } + + # translate + my $new_pep = $codon_seq->translate(undef, undef, undef, $codon_table)->seq(); + $new_pep =~ s/\*.*//; + + # compare lengths + my $translation = defined($tr->{_variation_effect_feature_cache}) && defined($tr->{_variation_effect_feature_cache}->{peptide}) ? $tr->{_variation_effect_feature_cache}->{peptide} : $tr->translation->seq; + my $new_length = ($tv->translation_start < $tv->translation_end ? $tv->translation_start : $tv->translation_end) + length($new_pep); + + return { + DownstreamProtein => $new_pep, + ProteinLengthChange => $new_length - length($translation), + }; + } + + return {}; +} + +1; +
