diff dir_plugins/SpliceRegion.pm @ 0:e545d0a25ffe draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:17:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dir_plugins/SpliceRegion.pm	Mon Jul 15 05:17:17 2019 -0400
@@ -0,0 +1,184 @@
+=head1 LICENSE
+
+Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
+Copyright [2016-2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=head1 CONTACT
+
+ Ensembl <dev@ensembl.org>
+  
+=cut
+
+=head1 NAME
+
+ SpliceRegion
+
+=head1 SYNOPSIS
+
+ mv SpliceRegion.pm ~/.vep/Plugins
+ ./vep -i variations.vcf --plugin SpliceRegion
+ 
+ To only show the additional consequence extended_intronic_splice_region_variant, use: 
+ ./vep -i variations.vcf --plugin SpliceRegion,Extended
+
+=head1 DESCRIPTION
+
+ This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
+ provides more granular predictions of splicing effects.
+
+ Three additional terms may be added:
+
+ # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron)
+
+             v
+ ...EEEEEIIIIIIIIII...
+
+ (E = exon, I = intron, v = variant location)
+
+ # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron)
+
+           vv vvv
+ ...EEEEEIIIIIIIIII...
+
+ # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end
+
+      vvvvvvvvvvvvvvv
+ ...IIIIIIIIIIIIIIIIIIIIEEEEE...
+
+
+=cut
+
+package SpliceRegion;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
+use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES);
+
+use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
+
+my %TERM_RANK = (
+  splice_donor_5th_base_variant => 1,
+  splice_donor_region_variant => 2,
+  splice_polypyrimidine_tract_variant => 3,
+  extended_intronic_splice_region_variant_5prime => 4,
+  extended_intronic_splice_region_variant_3prime => 5,
+);
+
+sub feature_types {
+  return ['Transcript'];
+}
+
+sub get_header_info {
+  return {
+    SpliceRegion => "SpliceRegion predictions",
+  };
+}
+
+sub run {
+  my ($self, $tva) = @_;
+
+  my $vf = $tva->variation_feature;
+  my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});
+
+  my $is_insertion = 0;
+  if($vf_start > $vf_end) {
+    ($vf_start, $vf_end) = ($vf_end, $vf_start);
+    $is_insertion = 1;
+  }
+
+  my $tv = $tva->transcript_variation;
+  my $tr = $tv->transcript;
+  my $vf_tr_seq = $tva->feature_seq;
+
+  # define some variables depending on transcript strand
+  my ($strand_mod, $donor_coord, $acc_coord);
+  if($tr->strand > 0) {
+    $strand_mod = 1;
+    $donor_coord = 'start';
+    $acc_coord = 'end';
+  }
+  else {
+    $strand_mod = -1;
+    $donor_coord = 'end';
+    $acc_coord = 'start';
+  }
+
+  my %results;
+
+  my @terms;
+  my $extended_flag = lc($self->params->[0] || "") eq 'extended';
+  for my $intron(@{$tv->_overlapped_introns($vf_start, $vf_end)}) {
+
+    # define terms to check for and their regions
+    @terms = (
+      {
+        term => 'splice_donor_5th_base_variant',
+        region => [$intron->{$donor_coord} + (4 * $strand_mod), $intron->{$donor_coord} + (4 * $strand_mod)]
+      },
+      {
+        term => 'splice_donor_region_variant',
+        region => [$intron->{$donor_coord} + (2 * $strand_mod), $intron->{$donor_coord} + (5 * $strand_mod)]
+      },
+      {
+        term => 'splice_polypyrimidine_tract_variant',
+        region => [$intron->{$acc_coord} + (-16 * $strand_mod), $intron->{$acc_coord} + (-2 * $strand_mod)],
+        # allele_specific_mod => {
+        #   A => '_to_purine',
+        #   G => '_to_purine',
+        # }
+      },
+    ) unless $extended_flag;
+    
+    @terms = (
+      {
+        term => 'extended_intronic_splice_region_variant_5prime',
+        region => [$intron->{$donor_coord}, $intron->{$donor_coord} + (9 * $strand_mod)]
+      },
+      {
+        term => 'extended_intronic_splice_region_variant_3prime',
+        region => [$intron->{$acc_coord} + (-9 * $strand_mod), $intron->{$acc_coord} ],
+        # allele_specific_mod => {
+        #   A => '_to_purine',
+        #   G => '_to_purine',
+        # }
+      },
+    ) if $extended_flag;
+    
+
+    foreach my $term_hash(@terms) {
+      my $pass = overlap($vf_start, $vf_end, sort {$a <=> $b} @{$term_hash->{region}});
+      if($pass) {
+        my $term = $term_hash->{term};
+        $term = 'extended_intronic_splice_region_variant' if $extended_flag;
+
+        # if(my $allele_specific_mods = $term_hash->{allele_specific_mod}) {
+        #   $term .= $allele_specific_mods->{$vf_tr_seq} || '';
+        # }
+
+        $results{$term}++;
+        last;
+      }
+    }
+  }
+
+  return {} unless %results;
+
+  return { SpliceRegion => [sort {$TERM_RANK{$a} <=> $TERM_RANK{$b}} keys %results]};
+}
+
+1;
+