| 0 | 1 =head1 LICENSE | 
|  | 2 | 
|  | 3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | 
|  | 4 Copyright [2016-2018] EMBL-European Bioinformatics Institute | 
|  | 5 | 
|  | 6 Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 7 you may not use this file except in compliance with the License. | 
|  | 8 You may obtain a copy of the License at | 
|  | 9 | 
|  | 10    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | 
|  | 12 Unless required by applicable law or agreed to in writing, software | 
|  | 13 distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 15 See the License for the specific language governing permissions and | 
|  | 16 limitations under the License. | 
|  | 17 | 
|  | 18 =head1 CONTACT | 
|  | 19 | 
|  | 20  Ensembl <dev@ensembl.org> | 
|  | 21 | 
|  | 22 =cut | 
|  | 23 | 
|  | 24 =head1 NAME | 
|  | 25 | 
|  | 26  SpliceRegion | 
|  | 27 | 
|  | 28 =head1 SYNOPSIS | 
|  | 29 | 
|  | 30  mv SpliceRegion.pm ~/.vep/Plugins | 
|  | 31  ./vep -i variations.vcf --plugin SpliceRegion | 
|  | 32 | 
|  | 33  To only show the additional consequence extended_intronic_splice_region_variant, use: | 
|  | 34  ./vep -i variations.vcf --plugin SpliceRegion,Extended | 
|  | 35 | 
|  | 36 =head1 DESCRIPTION | 
|  | 37 | 
|  | 38  This is a plugin for the Ensembl Variant Effect Predictor (VEP) that | 
|  | 39  provides more granular predictions of splicing effects. | 
|  | 40 | 
|  | 41  Three additional terms may be added: | 
|  | 42 | 
|  | 43  # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron) | 
|  | 44 | 
|  | 45              v | 
|  | 46  ...EEEEEIIIIIIIIII... | 
|  | 47 | 
|  | 48  (E = exon, I = intron, v = variant location) | 
|  | 49 | 
|  | 50  # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron) | 
|  | 51 | 
|  | 52            vv vvv | 
|  | 53  ...EEEEEIIIIIIIIII... | 
|  | 54 | 
|  | 55  # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end | 
|  | 56 | 
|  | 57       vvvvvvvvvvvvvvv | 
|  | 58  ...IIIIIIIIIIIIIIIIIIIIEEEEE... | 
|  | 59 | 
|  | 60 | 
|  | 61 =cut | 
|  | 62 | 
|  | 63 package SpliceRegion; | 
|  | 64 | 
|  | 65 use strict; | 
|  | 66 use warnings; | 
|  | 67 | 
|  | 68 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap); | 
|  | 69 use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES); | 
|  | 70 | 
|  | 71 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | 
|  | 72 | 
# Sort order for the terms reported under the SpliceRegion key; a lower
# rank is listed first in the plugin output.
my %TERM_RANK = (
  splice_donor_5th_base_variant => 1,
  splice_donor_region_variant => 2,
  splice_polypyrimidine_tract_variant => 3,
  extended_intronic_splice_region_variant_5prime => 4,
  extended_intronic_splice_region_variant_3prime => 5,
  # run() reports both extended regions under this suffix-less name, so it
  # needs a rank of its own for the final sort to be well defined.
  extended_intronic_splice_region_variant => 6,
);
|  | 80 | 
# Returns a reference to the list of feature type names this plugin
# declares; the list holds the single entry 'Transcript'.
sub feature_types {
  my @declared_types = ('Transcript');
  return \@declared_types;
}
|  | 84 | 
# Returns a hash reference mapping each output key produced by this plugin
# to its description; the only key is 'SpliceRegion'.
sub get_header_info {
  my %description_for = (
    SpliceRegion => "SpliceRegion predictions",
  );
  return \%description_for;
}
|  | 90 | 
# Annotates one transcript variation allele with finer-grained splice terms.
#
# Arguments:
#   $self - the plugin instance. When its first parameter equals "extended"
#           (compared case-insensitively) only the extended intronic regions
#           are checked; otherwise the three standard regions are checked.
#   $tva  - allele object supplying variation_feature and
#           transcript_variation.
#
# Returns:
#   { SpliceRegion => [ terms ordered by %TERM_RANK ] } when the variant
#   overlaps at least one region, otherwise an empty hash reference.
sub run {
  my ($self, $tva) = @_;

  my $vf = $tva->variation_feature;
  my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});

  # start > end is how insertions are represented here; put the coordinates
  # in ascending order so the overlap checks receive an ordered range
  ($vf_start, $vf_end) = ($vf_end, $vf_start) if $vf_start > $vf_end;

  my $tv = $tva->transcript_variation;
  my $tr = $tv->transcript;

  # the donor (5') end of an intron is its 'start' on the forward strand and
  # its 'end' on the reverse strand; offsets into the intron flip sign too
  my $is_forward  = $tr->strand > 0;
  my $strand_mod  = $is_forward ? 1       : -1;
  my $donor_coord = $is_forward ? 'start' : 'end';
  my $acc_coord   = $is_forward ? 'end'   : 'start';

  my $extended_flag = lc($self->params->[0] || "") eq 'extended';

  # Each entry: [ term, intron coordinate to anchor on, offset, offset ].
  # Offsets are in transcript orientation and are multiplied by $strand_mod
  # below. Order matters: only the first matching entry is kept per intron.
  # (Allele-specific suffixes such as '_to_purine' for A/G alleles were
  # sketched for the 3' regions in earlier revisions but are not enabled.)
  my @term_defs = $extended_flag
    ? (
        # both extended regions are reported under one term name
        ['extended_intronic_splice_region_variant', $donor_coord,  0, 9],
        ['extended_intronic_splice_region_variant', $acc_coord,   -9, 0],
      )
    : (
        ['splice_donor_5th_base_variant',       $donor_coord,   4,  4],
        ['splice_donor_region_variant',         $donor_coord,   2,  5],
        ['splice_polypyrimidine_tract_variant', $acc_coord,   -16, -2],
      );

  my %results;

  for my $intron (@{$tv->_overlapped_introns($vf_start, $vf_end)}) {
    for my $def (@term_defs) {
      my ($term, $anchor, @offsets) = @$def;

      # convert offsets to coordinates; sort because on the reverse strand
      # the computed pair comes out in descending order
      my @region = sort { $a <=> $b }
                   map  { $intron->{$anchor} + ($_ * $strand_mod) } @offsets;

      next unless overlap($vf_start, $vf_end, @region);

      $results{$term}++;
      last;
    }
  }

  return {} unless %results;

  # Terms missing from %TERM_RANK sort last (then by name) instead of
  # comparing undef values.
  my $rank_of = sub {
    my $rank = $TERM_RANK{$_[0]};
    return defined($rank) ? $rank : 1e9;
  };

  return {
    SpliceRegion => [
      sort { $rank_of->($a) <=> $rank_of->($b) || $a cmp $b } keys %results
    ]
  };
}
|  | 182 | 
|  | 183 1; | 
|  | 184 |