|
0
|
1 =head1 LICENSE
|
|
|
2
|
|
|
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
|
|
|
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
|
|
|
5
|
|
|
6 Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
7 you may not use this file except in compliance with the License.
|
|
|
8 You may obtain a copy of the License at
|
|
|
9
|
|
|
10 http://www.apache.org/licenses/LICENSE-2.0
|
|
|
11
|
|
|
12 Unless required by applicable law or agreed to in writing, software
|
|
|
13 distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
15 See the License for the specific language governing permissions and
|
|
|
16 limitations under the License.
|
|
|
17
|
|
|
18 =head1 CONTACT
|
|
|
19
|
|
|
20 Ensembl <dev@ensembl.org>
|
|
|
21
|
|
|
22 =cut
|
|
|
23
|
|
|
24 =head1 NAME
|
|
|
25
|
|
|
26 SpliceRegion
|
|
|
27
|
|
|
28 =head1 SYNOPSIS
|
|
|
29
|
|
|
30 mv SpliceRegion.pm ~/.vep/Plugins
|
|
|
31 ./vep -i variations.vcf --plugin SpliceRegion
|
|
|
32
|
|
|
33 To only show the additional consequence extended_intronic_splice_region_variant, use:
|
|
|
34 ./vep -i variations.vcf --plugin SpliceRegion,Extended
|
|
|
35
|
|
|
36 =head1 DESCRIPTION
|
|
|
37
|
|
|
38 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
|
|
|
39 provides more granular predictions of splicing effects.
|
|
|
40
|
|
|
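41 Three additional terms may be added by default; when the Extended option is given, only extended_intronic_splice_region_variant (variant falls in the first or last 10 bases of an intron) is reported instead: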
|
|
|
42
|
|
|
43 # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron)
|
|
|
44
|
|
|
45 v
|
|
|
46 ...EEEEEIIIIIIIIII...
|
|
|
47
|
|
|
48 (E = exon, I = intron, v = variant location)
|
|
|
49
|
|
|
50 # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron)
|
|
|
51
|
|
|
52 vv vvv
|
|
|
53 ...EEEEEIIIIIIIIII...
|
|
|
54
|
|
|
55 # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end
|
|
|
56
|
|
|
57 vvvvvvvvvvvvvvv
|
|
|
58 ...IIIIIIIIIIIIIIIIIIIIEEEEE...
|
|
|
59
|
|
|
60
|
|
|
61 =cut
|
|
|
62
|
|
|
63 package SpliceRegion;
|
|
|
64
|
|
|
65 use strict;
|
|
|
66 use warnings;
|
|
|
67
|
|
|
68 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
|
|
|
69 use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES);
|
|
|
70
|
|
|
71 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
|
|
|
72
|
|
|
# Numeric rank of each splice term. run() sorts the terms it reports by
# this value in ascending order, so lower ranks are listed first.
my %TERM_RANK;
@TERM_RANK{qw(
    splice_donor_5th_base_variant
    splice_donor_region_variant
    splice_polypyrimidine_tract_variant
    extended_intronic_splice_region_variant_5prime
    extended_intronic_splice_region_variant_3prime
)} = (1 .. 5);
|
|
|
80
|
|
|
# Returns a reference to a one-element array naming the feature type
# this plugin handles: 'Transcript'.
sub feature_types {
    my @handled_types = ('Transcript');
    return \@handled_types;
}
|
|
|
84
|
|
|
# Returns a hash reference mapping the output field name 'SpliceRegion'
# to its human-readable description.
sub get_header_info {
    my %header_for = (
        SpliceRegion => "SpliceRegion predictions",
    );
    return \%header_for;
}
|
|
|
90
|
|
|
# Classify one transcript variation allele against fine-grained intronic
# splice regions.
#
# Arguments:
#   $self - the plugin instance. If its first parameter equals "extended"
#           (case-insensitively), only the extended 10-base intronic
#           regions are checked and any hit is reported under the single
#           term 'extended_intronic_splice_region_variant'.
#   $tva  - the allele object; it must provide variation_feature() and
#           transcript_variation().
#
# Returns:
#   {} when no region is hit, otherwise
#   { SpliceRegion => [ <terms> ] } with the terms ordered by %TERM_RANK.
#
# NOTE: allele-specific term suffixes (e.g. '_to_purine' for A/G alleles)
# are not implemented; they previously existed only as commented-out code.
sub run {
    my ($self, $tva) = @_;

    my $vf = $tva->variation_feature;
    my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});

    # Insertions are represented with start > end; put the coordinates in
    # ascending order for the overlap checks below.
    ($vf_start, $vf_end) = ($vf_end, $vf_start) if $vf_start > $vf_end;

    my $tv = $tva->transcript_variation;
    my $tr = $tv->transcript;

    # Which intron coordinate is the donor side and which the acceptor side
    # depends on the transcript strand, as does the direction in which the
    # offsets below are applied.
    my ($strand_mod, $donor_coord, $acc_coord) =
        $tr->strand > 0
            ? ( 1, 'start', 'end')
            : (-1, 'end',   'start');

    my $extended_flag = lc($self->params->[0] || "") eq 'extended';

    my %results;

    for my $intron (@{ $tv->_overlapped_introns($vf_start, $vf_end) }) {
        my $donor    = $intron->{$donor_coord};
        my $acceptor = $intron->{$acc_coord};

        # Candidate terms and their regions for this intron. Order matters:
        # only the first region that the variant overlaps is recorded.
        my @terms = $extended_flag
            ? (
                {
                    term   => 'extended_intronic_splice_region_variant_5prime',
                    region => [$donor, $donor + (9 * $strand_mod)],
                },
                {
                    term   => 'extended_intronic_splice_region_variant_3prime',
                    region => [$acceptor - (9 * $strand_mod), $acceptor],
                },
            )
            : (
                {
                    term   => 'splice_donor_5th_base_variant',
                    region => [$donor + (4 * $strand_mod), $donor + (4 * $strand_mod)],
                },
                {
                    term   => 'splice_donor_region_variant',
                    region => [$donor + (2 * $strand_mod), $donor + (5 * $strand_mod)],
                },
                {
                    term   => 'splice_polypyrimidine_tract_variant',
                    region => [$acceptor - (16 * $strand_mod), $acceptor - (2 * $strand_mod)],
                },
            );

        for my $term_hash (@terms) {
            # With a negative strand modifier the region bounds come out in
            # descending order, so sort them numerically before comparing.
            my ($region_start, $region_end) = sort { $a <=> $b } @{ $term_hash->{region} };
            next unless overlap($vf_start, $vf_end, $region_start, $region_end);

            # In extended mode both sub-regions collapse into a single term.
            my $term = $extended_flag
                ? 'extended_intronic_splice_region_variant'
                : $term_hash->{term};

            $results{$term}++;
            last;
        }
    }

    return {} unless %results;

    # In extended mode %results holds at most one key, so the comparator
    # is never called for the extended term, which has no %TERM_RANK entry.
    return { SpliceRegion => [ sort { $TERM_RANK{$a} <=> $TERM_RANK{$b} } keys %results ] };
}
|
|
|
182
|
|
|
183 1;
|
|
|
184
|