comparison dir_plugins/SpliceRegion.pm @ 0:e545d0a25ffe draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:17:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e545d0a25ffe
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <dev@ensembl.org>
21
22 =cut
23
24 =head1 NAME
25
26 SpliceRegion
27
28 =head1 SYNOPSIS
29
30 mv SpliceRegion.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin SpliceRegion
32
33 To only show the additional consequence extended_intronic_splice_region_variant, use:
34 ./vep -i variations.vcf --plugin SpliceRegion,Extended
35
36 =head1 DESCRIPTION
37
38 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
39 provides more granular predictions of splicing effects.
40
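41 Three additional terms may be added (with the Extended option, only extended_intronic_splice_region_variant is reported instead, for variants overlapping the first or last 10 bases of an intron):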
42
43 # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron)
44
45             v
46 ...EEEEEIIIIIIIIII...
47
48 (E = exon, I = intron, v = variant location)
49
50 # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron)
51
52 vv vvv
53 ...EEEEEIIIIIIIIII...
54
55 # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end
56
57 vvvvvvvvvvvvvvv
58 ...IIIIIIIIIIIIIIIIIIIIEEEEE...
59
60
61 =cut
62
63 package SpliceRegion;
64
65 use strict;
66 use warnings;
67
68 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
69 use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES);
70
71 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
72
# Output order of the terms returned by run(): lower rank sorts first.
# The un-suffixed extended_intronic_splice_region_variant key is the term
# that run() actually reports in Extended mode; the _5prime/_3prime keys are
# the internal names of the two regions that map onto it and are kept for
# backward compatibility.
my %TERM_RANK = (
  splice_donor_5th_base_variant => 1,
  splice_donor_region_variant => 2,
  splice_polypyrimidine_tract_variant => 3,
  extended_intronic_splice_region_variant => 4,
  extended_intronic_splice_region_variant_5prime => 4,
  extended_intronic_splice_region_variant_3prime => 5,
);
80
# Returns a reference to a one-element list naming the feature type this
# plugin declares: 'Transcript'.
sub feature_types {
  my @types = ('Transcript');
  return \@types;
}
84
# Returns a hash reference mapping the output key written by this plugin
# (SpliceRegion) to its human-readable description.
sub get_header_info {
  my %header = (
    SpliceRegion => "SpliceRegion predictions",
  );
  return \%header;
}
90
# Classify a variant allele against the splice-adjacent regions of every
# intron it overlaps.
#
# Arguments : $self - plugin instance; when $self->params->[0] equals
#                     'extended' (case-insensitive) only the extended
#                     intronic regions are tested
#             $tva  - allele object providing variation_feature and
#                     transcript_variation
# Returns   : {} when no region is hit, otherwise
#             { SpliceRegion => [ terms ordered by %TERM_RANK ] }
sub run {
  my ($self, $tva) = @_;

  my $vf = $tva->variation_feature;
  my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});

  # start > end (the case the original code flagged as an insertion): put the
  # coordinates in ascending order for the intron lookup and overlap tests
  ($vf_start, $vf_end) = ($vf_end, $vf_start) if $vf_start > $vf_end;

  my $tv = $tva->transcript_variation;
  my $tr = $tv->transcript;

  # on the reverse strand the donor side of an intron is its 'end' coordinate
  # and offsets into the intron run in the negative direction
  my ($strand_mod, $donor_coord, $acc_coord) =
    $tr->strand > 0 ? (1, 'start', 'end') : (-1, 'end', 'start');

  my $extended_flag = lc($self->params->[0] || "") eq 'extended';

  # Region table, built once: each entry names the reported term, the intron
  # coordinate it is anchored to, and the two offsets (in transcript
  # orientation) that bound the region. Order matters: for each intron only
  # the first matching entry is reported.
  my @term_defs = $extended_flag
    ? (
      {
        term    => 'extended_intronic_splice_region_variant',
        anchor  => $donor_coord,
        offsets => [0, 9],
      },
      {
        term    => 'extended_intronic_splice_region_variant',
        anchor  => $acc_coord,
        offsets => [-9, 0],
      },
    )
    : (
      {
        term    => 'splice_donor_5th_base_variant',
        anchor  => $donor_coord,
        offsets => [4, 4],
      },
      {
        term    => 'splice_donor_region_variant',
        anchor  => $donor_coord,
        offsets => [2, 5],
      },
      {
        term    => 'splice_polypyrimidine_tract_variant',
        anchor  => $acc_coord,
        offsets => [-16, -2],
      },
    );

  my %results;

  for my $intron (@{$tv->_overlapped_introns($vf_start, $vf_end)}) {
    foreach my $def (@term_defs) {
      my $anchor = $intron->{$def->{anchor}};

      # numeric sort so the region is (low, high) whatever the strand
      my @region = sort {$a <=> $b}
                   map { $anchor + ($_ * $strand_mod) } @{$def->{offsets}};

      next unless overlap($vf_start, $vf_end, @region);

      $results{$def->{term}}++;
      last;
    }
  }

  return {} unless %results;

  # Terms without an entry in %TERM_RANK sort after all ranked terms instead
  # of feeding undef to the numeric comparison; ties fall back to name order.
  my $fallback_rank = scalar(keys %TERM_RANK) + 1;
  my %rank_of = map {
    ( $_ => (exists $TERM_RANK{$_} ? $TERM_RANK{$_} : $fallback_rank) )
  } keys %results;

  return {
    SpliceRegion => [
      sort { $rank_of{$a} <=> $rank_of{$b} || $a cmp $b } keys %results
    ]
  };
}
182
183 1;
184