annotate dir_plugins/SpliceRegion.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <dev@ensembl.org>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 SpliceRegion
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv SpliceRegion.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 ./vep -i variations.vcf --plugin SpliceRegion
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33 To only show the additional consequence extended_intronic_splice_region_variant, use:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 ./vep -i variations.vcf --plugin SpliceRegion,Extended
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39 provides more granular predictions of splicing effects.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
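41 By default, three additional terms may be added (with the Extended option, only extended_intronic_splice_region_variant is reported instead):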
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43 # splice_donor_5th_base_variant : variant falls in the 5th base after the splice donor junction (5' end of intron)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45             v
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 ...EEEEEIIIIIIIIII...
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 (E = exon, I = intron, v = variant location)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50 # splice_donor_region_variant : variant falls in region between 3rd and 6th base after splice junction (5' end of intron)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52 vv vvv
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53 ...EEEEEIIIIIIIIII...
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 # splice_polypyrimidine_tract_variant : variant falls in polypyrimidine tract at 3' end of intron, between 17 and 3 bases from the end
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57       vvvvvvvvvvvvvvv
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58 ...IIIIIIIIIIIIIIIIIIIIEEEEE...
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63 package SpliceRegion;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69 use Bio::EnsEMBL::Variation::Utils::Constants qw(%OVERLAP_CONSEQUENCES);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
72
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Output order of the reported terms: run() sorts its result list by these
# values, lowest first.
my %TERM_RANK = (
  splice_donor_5th_base_variant => 1,
  splice_donor_region_variant => 2,
  splice_polypyrimidine_tract_variant => 3,

  # Per-side names of the two extended windows. run() reports both under the
  # single name 'extended_intronic_splice_region_variant', so these two keys
  # are kept only for backward compatibility.
  extended_intronic_splice_region_variant_5prime => 4,
  extended_intronic_splice_region_variant_3prime => 5,

  # The term that is actually emitted in Extended mode.
  extended_intronic_splice_region_variant => 6,
);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
80
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a reference to a one-element list holding the feature type name
# 'Transcript'.
# NOTE(review): presumably read by the VEP plugin framework to decide which
# features this plugin is run on - confirm against BaseVepPlugin.
sub feature_types {
  my @wanted_types = ('Transcript');
  return \@wanted_types;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
84
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a hash reference mapping the output field name 'SpliceRegion' to
# its human-readable description.
sub get_header_info {
  my %description_of = (
    SpliceRegion => "SpliceRegion predictions",
  );

  return \%description_of;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
90
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Plugin entry point.
#
# Compares the variant's coordinates with fixed windows at the donor and
# acceptor ends of every intron returned by
# $tv->_overlapped_introns($start, $end), and collects the name of the first
# window hit for each intron.
#
# Parameters:
#   $self - plugin instance; Extended mode is selected when the first plugin
#           parameter equals 'extended' (case-insensitive)
#   $tva  - object providing variation_feature() and transcript_variation()
#
# Returns:
#   {} when no window is hit; otherwise { SpliceRegion => [ terms ] } with the
#   terms ordered by %TERM_RANK (terms without a rank sort last, by name).
sub run {
  my ($self, $tva) = @_;

  my $vf = $tva->variation_feature;
  my ($vf_start, $vf_end) = ($vf->{start}, $vf->{end});

  # Normalise to start <= end so the overlap tests below see an ordered range.
  # NOTE(review): start > end presumably marks an insertion - confirm.
  ($vf_start, $vf_end) = ($vf_end, $vf_start) if $vf_start > $vf_end;

  my $tv = $tva->transcript_variation;
  my $tr = $tv->transcript;

  # On the forward strand the donor site is at the intron start and offsets
  # grow with the coordinate; on the reverse strand both are mirrored.
  my ($strand_mod, $donor_coord, $acc_coord) = $tr->strand > 0
    ? ( 1, 'start', 'end')
    : (-1, 'end', 'start');

  my $extended_flag = lc($self->params->[0] || "") eq 'extended';

  my %results;

  for my $intron (@{$tv->_overlapped_introns($vf_start, $vf_end)}) {
    my $donor    = $intron->{$donor_coord};
    my $acceptor = $intron->{$acc_coord};

    # Candidate terms and their windows, in priority order: only the first
    # window that overlaps the variant is recorded for this intron.
    my @terms;
    if($extended_flag) {
      # Both extended windows are reported under the same term name.
      @terms = (
        {
          term   => 'extended_intronic_splice_region_variant',
          region => [$donor, $donor + (9 * $strand_mod)],
        },
        {
          term   => 'extended_intronic_splice_region_variant',
          region => [$acceptor + (-9 * $strand_mod), $acceptor],
        },
      );
    }
    else {
      @terms = (
        {
          term   => 'splice_donor_5th_base_variant',
          region => [$donor + (4 * $strand_mod), $donor + (4 * $strand_mod)],
        },
        {
          term   => 'splice_donor_region_variant',
          region => [$donor + (2 * $strand_mod), $donor + (5 * $strand_mod)],
        },
        {
          term   => 'splice_polypyrimidine_tract_variant',
          region => [$acceptor + (-16 * $strand_mod), $acceptor + (-2 * $strand_mod)],
        },
      );
    }

    foreach my $term_hash (@terms) {
      # Reverse-strand windows are built end-first, so order them numerically
      # before handing them to overlap().
      my ($region_start, $region_end) = sort {$a <=> $b} @{$term_hash->{region}};
      next unless overlap($vf_start, $vf_end, $region_start, $region_end);

      $results{$term_hash->{term}}++;
      last;
    }
  }

  return {} unless %results;

  # Terms missing from %TERM_RANK get the largest native integer as rank, so
  # they sort last instead of triggering "uninitialized value" warnings.
  my $rank_of = sub {
    my ($term) = @_;
    return exists $TERM_RANK{$term} ? $TERM_RANK{$term} : ~0;
  };

  my @ordered_terms = sort {
    $rank_of->($a) <=> $rank_of->($b) || $a cmp $b
  } keys %results;

  return { SpliceRegion => \@ordered_terms };
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
182
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
183 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
184