comparison dir_plugins/Carol.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
comparison
equal deleted inserted replaced
2:17c98d091710 3:49397129aec0
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
21
22 =cut
23
24 =head1 NAME
25
26 Carol
27
28 =head1 SYNOPSIS
29
30 mv Carol.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin Carol
32
33 =head1 DESCRIPTION
34
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that calculates
36 the Combined Annotation scoRing toOL (CAROL) score (1) for a missense mutation
37 based on the pre-calculated SIFT (2) and PolyPhen-2 (3) scores from the Ensembl
38 API (4). It adds one new entry class to the VEP's Extra column, CAROL which is
39 the calculated CAROL score. Note that this module is a perl reimplementation of
40 the original R script, available at:
41
42 http://www.sanger.ac.uk/resources/software/carol/
43
44 I believe that both versions implement the same algorithm, but if there are any
45 discrepancies the R version should be treated as the reference implementation.
46 Bug reports are welcome.
47
48 References:
49
50 (1) Lopes MC, Joyce C, Ritchie GRS, John SL, Cunningham F, Asimit J, Zeggini E.
51 A combined functional annotation score for non-synonymous variants
52 Human Heredity (in press)
53
54 (2) Kumar P, Henikoff S, Ng PC.
55 Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm
56 Nature Protocols 4(8):1073-1081 (2009)
57 doi:10.1038/nprot.2009.86
58
59 (3) Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR.
60 A method and server for predicting damaging missense mutations
61 Nature Methods 7(4):248-249 (2010)
62 doi:10.1038/nmeth0410-248
63
64 (4) Flicek P, et al.
65 Ensembl 2012
66 Nucleic Acids Research (2011)
67 doi: 10.1093/nar/gkr991
68
69 =cut
70
71 package Carol;
72
73 use strict;
74 use warnings;
75
76 use Math::CDF qw(pnorm qnorm);
77
78 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
79
80 my $CAROL_CUTOFF = 0.98;
81
82 sub version {
83 return '2.3';
84 }
85
86 sub feature_types {
87 return ['Transcript'];
88 }
89
90 sub get_header_info {
91 return {
92 CAROL => "Combined Annotation scoRing toOL prediction",
93 };
94 }
95
96 sub run {
97 my ($self, $tva) = @_;
98
99 my $pph_pred = $tva->polyphen_prediction;
100 my $pph_score = $pph_pred ? ($pph_pred eq 'unknown' ? undef: $tva->polyphen_score) : undef;
101 my $sift_score = $tva->sift_score;
102
103 my ($carol_pred, $carol_score) = compute_carol($pph_score, $sift_score);
104
105 my $results = {};
106
107 if (defined $carol_pred) {
108
109 $carol_score = sprintf "%.3f", $carol_score;
110
111 my $result = "$carol_pred($carol_score)";
112
113 if (@{ $self->params } > 0) {
114 $result = $carol_pred if ($self->params->[0] =~ /^p/i);
115 $result = $carol_score if ($self->params->[0] =~ /^s/i);
116 }
117
118 $results = {
119 CAROL => $result,
120 };
121 }
122
123 return $results;
124 }
125
126 sub compute_carol {
127
128 my ($pph_score, $sift_score) = @_;
129
130 my $carol_score;
131
132 if (defined $pph_score) {
133 $pph_score = 0.999 if $pph_score == 1;
134 $pph_score = 0.0001 if $pph_score == 0;
135 }
136
137 if (defined $sift_score) {
138 $sift_score = 1 - $sift_score;
139 $sift_score = 0.999 if $sift_score == 1;
140 $sift_score = 0.0001 if $sift_score == 0;
141 }
142
143 if (defined $pph_score && defined $sift_score) {
144
145 my $pph_weight = log(1/(1-$pph_score));
146 my $sift_weight = log(1/(1-$sift_score));
147
148 # we take -qnorm, because the R script uses qnorm(..., lower.tail = FALSE)
149
150 my $pph_z = -qnorm($pph_score);
151 my $sift_z = -qnorm($sift_score);
152
153 my $numerator = ($pph_weight * $pph_z) + ($sift_weight * $sift_z);
154 my $denominator = sqrt( ($pph_weight ** 2) + ($sift_weight ** 2) );
155
156 # likewise we take 1 - pnorm
157
158 $carol_score = 1 - pnorm($numerator / $denominator);
159 }
160 elsif (defined $pph_score) {
161 $carol_score = $pph_score;
162 }
163 else {
164 $carol_score = $sift_score;
165 }
166
167 if (defined $carol_score) {
168 my $carol_pred = $carol_score < $CAROL_CUTOFF ? 'Neutral' : 'Deleterious';
169 return ($carol_pred, $carol_score);
170 }
171 else {
172 return undef;
173 }
174 }
175
176 1;
177