|
0
|
1 =head1 LICENSE
|
|
|
2
|
|
|
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
|
|
|
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
|
|
|
5
|
|
|
6 Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
7 you may not use this file except in compliance with the License.
|
|
|
8 You may obtain a copy of the License at
|
|
|
9
|
|
|
10 http://www.apache.org/licenses/LICENSE-2.0
|
|
|
11
|
|
|
12 Unless required by applicable law or agreed to in writing, software
|
|
|
13 distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
15 See the License for the specific language governing permissions and
|
|
|
16 limitations under the License.
|
|
|
17
|
|
|
18 =head1 CONTACT
|
|
|
19
|
|
|
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
|
|
|
21
|
|
|
22 =cut
|
|
|
23
|
|
|
24 =head1 NAME
|
|
|
25
|
|
|
26 Condel
|
|
|
27
|
|
|
28 =head1 SYNOPSIS
|
|
|
29
|
|
|
30 mv Condel.pm ~/.vep/Plugins
|
|
|
31 ./vep -i variations.vcf --plugin Condel,/path/to/config/Condel/config,b
|
|
|
32
|
|
|
33 =head1 DESCRIPTION
|
|
|
34
|
|
|
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that calculates
|
|
|
36 the Consensus Deleteriousness (Condel) score (1) for a missense mutation
|
|
|
37 based on the pre-calculated SIFT (2) and PolyPhen-2 (3) scores from the Ensembl
|
|
|
38 API (4). It adds one new entry class to the VEP's Extra column, Condel which is
|
|
|
39 the calculated Condel score. This version of Condel was developed by the Biomedical Genomics Group
|
|
|
40 of the Universitat Pompeu Fabra, at the Barcelona Biomedical Research Park and available at
|
|
|
41 (http://bg.upf.edu/condel) until April 2014. The code in this plugin is based on a script provided by this
|
|
|
42 group and slightly reformatted to fit into the Ensembl API.
|
|
|
43
|
|
|
44 The plugin takes 3 command line arguments, the first is the path to a Condel
|
|
|
45 configuration directory which contains cutoffs and the distribution files etc.,
|
|
|
46 the second is either "s", "p", or "b" to output the Condel score, prediction or
|
|
|
47 both (the default is both), and the third argument is either 1 or 2 to use the
|
|
|
48 original version of Condel (1), or the newer version (2) - 2 is the default and
|
|
|
49 is recommended to avoid false positive predictions from Condel in some
|
|
|
50 circumstances.
|
|
|
51
|
|
|
52 An example Condel configuration file and a set of distribution files can be found
|
|
|
53 in the config/Condel directory in this repository. You should edit the
|
|
|
54 config/Condel/config/condel_SP.conf file and set the 'condel.dir' parameter to
|
|
|
55 the full path to the location of the config/Condel directory on your system.
|
|
|
56
|
|
|
57 References:
|
|
|
58
|
|
|
59 (1) Gonzalez-Perez A, Lopez-Bigas N.
|
|
|
60 Improving the assessment of the outcome of non-synonymous SNVs with a Consensus deleteriousness score (Condel)
|
|
|
61 Am J Hum Genet 88(4):440-449 (2011)
|
|
|
62 doi:10.1016/j.ajhg.2011.03.004
|
|
|
63
|
|
|
64 (2) Kumar P, Henikoff S, Ng PC.
|
|
|
65 Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm
|
|
|
66 Nature Protocols 4(8):1073-1081 (2009)
|
|
|
67 doi:10.1038/nprot.2009.86
|
|
|
68
|
|
|
69 (3) Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR.
|
|
|
70 A method and server for predicting damaging missense mutations
|
|
|
71 Nature Methods 7(4):248-249 (2010)
|
|
|
72 doi:10.1038/nmeth0410-248
|
|
|
73
|
|
|
74 (4) Flicek P, et al.
|
|
|
75 Ensembl 2012
|
|
|
76 Nucleic Acids Research (2011)
|
|
|
77 doi: 10.1093/nar/gkr991
|
|
|
78
|
|
|
79 =cut
|
|
|
80
|
|
|
81 package Condel;
|
|
|
82
|
|
|
83 use strict;
|
|
|
84 use warnings;
|
|
|
85
|
|
|
86 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
|
|
|
87
|
|
|
88 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
|
|
|
89
|
|
|
90 sub version {
|
|
|
91 return '2.4';
|
|
|
92 }
|
|
|
93
|
|
|
94 sub feature_types {
|
|
|
95 return ['Transcript'];
|
|
|
96 }
|
|
|
97
|
|
|
98 sub get_header_info {
|
|
|
99 return {
|
|
|
100 Condel => "Consensus deleteriousness score for an amino acid substitution based on SIFT and PolyPhen-2",
|
|
|
101 };
|
|
|
102 }
|
|
|
103
|
|
|
104 sub new {
|
|
|
105 my $class = shift;
|
|
|
106
|
|
|
107 my $self = $class->SUPER::new(@_);
|
|
|
108
|
|
|
109 # parse the config file and distribution files
|
|
|
110
|
|
|
111 my $config_dir = $self->params->[0];
|
|
|
112 $self->{output} = $self->params->[1] || 'b';
|
|
|
113 $self->{version} = $self->params->[2] || 2;
|
|
|
114
|
|
|
115 # find config dir
|
|
|
116 unless($config_dir && -d $config_dir) {
|
|
|
117 $config_dir = $INC{'Condel.pm'};
|
|
|
118 $config_dir =~ s/Condel\.pm/config\/Condel\/config/;
|
|
|
119 die "ERROR: Unable to find Condel config path\n" unless -d $config_dir;
|
|
|
120 }
|
|
|
121
|
|
|
122 my $config_file = "$config_dir/condel_SP.conf";
|
|
|
123
|
|
|
124 open(CONF, "<$config_file") || die "Could not open $config_file";
|
|
|
125
|
|
|
126 my @conf = <CONF>;
|
|
|
127
|
|
|
128 my $safe_conf = 0;
|
|
|
129
|
|
|
130 for (my $i = 0; $i < @conf; $i++){
|
|
|
131 if ($conf[$i] =~ /condel\.dir=\'(\S+)\'/){
|
|
|
132 $self->{config}->{'condel.dir'} = $1;
|
|
|
133
|
|
|
134 # user has not edited config, attempt to get correct dir
|
|
|
135 if($self->{config}->{'condel.dir'} eq 'path/to/config/Condel/') {
|
|
|
136 my $dir = $INC{'Condel.pm'};
|
|
|
137 $dir =~ s/Condel\.pm/config\/Condel/;
|
|
|
138 $self->{config}->{'condel.dir'} = $dir;
|
|
|
139 }
|
|
|
140 $safe_conf++ if -d $self->{config}->{'condel.dir'};
|
|
|
141 }
|
|
|
142 elsif ($conf[$i] =~ /(cutoff\.HumVar\.\w+)=\'(\S+)\'/){
|
|
|
143 $self->{config}->{$1} = $2;
|
|
|
144 $safe_conf++;
|
|
|
145 }
|
|
|
146 elsif ($conf[$i] =~ /(max\.HumVar\.\w+)=\'(\S+)\'/){
|
|
|
147 $self->{config}->{$1} = $2;
|
|
|
148 $safe_conf++;
|
|
|
149 }
|
|
|
150 }
|
|
|
151
|
|
|
152 if ($safe_conf < 3){
|
|
|
153 die "Malformed config file!!!\n\n";
|
|
|
154 }
|
|
|
155
|
|
|
156 open(SIFT, $self->{config}->{'condel.dir'}."/methdist/sift.data");
|
|
|
157 my @sift = <SIFT>;
|
|
|
158 close SIFT;
|
|
|
159
|
|
|
160 for (my $i = 0; $i < @sift; $i++){
|
|
|
161 if ($sift[$i] =~ /(\S+)\s+(\S+)\s+(\S+)/){
|
|
|
162 $self->{sift}->{tp}->{$1} = $2;
|
|
|
163 $self->{sift}->{tn}->{$1} = $3;
|
|
|
164 }
|
|
|
165 }
|
|
|
166
|
|
|
167 open(POLYPHEN, $self->{config}->{'condel.dir'}."/methdist/polyphen.data");
|
|
|
168 my @polyphen = <POLYPHEN>;
|
|
|
169 close POLYPHEN;
|
|
|
170
|
|
|
171 for (my $i = 0; $i < @polyphen; $i++){
|
|
|
172 if ($polyphen[$i] =~ /(\S+)\s+(\S+)\s+(\S+)/){
|
|
|
173 $self->{polyphen}->{tp}->{$1} = $2;
|
|
|
174 $self->{polyphen}->{tn}->{$1} = $3;
|
|
|
175 }
|
|
|
176 }
|
|
|
177
|
|
|
178 return $self;
|
|
|
179 }
|
|
|
180
|
|
|
181 sub run {
|
|
|
182 my ($self, $tva) = @_;
|
|
|
183
|
|
|
184 my $pph_score = $tva->polyphen_score;
|
|
|
185 my $pph_pred = $tva->polyphen_prediction;
|
|
|
186 my $sift_score = $tva->sift_score;
|
|
|
187
|
|
|
188 my $results = {};
|
|
|
189
|
|
|
190 if (defined $pph_score && defined $sift_score && $pph_pred ne 'unknown') {
|
|
|
191
|
|
|
192 my ($condel_pred, $condel_score) = $self->compute_condel($sift_score, $pph_score);
|
|
|
193
|
|
|
194 $condel_score = sprintf "%.3f", $condel_score;
|
|
|
195
|
|
|
196 my $output = "$condel_pred($condel_score)";
|
|
|
197
|
|
|
198 $output = $condel_pred if ($self->{output} =~ /^p/);
|
|
|
199 $output = $condel_score if ($self->{output} =~ /^s/);
|
|
|
200
|
|
|
201 $results = {
|
|
|
202 Condel => $output,
|
|
|
203 };
|
|
|
204 }
|
|
|
205
|
|
|
206 return $results;
|
|
|
207 }
|
|
|
208
|
|
|
209 sub compute_condel {
|
|
|
210
|
|
|
211 my ($self, $sift_score, $polyphen_score) = @_;
|
|
|
212
|
|
|
213 my $USE_V2 = $self->{version} == 2;
|
|
|
214
|
|
|
215 my $class;
|
|
|
216
|
|
|
217 my $base = 0;
|
|
|
218 my $int_score = 0;
|
|
|
219
|
|
|
220 $sift_score = sprintf("%.3f", $sift_score);
|
|
|
221 $polyphen_score = sprintf("%.3f", $polyphen_score);
|
|
|
222
|
|
|
223 if ($sift_score <= $self->{config}->{'cutoff.HumVar.sift'}){
|
|
|
224 $int_score += sprintf("%.3f", (1 - $sift_score/$self->{config}->{'max.HumVar.sift'})*(1-$self->{sift}->{tn}->{$sift_score}));
|
|
|
225 $base += $USE_V2 ? 1 : 1-$self->{sift}->{tn}->{$sift_score};
|
|
|
226 }
|
|
|
227 else {
|
|
|
228 $int_score += sprintf("%.3f", (1 - $sift_score/$self->{config}->{'max.HumVar.sift'})*(1-$self->{sift}->{tp}->{$sift_score}));
|
|
|
229 $base += $USE_V2 ? 1 : 1-$self->{sift}->{tp}->{$sift_score};
|
|
|
230 }
|
|
|
231
|
|
|
232 if ($polyphen_score >= $self->{config}->{'cutoff.HumVar.polyphen'}){
|
|
|
233 $int_score += sprintf("%.3f", $polyphen_score/$self->{config}->{'max.HumVar.polyphen'}*(1-$self->{polyphen}->{tn}->{$polyphen_score}));
|
|
|
234 $base += $USE_V2 ? 1 : 1-$self->{polyphen}->{tn}->{$polyphen_score};
|
|
|
235 }
|
|
|
236 else {
|
|
|
237 $int_score += sprintf("%.3f", $polyphen_score/$self->{config}->{'max.HumVar.polyphen'}*(1-$self->{polyphen}->{tp}->{$polyphen_score}));
|
|
|
238 $base += $USE_V2 ? 1 : 1-$self->{polyphen}->{tp}->{$polyphen_score};
|
|
|
239 }
|
|
|
240
|
|
|
241 if ($base == 0){
|
|
|
242 $int_score = -1;
|
|
|
243 $class = 'not_computable_was';
|
|
|
244 }
|
|
|
245 else {
|
|
|
246 $int_score = sprintf("%.3f", $int_score/$base);
|
|
|
247 }
|
|
|
248
|
|
|
249 if ($int_score >= 0.469){
|
|
|
250 $class = 'deleterious';
|
|
|
251 }
|
|
|
252 elsif ($int_score >= 0 && $int_score < 0.469) {
|
|
|
253 $class = 'neutral';
|
|
|
254 }
|
|
|
255
|
|
|
256 # if the user wants an array, return the class and score, otherwise just return the class
|
|
|
257
|
|
|
258 return ($class, $int_score);
|
|
|
259 }
|
|
|
260
|
|
|
261 1;
|