comparison dir_plugins/Condel.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
comparison
equal deleted inserted replaced
2:17c98d091710 3:49397129aec0
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
21
22 =cut
23
24 =head1 NAME
25
26 Condel
27
28 =head1 SYNOPSIS
29
30 mv Condel.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin Condel,/path/to/config/Condel/config,b
32
33 =head1 DESCRIPTION
34
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that calculates
36 the Consensus Deleteriousness (Condel) score (1) for a missense mutation
37 based on the pre-calculated SIFT (2) and PolyPhen-2 (3) scores from the Ensembl
38 API (4). It adds one new entry class to the VEP's Extra column, Condel which is
39 the calculated Condel score. This version of Condel was developed by the Biomedical Genomics Group
40 of the Universitat Pompeu Fabra, at the Barcelona Biomedical Research Park and available at
41 (http://bg.upf.edu/condel) until April 2014. The code in this plugin is based on a script provided by this
42 group and slightly reformatted to fit into the Ensembl API.
43
44 The plugin takes 3 command line arguments, the first is the path to a Condel
45 configuration directory which contains cutoffs and the distribution files etc.,
46 the second is either "s", "p", or "b" to output the Condel score, prediction or
47 both (the default is both), and the third argument is either 1 or 2 to use the
48 original version of Condel (1), or the newer version (2) - 2 is the default and
49 is recommended to avoid false positive predictions from Condel in some
50 circumstances.
51
52 An example Condel configuration file and a set of distribution files can be found
53 in the config/Condel directory in this repository. You should edit the
54 config/Condel/config/condel_SP.conf file and set the 'condel.dir' parameter to
55 the full path to the location of the config/Condel directory on your system.
56
57 References:
58
59 (1) Gonzalez-Perez A, Lopez-Bigas N.
60 Improving the assessment of the outcome of non-synonymous SNVs with a Consensus deleteriousness score (Condel)
61 Am J Hum Genet 88(4):440-449 (2011)
62 doi:10.1016/j.ajhg.2011.03.004
63
64 (2) Kumar P, Henikoff S, Ng PC.
65 Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm
66 Nature Protocols 4(8):1073-1081 (2009)
67 doi:10.1038/nprot.2009.86
68
69 (3) Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR.
70 A method and server for predicting damaging missense mutations
71 Nature Methods 7(4):248-249 (2010)
72 doi:10.1038/nmeth0410-248
73
74 (4) Flicek P, et al.
75 Ensembl 2012
76 Nucleic Acids Research (2011)
77 doi: 10.1093/nar/gkr991
78
79 =cut
80
81 package Condel;
82
83 use strict;
84 use warnings;
85
86 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
87
88 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
89
90 sub version {
91 return '2.4';
92 }
93
94 sub feature_types {
95 return ['Transcript'];
96 }
97
98 sub get_header_info {
99 return {
100 Condel => "Consensus deleteriousness score for an amino acid substitution based on SIFT and PolyPhen-2",
101 };
102 }
103
104 sub new {
105 my $class = shift;
106
107 my $self = $class->SUPER::new(@_);
108
109 # parse the config file and distribution files
110
111 my $config_dir = $self->params->[0];
112 $self->{output} = $self->params->[1] || 'b';
113 $self->{version} = $self->params->[2] || 2;
114
115 # find config dir
116 unless($config_dir && -d $config_dir) {
117 $config_dir = $INC{'Condel.pm'};
118 $config_dir =~ s/Condel\.pm/config\/Condel\/config/;
119 die "ERROR: Unable to find Condel config path\n" unless -d $config_dir;
120 }
121
122 my $config_file = "$config_dir/condel_SP.conf";
123
124 open(CONF, "<$config_file") || die "Could not open $config_file";
125
126 my @conf = <CONF>;
127
128 my $safe_conf = 0;
129
130 for (my $i = 0; $i < @conf; $i++){
131 if ($conf[$i] =~ /condel\.dir=\'(\S+)\'/){
132 $self->{config}->{'condel.dir'} = $1;
133
134 # user has not edited config, attempt to get correct dir
135 if($self->{config}->{'condel.dir'} eq 'path/to/config/Condel/') {
136 my $dir = $INC{'Condel.pm'};
137 $dir =~ s/Condel\.pm/config\/Condel/;
138 $self->{config}->{'condel.dir'} = $dir;
139 }
140 $safe_conf++ if -d $self->{config}->{'condel.dir'};
141 }
142 elsif ($conf[$i] =~ /(cutoff\.HumVar\.\w+)=\'(\S+)\'/){
143 $self->{config}->{$1} = $2;
144 $safe_conf++;
145 }
146 elsif ($conf[$i] =~ /(max\.HumVar\.\w+)=\'(\S+)\'/){
147 $self->{config}->{$1} = $2;
148 $safe_conf++;
149 }
150 }
151
152 if ($safe_conf < 3){
153 die "Malformed config file!!!\n\n";
154 }
155
156 open(SIFT, $self->{config}->{'condel.dir'}."/methdist/sift.data");
157 my @sift = <SIFT>;
158 close SIFT;
159
160 for (my $i = 0; $i < @sift; $i++){
161 if ($sift[$i] =~ /(\S+)\s+(\S+)\s+(\S+)/){
162 $self->{sift}->{tp}->{$1} = $2;
163 $self->{sift}->{tn}->{$1} = $3;
164 }
165 }
166
167 open(POLYPHEN, $self->{config}->{'condel.dir'}."/methdist/polyphen.data");
168 my @polyphen = <POLYPHEN>;
169 close POLYPHEN;
170
171 for (my $i = 0; $i < @polyphen; $i++){
172 if ($polyphen[$i] =~ /(\S+)\s+(\S+)\s+(\S+)/){
173 $self->{polyphen}->{tp}->{$1} = $2;
174 $self->{polyphen}->{tn}->{$1} = $3;
175 }
176 }
177
178 return $self;
179 }
180
181 sub run {
182 my ($self, $tva) = @_;
183
184 my $pph_score = $tva->polyphen_score;
185 my $pph_pred = $tva->polyphen_prediction;
186 my $sift_score = $tva->sift_score;
187
188 my $results = {};
189
190 if (defined $pph_score && defined $sift_score && $pph_pred ne 'unknown') {
191
192 my ($condel_pred, $condel_score) = $self->compute_condel($sift_score, $pph_score);
193
194 $condel_score = sprintf "%.3f", $condel_score;
195
196 my $output = "$condel_pred($condel_score)";
197
198 $output = $condel_pred if ($self->{output} =~ /^p/);
199 $output = $condel_score if ($self->{output} =~ /^s/);
200
201 $results = {
202 Condel => $output,
203 };
204 }
205
206 return $results;
207 }
208
209 sub compute_condel {
210
211 my ($self, $sift_score, $polyphen_score) = @_;
212
213 my $USE_V2 = $self->{version} == 2;
214
215 my $class;
216
217 my $base = 0;
218 my $int_score = 0;
219
220 $sift_score = sprintf("%.3f", $sift_score);
221 $polyphen_score = sprintf("%.3f", $polyphen_score);
222
223 if ($sift_score <= $self->{config}->{'cutoff.HumVar.sift'}){
224 $int_score += sprintf("%.3f", (1 - $sift_score/$self->{config}->{'max.HumVar.sift'})*(1-$self->{sift}->{tn}->{$sift_score}));
225 $base += $USE_V2 ? 1 : 1-$self->{sift}->{tn}->{$sift_score};
226 }
227 else {
228 $int_score += sprintf("%.3f", (1 - $sift_score/$self->{config}->{'max.HumVar.sift'})*(1-$self->{sift}->{tp}->{$sift_score}));
229 $base += $USE_V2 ? 1 : 1-$self->{sift}->{tp}->{$sift_score};
230 }
231
232 if ($polyphen_score >= $self->{config}->{'cutoff.HumVar.polyphen'}){
233 $int_score += sprintf("%.3f", $polyphen_score/$self->{config}->{'max.HumVar.polyphen'}*(1-$self->{polyphen}->{tn}->{$polyphen_score}));
234 $base += $USE_V2 ? 1 : 1-$self->{polyphen}->{tn}->{$polyphen_score};
235 }
236 else {
237 $int_score += sprintf("%.3f", $polyphen_score/$self->{config}->{'max.HumVar.polyphen'}*(1-$self->{polyphen}->{tp}->{$polyphen_score}));
238 $base += $USE_V2 ? 1 : 1-$self->{polyphen}->{tp}->{$polyphen_score};
239 }
240
241 if ($base == 0){
242 $int_score = -1;
243 $class = 'not_computable_was';
244 }
245 else {
246 $int_score = sprintf("%.3f", $int_score/$base);
247 }
248
249 if ($int_score >= 0.469){
250 $class = 'deleterious';
251 }
252 elsif ($int_score >= 0 && $int_score < 0.469) {
253 $class = 'neutral';
254 }
255
256 # if the user wants an array, return the class and score, otherwise just return the class
257
258 return ($class, $int_score);
259 }
260
261 1;