Mercurial > repos > dvanzessen > vep_emc
comparison dir_plugins/Carol.pm @ 0:e545d0a25ffe draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:17:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e545d0a25ffe |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | |
| 4 Copyright [2016-2018] EMBL-European Bioinformatics Institute | |
| 5 | |
| 6 Licensed under the Apache License, Version 2.0 (the "License"); | |
| 7 you may not use this file except in compliance with the License. | |
| 8 You may obtain a copy of the License at | |
| 9 | |
| 10 http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 | |
| 12 Unless required by applicable law or agreed to in writing, software | |
| 13 distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 See the License for the specific language governing permissions and | |
| 16 limitations under the License. | |
| 17 | |
| 18 =head1 CONTACT | |
| 19 | |
| 20 Ensembl <http://www.ensembl.org/info/about/contact/index.html> | |
| 21 | |
| 22 =cut | |
| 23 | |
| 24 =head1 NAME | |
| 25 | |
| 26 Carol | |
| 27 | |
| 28 =head1 SYNOPSIS | |
| 29 | |
| 30 mv Carol.pm ~/.vep/Plugins | |
| 31 ./vep -i variations.vcf --plugin Carol | |
| 32 | |
| 33 =head1 DESCRIPTION | |
| 34 | |
| 35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that calculates | |
| 36 the Combined Annotation scoRing toOL (CAROL) score (1) for a missense mutation | |
| 37 based on the pre-calculated SIFT (2) and PolyPhen-2 (3) scores from the Ensembl | |
| 38 API (4). It adds one new entry class to the VEP's Extra column, CAROL which is | |
| 39 the calculated CAROL score. Note that this module is a perl reimplementation of | |
| 40 the original R script, available at: | |
| 41 | |
| 42 http://www.sanger.ac.uk/resources/software/carol/ | |
| 43 | |
| 44 I believe that both versions implement the same algorithm, but if there are any | |
| 45 discrepancies the R version should be treated as the reference implementation. | |
| 46 Bug reports are welcome. | |
| 47 | |
| 48 References: | |
| 49 | |
| 50 (1) Lopes MC, Joyce C, Ritchie GRS, John SL, Cunningham F, Asimit J, Zeggini E. | |
| 51 A combined functional annotation score for non-synonymous variants | |
| 52 Human Heredity (in press) | |
| 53 | |
| 54 (2) Kumar P, Henikoff S, Ng PC. | |
| 55 Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm | |
| 56 Nature Protocols 4(8):1073-1081 (2009) | |
| 57 doi:10.1038/nprot.2009.86 | |
| 58 | |
| 59 (3) Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. | |
| 60 A method and server for predicting damaging missense mutations | |
| 61 Nature Methods 7(4):248-249 (2010) | |
| 62 doi:10.1038/nmeth0410-248 | |
| 63 | |
| 64 (4) Flicek P, et al. | |
| 65 Ensembl 2012 | |
| 66 Nucleic Acids Research (2011) | |
| 67 doi: 10.1093/nar/gkr991 | |
| 68 | |
| 69 =cut | |
| 70 | |
| 71 package Carol; | |
| 72 | |
| 73 use strict; | |
| 74 use warnings; | |
| 75 | |
| 76 use Math::CDF qw(pnorm qnorm); | |
| 77 | |
| 78 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | |
| 79 | |
| 80 my $CAROL_CUTOFF = 0.98; | |
| 81 | |
| 82 sub version { | |
| 83 return '2.3'; | |
| 84 } | |
| 85 | |
| 86 sub feature_types { | |
| 87 return ['Transcript']; | |
| 88 } | |
| 89 | |
| 90 sub get_header_info { | |
| 91 return { | |
| 92 CAROL => "Combined Annotation scoRing toOL prediction", | |
| 93 }; | |
| 94 } | |
| 95 | |
| 96 sub run { | |
| 97 my ($self, $tva) = @_; | |
| 98 | |
| 99 my $pph_pred = $tva->polyphen_prediction; | |
| 100 my $pph_score = $pph_pred ? ($pph_pred eq 'unknown' ? undef: $tva->polyphen_score) : undef; | |
| 101 my $sift_score = $tva->sift_score; | |
| 102 | |
| 103 my ($carol_pred, $carol_score) = compute_carol($pph_score, $sift_score); | |
| 104 | |
| 105 my $results = {}; | |
| 106 | |
| 107 if (defined $carol_pred) { | |
| 108 | |
| 109 $carol_score = sprintf "%.3f", $carol_score; | |
| 110 | |
| 111 my $result = "$carol_pred($carol_score)"; | |
| 112 | |
| 113 if (@{ $self->params } > 0) { | |
| 114 $result = $carol_pred if ($self->params->[0] =~ /^p/i); | |
| 115 $result = $carol_score if ($self->params->[0] =~ /^s/i); | |
| 116 } | |
| 117 | |
| 118 $results = { | |
| 119 CAROL => $result, | |
| 120 }; | |
| 121 } | |
| 122 | |
| 123 return $results; | |
| 124 } | |
| 125 | |
| 126 sub compute_carol { | |
| 127 | |
| 128 my ($pph_score, $sift_score) = @_; | |
| 129 | |
| 130 my $carol_score; | |
| 131 | |
| 132 if (defined $pph_score) { | |
| 133 $pph_score = 0.999 if $pph_score == 1; | |
| 134 $pph_score = 0.0001 if $pph_score == 0; | |
| 135 } | |
| 136 | |
| 137 if (defined $sift_score) { | |
| 138 $sift_score = 1 - $sift_score; | |
| 139 $sift_score = 0.999 if $sift_score == 1; | |
| 140 $sift_score = 0.0001 if $sift_score == 0; | |
| 141 } | |
| 142 | |
| 143 if (defined $pph_score && defined $sift_score) { | |
| 144 | |
| 145 my $pph_weight = log(1/(1-$pph_score)); | |
| 146 my $sift_weight = log(1/(1-$sift_score)); | |
| 147 | |
| 148 # we take -qnorm, because the R script uses qnorm(..., lower.tail = FALSE) | |
| 149 | |
| 150 my $pph_z = -qnorm($pph_score); | |
| 151 my $sift_z = -qnorm($sift_score); | |
| 152 | |
| 153 my $numerator = ($pph_weight * $pph_z) + ($sift_weight * $sift_z); | |
| 154 my $denominator = sqrt( ($pph_weight ** 2) + ($sift_weight ** 2) ); | |
| 155 | |
| 156 # likewise we take 1 - pnorm | |
| 157 | |
| 158 $carol_score = 1 - pnorm($numerator / $denominator); | |
| 159 } | |
| 160 elsif (defined $pph_score) { | |
| 161 $carol_score = $pph_score; | |
| 162 } | |
| 163 else { | |
| 164 $carol_score = $sift_score; | |
| 165 } | |
| 166 | |
| 167 if (defined $carol_score) { | |
| 168 my $carol_pred = $carol_score < $CAROL_CUTOFF ? 'Neutral' : 'Deleterious'; | |
| 169 return ($carol_pred, $carol_score); | |
| 170 } | |
| 171 else { | |
| 172 return undef; | |
| 173 } | |
| 174 } | |
| 175 | |
| 176 1; | |
| 177 |
