Mercurial > repos > dvanzessen > vep_emc
view dir_plugins/Carol.pm @ 3:49397129aec0 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:20:39 -0400 |
| parents | e545d0a25ffe |
| children |
line wrap: on
line source
=head1 LICENSE Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute Copyright [2016-2018] EMBL-European Bioinformatics Institute Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. =head1 CONTACT Ensembl <http://www.ensembl.org/info/about/contact/index.html> =cut =head1 NAME Carol =head1 SYNOPSIS mv Carol.pm ~/.vep/Plugins ./vep -i variations.vcf --plugin Carol =head1 DESCRIPTION This is a plugin for the Ensembl Variant Effect Predictor (VEP) that calculates the Combined Annotation scoRing toOL (CAROL) score (1) for a missense mutation based on the pre-calculated SIFT (2) and PolyPhen-2 (3) scores from the Ensembl API (4). It adds one new entry class to the VEP's Extra column, CAROL which is the calculated CAROL score. Note that this module is a perl reimplementation of the original R script, available at: http://www.sanger.ac.uk/resources/software/carol/ I believe that both versions implement the same algorithm, but if there are any discrepancies the R version should be treated as the reference implementation. Bug reports are welcome. References: (1) Lopes MC, Joyce C, Ritchie GRS, John SL, Cunningham F, Asimit J, Zeggini E. A combined functional annotation score for non-synonymous variants Human Heredity (in press) (2) Kumar P, Henikoff S, Ng PC. Predicting the effects of coding non-synonymous variants on protein function using the SIFT algorithm Nature Protocols 4(8):1073-1081 (2009) doi:10.1038/nprot.2009.86 (3) Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. A method and server for predicting damaging missense mutations Nature Methods 7(4):248-249 (2010) doi:10.1038/nmeth0410-248 (4) Flicek P, et al. Ensembl 2012 Nucleic Acids Research (2011) doi: 10.1093/nar/gkr991 =cut package Carol; use strict; use warnings; use Math::CDF qw(pnorm qnorm); use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); my $CAROL_CUTOFF = 0.98; sub version { return '2.3'; } sub feature_types { return ['Transcript']; } sub get_header_info { return { CAROL => "Combined Annotation scoRing toOL prediction", }; } sub run { my ($self, $tva) = @_; my $pph_pred = $tva->polyphen_prediction; my $pph_score = $pph_pred ? ($pph_pred eq 'unknown' ? undef: $tva->polyphen_score) : undef; my $sift_score = $tva->sift_score; my ($carol_pred, $carol_score) = compute_carol($pph_score, $sift_score); my $results = {}; if (defined $carol_pred) { $carol_score = sprintf "%.3f", $carol_score; my $result = "$carol_pred($carol_score)"; if (@{ $self->params } > 0) { $result = $carol_pred if ($self->params->[0] =~ /^p/i); $result = $carol_score if ($self->params->[0] =~ /^s/i); } $results = { CAROL => $result, }; } return $results; } sub compute_carol { my ($pph_score, $sift_score) = @_; my $carol_score; if (defined $pph_score) { $pph_score = 0.999 if $pph_score == 1; $pph_score = 0.0001 if $pph_score == 0; } if (defined $sift_score) { $sift_score = 1 - $sift_score; $sift_score = 0.999 if $sift_score == 1; $sift_score = 0.0001 if $sift_score == 0; } if (defined $pph_score && defined $sift_score) { my $pph_weight = log(1/(1-$pph_score)); my $sift_weight = log(1/(1-$sift_score)); # we take -qnorm, because the R script uses qnorm(..., lower.tail = FALSE) my $pph_z = -qnorm($pph_score); my $sift_z = -qnorm($sift_score); my $numerator = ($pph_weight * $pph_z) + ($sift_weight * $sift_z); my $denominator = sqrt( ($pph_weight ** 2) + ($sift_weight ** 2) ); # likewise we take 1 - pnorm $carol_score = 1 - pnorm($numerator / $denominator); } elsif (defined $pph_score) { $carol_score = $pph_score; } else { $carol_score = $sift_score; } if (defined $carol_score) { my $carol_pred = $carol_score < $CAROL_CUTOFF ? 'Neutral' : 'Deleterious'; return ($carol_pred, $carol_score); } else { return undef; } } 1;
