comparison dir_plugins/ExACpLI.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
comparison
equal deleted inserted replaced
2:17c98d091710 3:49397129aec0
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Please email comments or questions to the public Ensembl
21 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22
23 Questions may also be sent to the Ensembl help desk at
24 <http://www.ensembl.org/Help/Contact>.
25
26 =cut
27
28 =head1 NAME
29
30 ExACpLI - Add ExAC pLI to the VEP output
31
32 =head1 SYNOPSIS
33
34 mv ExACpLI.pm ~/.vep/Plugins
35 mv ExACpLI_values.txt ~/.vep/Plugins
36 ./vep -i variants.vcf --plugin ExACpLI
37
38 =head1 DESCRIPTION
39
40
41 A VEP plugin that adds the probability of a gene being
42 loss-of-function intolerant (pLI) to the VEP output.
43
44 Lek et al. (2016) estimated pLI using the expectation-maximization
45 (EM) algorithm and data from 60,706 individuals from
46 ExAC (http://exac.broadinstitute.org/about). The closer pLI is to 1,
47 the more likely the gene is loss-of-function (LoF) intolerant.
48
49 Note: the pLI was calculated using a representative transcript and
50 is reported by gene in the plugin.
51
52 The data for the plugin is provided by Kaitlin Samocha and Daniel MacArthur.
53 See https://www.ncbi.nlm.nih.gov/pubmed/27535533 for a description
54 of the dataset and analysis.
55
56 The ExACpLI_values.txt file is found alongside the plugin in the
57 VEP_plugins GitHub repository. The file contains the fields gene and pLI
58 extracted from the file at
59
60 ftp://ftp.broadinstitute.org/pub/ExAC_release/release0.3/functional_gene_constraint/
61 fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt
62
63 To use another values file, add it as a parameter i.e.
64
65 ./vep -i variants.vcf --plugin ExACpLI,values_file.txt
66
67
68 =cut
69
70 package ExACpLI;
71
72 use strict;
73 use warnings;
74
75 use DBI;
76
77 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
78
# Constructor: builds the plugin object and loads the gene -> pLI lookup table.
#
# The values file is taken from the first plugin parameter. When no parameter
# is given, ExACpLI_values.txt in the directory this module was loaded from
# (according to %INC) is used instead.
#
# Each data line is split on whitespace into a gene symbol and a score. The
# header line (score column equal to 'pLI') and lines that do not provide both
# fields are skipped. Scores are stored under the lower-cased gene symbol,
# formatted to two decimal places, in $self->{scores}.
#
# Dies if the values file does not exist, cannot be opened, or yields no
# scores at all.
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);

    my $file = $self->params->[0];

    if (!$file) {
        # Fall back to the values file sitting next to this module.
        my $plugin_dir = $INC{'ExACpLI.pm'};
        # Anchor at the end so only the trailing file name is stripped, never
        # a directory component that happens to contain the same text.
        $plugin_dir =~ s/ExACpLI\.pm\z//i;
        $file = $plugin_dir . '/ExACpLI_values.txt';
    }

    die("ERROR: ExACpLI values file $file not found\n") unless $file && -e $file;

    # An existing file may still be unreadable (permissions, directory, ...);
    # report that directly instead of failing later with "No scores read".
    open(my $fh, '<', $file)
        or die("ERROR: Could not open ExACpLI values file $file: $!\n");

    my %scores;

    while (my $line = <$fh>) {
        chomp $line;
        my ($gene, $score) = split ' ', $line;

        # Blank or truncated lines carry no usable gene/score pair.
        next unless defined $gene && defined $score;

        # Skip the column header line.
        next if $score eq 'pLI';

        $scores{lc($gene)} = sprintf("%.2f", $score);
    }

    close $fh;

    die("ERROR: No scores read from $file\n") unless scalar keys %scores;

    $self->{scores} = \%scores;

    return $self;
}
112
# Lists the feature types this plugin handles: a reference to a new
# single-element array naming 'Transcript'.
sub feature_types {
    my @handled_types = ('Transcript');
    return \@handled_types;
}
116
# Describes the output field added by this plugin: a reference to a new hash
# mapping the field name 'ExACpLI' to its description text.
sub get_header_info {
    my %header_for = (
        ExACpLI => "ExACpLI value for gene",
    );
    return \%header_for;
}
122
# Looks up the pLI score for the gene of the transcript behind the given
# object (obtained via its transcript() method).
#
# The gene name is read from the transcript's _gene_symbol entry, falling back
# to _gene_hgnc. The lookup in $self->{scores} uses the lower-cased name.
#
# Returns { ExACpLI => <score> } when a true score is stored for the gene,
# otherwise an empty hash reference.
sub run {
    my ($self, $tva) = @_;

    my $symbol = $tva->transcript->{_gene_symbol} || $tva->transcript->{_gene_hgnc};
    if (!$symbol) {
        return {};
    }

    my $pli = $self->{scores}->{lc($symbol)};
    if ($pli) {
        return { ExACpLI => $pli };
    }

    return {};
}
132
133 1;
134