comparison dir_plugins/MPC.pm @ 3:49397129aec0 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:20:39 -0400
parents e545d0a25ffe
children
comparison
equal deleted inserted replaced
2:17c98d091710 3:49397129aec0
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
21
22 =cut
23
24 =head1 NAME
25
26 MPC
27
28 =head1 SYNOPSIS
29
30 mv MPC.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin MPC,fordist_constraint_official_mpc_values.txt.gz
32
33 =head1 DESCRIPTION
34
35 A VEP plugin that retrieves MPC scores for variants from a tabix-indexed MPC data file.
36
37 MPC is a missense deleteriousness metric based on the analysis of genic regions
38 depleted of missense mutations in the Exome Aggregation Consortium (ExAC) data.
39
40 The MPC score is the product of work by Kaitlin Samocha (ks20@sanger.ac.uk).
41 Publication currently in pre-print: Samocha et al bioRxiv 2017 (TBD)
42
43 The MPC score file is available to download from:
44
45 ftp://ftp.broadinstitute.org/pub/ExAC_release/release1/regional_missense_constraint/
46
47 The data are currently mapped to GRCh37 only. Not all transcripts are included; see
48 README in the above directory for exclusion criteria.
49
50 =cut
51
52 package MPC;
53
54 use strict;
55 use warnings;
56
57 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
58
59 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
60
61 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);
62
# Sequence Ontology consequence terms for which run() attempts an MPC lookup.
# Any allele whose consequences include none of these terms is skipped.
my %INCLUDE_SO = map { ($_ => 1) } qw(
    missense_variant
    stop_lost
    stop_gained
    start_lost
);
64
# Constructor. Builds the plugin through the parent class constructor, then
# applies MPC-specific setup.
#
# Arguments: the class name followed by whatever the parent constructor accepts.
# Returns  : the new plugin object.
sub new {
    my ($class, @ctor_args) = @_;

    my $plugin = $class->SUPER::new(@ctor_args);

    # Both expansion settings are set to zero.
    # NOTE(review): presumably this stops the base class from widening the
    # queried region around the variant - confirm against the base class.
    $plugin->expand_left(0);
    $plugin->expand_right(0);

    # NOTE(review): presumably reads the parameters given on the --plugin
    # command line - confirm against the base class.
    $plugin->get_user_params();

    return $plugin;
}
77
# Returns an array reference holding the single feature type name
# 'Transcript'. A fresh array reference is returned on every call.
sub feature_types {
    my @types = ('Transcript');
    return \@types;
}
81
# Returns a hash reference mapping the output field name 'MPC' to its
# description string. A fresh hash reference is returned on every call.
sub get_header_info {
    my %header = (MPC => 'MPC score');
    return \%header;
}
85
# Look up the MPC score for one transcript variation allele.
#
# Arguments: $self - the plugin object
#            $tva  - the transcript variation allele being annotated
# Returns  : { MPC => <value> } when a data record matches the variant's
#            position, allele and transcript stable ID; otherwise an empty
#            hash reference.
sub run {
    my ($self, $tva) = @_;

    # Only alleles with at least one consequence listed in %INCLUDE_SO
    # (missense, stop lost/gained, start lost) are looked up.
    return {}
        unless grep { $INCLUDE_SO{ $_->SO_term } }
               @{ $tva->get_all_OverlapConsequences };

    my $vf = $tva->variation_feature;

    # Single-position variants only. Coordinates are numbers, so they are
    # compared numerically, exactly like the position match further down.
    return {} unless $vf->{start} == $vf->{end};

    # Get the allele, reverse-complementing it when the feature is on the
    # negative strand, so it can be compared with the records' alt field.
    my $allele = $tva->variation_feature_seq;
    reverse_comp(\$allele) if $vf->{strand} < 0;

    # Anything other than a single unambiguous base cannot match a record.
    return {} unless $allele =~ /^[ACGT]$/;

    # Records are transcript-specific, so the stable ID is part of the match.
    my $tr_id = $tva->transcript->stable_id;

    # Keep the first record that agrees on position, allele and transcript.
    my ($res) = grep {
        $_->{pos} == $vf->{start}
            && $_->{alt} eq $allele
            && $_->{tr} eq $tr_id
    } @{ $self->get_data($vf->{chr}, $vf->{start}, $vf->{end}) };

    return $res ? { MPC => $res->{MPC} } : {};
}
113
# Turn one tab-separated line of the data file into a record.
#
# Arguments: $self - the plugin object (not used here)
#            $line - one line of text
# Returns  : hash reference with the keys
#              pos - second column
#              alt - fourth column
#              tr  - sixth column
#              MPC - last column
#            Columns that are absent from the line come back as undef.
sub parse_data {
    my ($self, $line) = @_;

    my @columns = split /\t/, $line;

    my %record;
    @record{qw(pos alt tr MPC)} = @columns[1, 3, 5, -1];

    return \%record;
}
126
# Returns the 'pos' value of the record passed as the second argument.
# get_end returns the same field.
sub get_start {
    my (undef, $record) = @_;
    return $record->{pos};
}
130
# Returns the 'pos' value of the record passed as the second argument.
# get_start returns the same field.
sub get_end {
    my (undef, $record) = @_;
    return $record->{pos};
}
134
135 1;