Mercurial > repos > dvanzessen > vep_emc
diff dir_plugins/MPC.pm @ 3:49397129aec0 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:20:39 -0400 |
| parents | e545d0a25ffe |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dir_plugins/MPC.pm Mon Jul 15 05:20:39 2019 -0400 @@ -0,0 +1,135 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2018] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=head1 CONTACT + + Ensembl <http://www.ensembl.org/info/about/contact/index.html> + +=cut + +=head1 NAME + + MPC + +=head1 SYNOPSIS + + mv MPC.pm ~/.vep/Plugins + ./vep -i variations.vcf --plugin MPC,fordist_constraint_official_mpc_values.txt.gz + +=head1 DESCRIPTION + + A VEP plugin that retrieves MPC scores for variants from a tabix-indexed MPC data file. + + MPC is a missense deleteriousness metric based on the analysis of genic regions + depleted of missense mutations in the Exome Aggregation Consortium (ExAC) data. + + The MPC score is the product of work by Kaitlin Samocha (ks20@sanger.ac.uk). + Publication currently in pre-print: Samocha et al bioRxiv 2017 (TBD) + + The MPC score file is available to download from: + + ftp://ftp.broadinstitute.org/pub/ExAC_release/release1/regional_missense_constraint/ + + The data are currently mapped to GRCh37 only. Not all transcripts are included; see + README in the above directory for exclusion criteria. 
=head1 METHODS

  new             - constructs the plugin and reads the user-supplied parameters
  feature_types   - restricts the plugin to Transcript features
  get_header_info - describes the MPC output field
  run             - looks up the MPC score for one transcript variation allele
  parse_data      - converts one tab-delimited data line into a record
  get_start       - start coordinate of a parsed record
  get_end         - end coordinate of a parsed record

=cut

package MPC;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;

use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);

# Sequence Ontology consequence terms for which an MPC lookup is attempted.
# Note that this is wider than missense_variant alone.
my %INCLUDE_SO = map {$_ => 1} qw(missense_variant stop_lost stop_gained start_lost);

# Constructor. Delegates to the parent class, sets both expansion values to 0
# and asks the parent to read the user parameters.
# NOTE(review): expand_left/expand_right presumably control padding of the
# queried region in BaseVepTabixPlugin - confirm against that class.
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  $self->expand_left(0);
  $self->expand_right(0);

  $self->get_user_params();

  return $self;
}

# Returns the list of feature types this plugin runs on.
sub feature_types {
  return ['Transcript'];
}

# Returns the output field name mapped to its description.
sub get_header_info {
  return { MPC => 'MPC score' };
}

# Looks up the MPC score for a single transcript variation allele.
#
# Arguments: $tva - the transcript variation allele object supplied by VEP.
# Returns:   { MPC => <score> } when a data record matches the position,
#            alternate allele and transcript stable ID; otherwise {}.
sub run {
  my ($self, $tva) = @_;

  # Only alleles with at least one consequence listed in %INCLUDE_SO are scored.
  return {} unless grep { $INCLUDE_SO{$_->SO_term} } @{$tva->get_all_OverlapConsequences};

  my $vf = $tva->variation_feature;

  # Single-position variants only. Coordinates are numbers, so compare them
  # numerically rather than as strings.
  return {} unless $vf->{start} == $vf->{end};

  # Get the allele, reverse complementing it when the feature is on the
  # negative strand.
  my $allele = $tva->variation_feature_seq;
  reverse_comp(\$allele) if $vf->{strand} < 0;

  # Exactly one unambiguous base; \A and \z reject a trailing newline.
  return {} unless $allele =~ /\A[ACGT]\z/;

  my $tr_id = $tva->transcript->stable_id;

  # Take the first record matching position, allele and transcript. The
  # defined() checks skip short or malformed lines instead of comparing undef.
  my ($res) = grep {
    defined $_->{pos} && defined $_->{alt} && defined $_->{tr} &&
    $_->{pos} == $vf->{start} &&
    $_->{alt} eq $allele &&
    $_->{tr}  eq $tr_id
  } @{$self->get_data($vf->{chr}, $vf->{start}, $vf->{end})};

  # An absent or empty score is reported as "no result".
  return {} unless $res && defined $res->{MPC} && length $res->{MPC};

  return { MPC => $res->{MPC} };
}

# Converts one tab-delimited line of the data file into a record.
#
# Arguments: $line - a single line of text from the data file.
# Returns:   hashref with keys pos (column 2), alt (column 4), tr (column 6)
#            and MPC (last column).
sub parse_data {
  my ($self, $line) = @_;

  # Remove any trailing line terminator so it cannot end up in the score.
  $line =~ s/[\r\n]+\z//;

  # A negative limit keeps trailing empty fields; without it an empty final
  # column would make index -1 point at an earlier, unrelated column.
  my @fields = split /\t/, $line, -1;

  return {
    pos => $fields[1],
    alt => $fields[3],
    tr  => $fields[5],
    MPC => $fields[-1],
  };
}

# Returns the start coordinate of a parsed record (second argument).
sub get_start {
  return $_[1]->{pos};
}

# Returns the end coordinate of a parsed record (second argument); records
# cover a single position, so this equals the start.
sub get_end {
  return $_[1]->{pos};
}

1;
