|
0
|
1 =head1 NAME
|
|
|
2 PON_P2
|
|
|
3
|
|
|
4 =head1 SYNOPSIS
|
|
|
5 mv PON_P2.pm ~/.vep/Plugins
|
|
|
6 ./vep -i variations.vcf --plugin PON_P2,/path/to/python/script/ponp2.py,hg37
|
|
|
7
|
|
|
8 =head1 DESCRIPTION
|
|
|
9 This plugin for Ensembl Variant Effect Predictor (VEP) computes the predictions of PON-P2
|
|
|
10 for amino acid substitutions in human proteins. PON-P2 is developed and maintained by
|
|
|
11 Protein Structure and Bioinformatics Group at Lund University and is available at
|
|
|
12 http://structure.bmc.lu.se/PON-P2/.
|
|
|
13
|
|
|
To run this plugin, you need the PON-P2 Python client script and its dependencies
(Python and the python-suds package). The script can be downloaded from
http://structure.bmc.lu.se/PON-P2/vep.html/ and its complete path must be supplied
as the first plugin argument, followed by the reference genome (hg37 or hg38).
|
|
|
17
|
|
|
18 =head1 CONTACT
|
|
|
19 Abhishek Niroula <abhishek.niroula@med.lu.se>
|
|
|
20 Mauno Vihinen <mauno.vihinen@med.lu.se>
|
|
|
21
|
|
|
22 =cut
|
|
|
23
|
|
|
24 package PON_P2;
|
|
|
25
|
|
|
26
|
|
|
27 use strict;
|
|
|
28 use warnings;
|
|
|
29
|
|
|
30
|
|
|
31 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
|
32 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
|
|
|
33
|
|
|
34 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
|
|
|
35
|
|
|
36
|
|
|
# Report the feature types this plugin handles: transcripts only.
# Returns a reference to a freshly built one-element array.
sub feature_types {
    my @handled_types = ('Transcript');
    return \@handled_types;
}
|
|
|
40
|
|
|
41
|
|
|
# Describe the output field added by this plugin.
# Returns a hash reference mapping the field name (PON_P2) to its
# human-readable description.
sub get_header_info {
    my %description_of = (
        PON_P2 => "PON-P2 prediction and score for amino acid substitutions",
    );
    return \%description_of;
}
|
|
|
47
|
|
|
48
|
|
|
# Constructor: builds the plugin object via the parent class, then validates
# and stores the two plugin parameters.
#
# Plugin parameters (read from $self->params):
#   [0]  path to the PON-P2 python client script; must exist on disk
#   [1]  reference genome label; must be exactly 'hg37' or 'hg38'
#
# Dies with an explanatory message if either parameter is missing, if the
# genome label is not one of the two accepted values, or if the script path
# does not exist.
#
# Returns the plugin object with $self->{command} and $self->{Hg} set.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # get parameters
    my $command = $self->params->[0];
    my $Hg      = $self->params->[1];

    # The messages are double-quoted so that the trailing "\n" is a real
    # newline; this also stops Perl from appending "at ... line N" to them.
    die "ERROR: Path to python script not specified! Specify path to python script e.g. --plugin PON_P2,/path/to/python/client/for/ponp2.py,[hg37/hg38]\n"
        unless defined($command);

    # Must test $Hg here (not $command), otherwise a missing genome is never
    # reported with the right message.
    die "ERROR: Reference genome not specified! Specify the reference genome after the path to python file e.g. --plugin PON_P2,/path/to/python/client/for/ponp2.py,[hg37/hg38]\n"
        unless defined($Hg);

    # Plain string comparison instead of the experimental smartmatch operator.
    die "ERROR: Wrong reference genome specified! It should be either 'hg37' or 'hg38'\n"
        unless grep { $Hg eq $_ } qw(hg37 hg38);

    die "ERROR: Incorrect path to ponp2.py\n" unless -e $command;

    $self->{command} = $command;
    $self->{Hg}      = $Hg;

    return $self;
}
|
|
|
63
|
|
|
64
|
|
|
# Compute the PON-P2 annotation for one transcript variation allele.
#
# Parameters:
#   $self  plugin object; $self->{command} (python script path) and
#          $self->{Hg} (reference genome label) are read
#   $tva   the allele object handed over by VEP; the methods
#          get_all_OverlapConsequences, variation_feature and hgvs_genomic
#          are called on it
#
# Returns a hash reference:
#   { PON_P2 => "prediction(probability)" }  when the script yields a result
#   {}                                       in every other case (not a
#       missense variant, not a single-nucleotide substitution, script could
#       not be started, empty or incomplete output, or prediction "cannot")
sub run {
    my ($self, $tva) = @_;

    # only for missense variants
    return {}
        unless grep { $_->SO_term eq 'missense_variant' }
               @{ $tva->get_all_OverlapConsequences };

    ## Now get the variation features
    my $vf = $tva->variation_feature;

    ## If not snp return (coordinates are numbers, so compare numerically)
    return {} unless $vf->{start} == $vf->{end};

    ## Parse "<chr>:g.<position><ref>><alt>" and accept only a single
    ## nucleotide substitution; anything else (including an undefined HGVS
    ## string) is skipped without operating on undefined values.
    my $hgvs = $tva->hgvs_genomic;
    return {} unless defined $hgvs;

    my ($Chr, $Position, $Ref, $Alt) =
        $hgvs =~ m{\A (.*?) :g\. (\d+) ([ACGT]) > ([ACGT]) \z}x;
    return {} unless defined $Alt;

    my $V = join '_', $Chr, $Position, $Ref, $Alt;

    ## Call pon-p2 python script here.
    ## List-form pipe open runs python directly, without a shell, so a script
    ## path containing spaces or shell metacharacters is passed through intact.
    open(my $ponp2_fh, '-|', 'python', $self->{command}, $V, $self->{Hg})
        or return {};
    my $ponp2Res = do { local $/; <$ponp2_fh> };
    close $ponp2_fh;    # exit status is deliberately ignored; only output matters

    return {} unless defined $ponp2Res && length $ponp2Res;
    $ponp2Res =~ s/\R//g;

    # Expected output: prediction and probability separated by a tab.
    my ($pred, $prob) = split /\t/, $ponp2Res;

    # Require both fields to be present; test with length() rather than
    # truthiness so that a probability of "0" is still reported.
    return {} unless defined $pred && length $pred;
    return {} unless defined $prob && length $prob;

    ## Can PON-P2 predict?
    return {} if $pred eq "cannot";

    ## Return predictions
    return { PON_P2 => "$pred($prob)" };
}
|
|
|
106
|
|
|
107 1;
|