annotate dir_plugins/PON_P2.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2 PON_P2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5 mv PON_P2.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 ./vep -i variations.vcf --plugin PON_P2,/path/to/python/script/ponp2.py,hg37
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9 This plugin for Ensembl Variant Effect Predictor (VEP) computes the predictions of PON-P2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 for amino acid substitutions in human proteins. PON-P2 is developed and maintained by
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11 the Protein Structure and Bioinformatics Group at Lund University and is available at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 http://structure.bmc.lu.se/PON-P2/.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 To run this plugin, you will require a python script and its dependencies (Python,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 python suds). The python file can be downloaded from http://structure.bmc.lu.se/PON-P2/vep.html/
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 and the complete path to this file must be supplied while using this plugin.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19 Abhishek Niroula <abhishek.niroula@med.lu.se>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Mauno Vihinen <mauno.vihinen@med.lu.se>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 package PON_P2;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32 use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a reference to a list holding the single feature type name
# 'Transcript'. A fresh array is built on every call.
sub feature_types {
    my @types = ('Transcript');
    return \@types;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a hash reference mapping the output field name PON_P2 to its
# human-readable description.
sub get_header_info {
    my %header = (
        PON_P2 => "PON-P2 prediction and score for amino acid substitutions",
    );
    return \%header;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Constructor. Builds the object through the parent class constructor, then
# validates and stores the two plugin parameters read from $self->params:
#   [0] path to the ponp2.py python client  -> stored as $self->{command}
#   [1] reference genome, 'hg37' or 'hg38'  -> stored as $self->{Hg}
#
# Dies with an error message when either parameter is missing, when the
# reference genome is not one of the two accepted values, or when the
# script path does not exist.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # get parameters
    my $command = $self->params->[0];
    my $Hg      = $self->params->[1];

    # Messages are double-quoted so the trailing \n is a real newline
    # (it suppresses Perl's "at FILE line N" suffix).
    die "ERROR: Path to python script not specified! Specify path to python script e.g. --plugin PON_P2,/path/to/python/client/for/ponp2.py,[hg37/hg38]\n"
        unless defined $command;

    # Check the genome parameter itself, not the script path a second time.
    die "ERROR: Reference genome not specified! Specify the reference genome after the path to python file e.g. --plugin PON_P2,/path/to/python/client/for/ponp2.py,[hg37/hg38]\n"
        unless defined $Hg;

    # Plain string comparison instead of the experimental smartmatch operator.
    die "ERROR: Wrong reference genome specified! It should be either 'hg37' or 'hg38'\n"
        unless $Hg eq 'hg37' || $Hg eq 'hg38';

    die "ERROR: Incorrect path to ponp2.py\n" unless -e $command;

    $self->{command} = $command;
    $self->{Hg}      = $Hg;
    return $self;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
## Annotate one transcript variation allele with its PON-P2 prediction.
##
## Arguments:
##   $self - plugin object; reads $self->{command} (path to the python
##           client) and $self->{Hg} (reference genome label)
##   $tva  - object providing get_all_OverlapConsequences,
##           variation_feature and hgvs_genomic
##
## Returns a hash reference { PON_P2 => "prediction(probability)" } when the
## python client prints a tab-separated prediction and probability. Returns
## an empty hash reference in every other case: not a missense variant, not
## a single-base A/C/G/T substitution, the client could not be started or
## printed nothing usable, or the prediction is "cannot".
sub run {
    my ($self, $tva) = @_;

    ## only for missense variants
    return {}
        unless grep { $_->SO_term eq 'missense_variant' }
        @{ $tva->get_all_OverlapConsequences };

    ## If not snp return (coordinates are compared numerically)
    my $vf = $tva->variation_feature;
    return {} unless $vf->{start} == $vf->{end};

    ## Genomic HGVS string, expected as <chr>:g.<position><ref>><alt>
    my $Variation = $tva->hgvs_genomic;
    return {} unless defined $Variation;

    ## Check for single nucleotide substitution; anything else does not match
    my ($Chr, $Position, $Ref, $Alt) =
        $Variation =~ m{\A ([^:]+) :g\. (\d+) ([ACGT]) > ([ACGT]) \z}x;
    return {} unless defined $Alt;

    my $V = join '_', $Chr, $Position, $Ref, $Alt;

    ## Call pon-p2 python script here. The list form of open passes each
    ## argument to python directly, so no shell parses the script path or
    ## the variant string.
    open(my $ponp2_out, '-|', 'python', $self->{command}, $V, $self->{Hg})
        or return {};
    my $ponp2Res = do { local $/; <$ponp2_out> };

    ## The exit status is deliberately ignored; only the printed output counts
    close $ponp2_out;

    return {} unless defined $ponp2Res;
    $ponp2Res =~ s/\R//g;

    my ($pred, $prob) = split /\t/, $ponp2Res;

    ## Nothing usable printed. Length checks keep a probability of "0" valid.
    return {} unless defined $pred && length $pred;
    return {} unless defined $prob && length $prob;

    ## Can PON-P2 predict?
    return {} if $pred eq 'cannot';

    ## Return predictions
    return { PON_P2 => "$pred($prob)" };
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
106
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
107 1;