annotate dir_plugins/Blosum62.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
=head1 LICENSE

Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=head1 CONTACT

 Ensembl <http://www.ensembl.org/info/about/contact/index.html>

=cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
=head1 NAME

 Blosum62

=head1 SYNOPSIS

 mv Blosum62.pm ~/.vep/Plugins
 ./vep -i variations.vcf --plugin Blosum62

=head1 DESCRIPTION

 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
 looks up the BLOSUM 62 substitution matrix score for the reference
 and alternative amino acids predicted for a missense mutation. It adds
 one new entry to the VEP's Extra column, BLOSUM62, which is the
 associated score.

=cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43 package Blosum62;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# BLOSUM62 substitution scores as a flat list of 20 x 20 = 400 integers,
# stored row by row. Row and column order is given by @AAs below, so the
# score for the pair ($AAs[$i], $AAs[$j]) lives at index ($i * 20) + $j.
#
# NOTE: qw() treats every whitespace-separated token as a value, so nothing
# except the scores themselves (no comments, no line numbers) may appear
# between the parentheses.
my @BLOSUM_62 = qw(
    4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0
    -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3
    -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3
    -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3
    0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1
    -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2
    -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2
    0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3
    -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3
    -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3
    -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1
    -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2
    -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1
    -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1
    -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2
    1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2
    0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0
    -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3
    -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1
    0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4
);

# One-letter amino-acid codes, in the row/column order used by @BLOSUM_62.
my @AAs = qw(A R N D C Q E G H I L K M F P S T W Y V);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
74
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Constructor.
#
# Delegates object creation to the parent class constructor (passing all
# arguments through unchanged), then expands the flat @BLOSUM_62 score list
# into a two-level hash stored under $self->{matrix}, so that the score for a
# pair of amino acids can be read as $self->{matrix}{$ref_aa}{$alt_aa}.
#
# Dies if the number of scores is not the square of the number of amino
# acids, because every index computed below would then point at the wrong
# value (or past the end of the list).
#
# Returns the constructed object.
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);

    my $num = scalar @AAs;

    # Guard against a truncated or contaminated score table.
    die sprintf(
        "Blosum62: expected %d matrix scores for %d amino acids, found %d\n",
        $num * $num, $num, scalar @BLOSUM_62
    ) unless scalar(@BLOSUM_62) == $num * $num;

    # construct a hash representing the matrix for quick lookups
    for my $row (0 .. $num - 1) {
        for my $col (0 .. $num - 1) {
            # The list is stored row by row, $num scores per row.
            $self->{matrix}{ $AAs[$row] }{ $AAs[$col] }
                = $BLOSUM_62[ ($row * $num) + $col ];
        }
    }

    return $self;
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
92
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns the version string '2.3'.
# NOTE(review): presumably this is the version reported to the VEP plugin
# framework - confirm its exact meaning against BaseVepPlugin.
sub version {
    return '2.3';
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
96
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns an array reference holding the single feature type name
# 'Transcript'.
# NOTE(review): presumably the VEP plugin framework uses this to decide for
# which feature types run() is invoked - confirm against BaseVepPlugin.
sub feature_types {
    return ['Transcript'];
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
100
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Returns a hash reference mapping the output key this plugin produces
# ('BLOSUM62', the same key run() returns) to a human-readable description
# of that value.
sub get_header_info {
    return {
        BLOSUM62 => "BLOSUM62 substitution score for the reference and alternative amino acids",
    };
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
106
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
# Looks up the BLOSUM62 score for a single amino-acid substitution.
#
# Parameters:
#   $self - plugin object; $self->{matrix} is the two-level score hash
#           built by the constructor.
#   $tva  - object providing a pep_allele_string() method.
#           NOTE(review): presumably a TranscriptVariationAllele supplied by
#           VEP - confirm against the caller.
#
# Returns a hash reference { BLOSUM62 => $score } when the peptide allele
# string has the form "X/Y" (exactly one upper-case letter on each side of
# the slash) and the matrix holds a score for that pair. In every other case
# an empty hash reference is returned.
sub run {
    my ($self, $tva) = @_;

    # Fetch the allele string once rather than calling the accessor twice.
    my $pep_alleles = $tva->pep_allele_string;
    return {} unless $pep_alleles;

    # Capture into lexicals instead of relying on the global $1/$2, and use
    # \A...\z so that a trailing newline cannot slip through.
    my ($ref_aa, $alt_aa) = $pep_alleles =~ m{\A([A-Z])/([A-Z])\z};
    return {} unless defined $ref_aa && defined $alt_aa;

    # Fetch the row first: indexing straight through
    # $self->{matrix}{$ref_aa}{$alt_aa} would autovivify an empty row for
    # letters that are not in the matrix (e.g. X), mutating the shared table.
    my $row = $self->{matrix}{$ref_aa};
    return {} unless defined $row;

    # A score of 0 is valid, so test definedness rather than truth.
    my $score = $row->{$alt_aa};
    return {} unless defined $score;

    return {
        BLOSUM62 => $score
    };
}
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
123
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
124 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
125