comparison dir_plugins/ProteinSeqs.pm @ 0:e545d0a25ffe draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:17:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e545d0a25ffe
1 =head1 LICENSE
2
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 =head1 CONTACT
19
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
21
22 =cut
23
24 =head1 NAME
25
26 ProteinSeqs
27
28 =head1 SYNOPSIS
29
30 mv ProteinSeqs.pm ~/.vep/Plugins
31 ./vep -i variations.vcf --plugin ProteinSeqs,reference.fa,mutated.fa
32
33 =head1 DESCRIPTION
34
35 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
36 prints out the reference and mutated protein sequences of any
37 proteins found with non-synonymous mutations in the input file.
38
39 You should supply the name of file where you want to store the
40 reference protein sequences as the first argument, and a file to
41 store the mutated sequences as the second argument.
42
43 Note that, for simplicity, where stop codons are gained the plugin
44 simply substitutes a '*' into the sequence and does not truncate the
45 protein. Where a stop codon is lost any new amino acids encoded by the
46 mutation are appended to the sequence, but the plugin does not attempt
47 to translate until the next downstream stop codon. Also, the protein
48 sequence resulting from each mutation is printed separately, no attempt
49 is made to apply multiple mutations to the same protein.
50
51 =cut
52
53 package ProteinSeqs;
54
55 use strict;
56 use warnings;
57
58 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
59
60 sub version {
61 return '2.4';
62 }
63
64 sub feature_types {
65 return ['Transcript'];
66 }
67
68 sub new {
69 my $class = shift;
70
71 my $self = $class->SUPER::new(@_);
72
73 if($self->{config}->{fork}) {
74 print STDERR "WARNING: Plugin ProteinSeqs is disabling forking\n" unless $self->{config}->{quiet};
75 delete($self->{config}->{fork});
76 }
77
78 # use some default file names if none are supplied
79
80 my $ref_file = $self->params->[0] || 'reference.fa';
81 my $mut_file = $self->params->[1] || 'mutated.fa';
82
83 open $self->{ref_file}, ">$ref_file" or die "Failed to open $ref_file";
84 open $self->{mut_file}, ">$mut_file" or die "Failed to open $mut_file";
85
86 return $self;
87 }
88
89 sub run {
90 my ($self, $tva) = @_;
91
92 # check if we have a mutant amino acid, if not there isn't much we can do!
93
94 if (my $mut_aa = $tva->peptide) {
95
96 # get the peptide coordinates
97
98 my $tl_start = $tva->transcript_variation->translation_start;
99 my $tl_end = $tva->transcript_variation->translation_end;
100
101 # and our reference sequence
102
103 my $ref_seq = $tva->transcript_variation->_peptide;
104
105 # splice the mutant peptide sequence into the reference sequence
106
107 my $mut_seq = $ref_seq;
108
109 substr($mut_seq, $tl_start-1, $tl_end - $tl_start + 1) = $mut_aa;
110
111 # print out our reference and mutant sequences
112
113 my $translation_id = $tva->transcript->translation->stable_id;
114
115 # only print the reference sequence if we haven't printed it yet
116
117 $self->print_fasta($ref_seq, $translation_id, $self->{ref_file})
118 unless $self->{printed_ref}->{$translation_id}++;
119
120 # we always print the mutated sequence as each mutation may have
121 # a different consequence
122
123 $self->print_fasta($mut_seq, $tva->hgvs_protein, $self->{mut_file});
124 }
125
126 # return an empty hashref because we don't want to add
127 # anything to the VEP output file
128
129 return {};
130 }
131
132 sub print_fasta {
133 my ($self, $peptide, $id, $fh) = @_;
134
135 # break the sequence into 80 characters per line
136
137 $peptide =~ s/(.{80})/$1\n/g;
138
139 # get rid of any trailing newline
140
141 chomp $peptide;
142
143 # print the sequence
144
145 print $fh ">$id\n$peptide\n";
146 }
147
148 sub STORABLE_freeze {
149 my ($self, $cloning) = @_;
150 return if $cloning;
151
152 close $self->{ref_file};
153 close $self->{mut_file};
154
155 delete $self->{ref_file};
156 delete $self->{ref_file};
157 }
158
159 sub STORABLE_thaw {
160 my ($self, $cloning) = @_;
161 return if $cloning;
162
163 my $ref_file = $self->params->[0] || 'reference.fa';
164 my $mut_file = $self->params->[1] || 'mutated.fa';
165
166 open $self->{ref_file}, ">>$ref_file" or die "Failed to open $ref_file";
167 open $self->{mut_file}, ">>$mut_file" or die "Failed to open $mut_file";
168 }
169
170 1;