| 0 | 1 =head1 LICENSE | 
|  | 2 | 
|  | 3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | 
|  | 4 Copyright [2016-2018] EMBL-European Bioinformatics Institute | 
|  | 5 | 
|  | 6 Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 7 you may not use this file except in compliance with the License. | 
|  | 8 You may obtain a copy of the License at | 
|  | 9 | 
|  | 10      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | 
|  | 12 Unless required by applicable law or agreed to in writing, software | 
|  | 13 distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 15 See the License for the specific language governing permissions and | 
|  | 16 limitations under the License. | 
|  | 17 | 
|  | 18 =head1 CONTACT | 
|  | 19 | 
|  | 20  Ensembl <http://www.ensembl.org/info/about/contact/index.html> | 
|  | 21 | 
|  | 22 =cut | 
|  | 23 | 
|  | 24 =head1 NAME | 
|  | 25 | 
|  | 26  ProteinSeqs | 
|  | 27 | 
|  | 28 =head1 SYNOPSIS | 
|  | 29 | 
|  | 30  mv ProteinSeqs.pm ~/.vep/Plugins | 
|  | 31  ./vep -i variations.vcf --plugin ProteinSeqs,reference.fa,mutated.fa | 
|  | 32 | 
|  | 33 =head1 DESCRIPTION | 
|  | 34 | 
|  | 35  This is a plugin for the Ensembl Variant Effect Predictor (VEP) that | 
|  | 36  prints out the reference and mutated protein sequences of any | 
|  | 37  proteins found with non-synonymous mutations in the input file. | 
|  | 38 | 
|  | 39  You should supply the name of file where you want to store the | 
|  | 40  reference protein sequences as the first argument, and a file to | 
|  | 41  store the mutated sequences as the second argument. | 
|  | 42 | 
|  | 43  Note that, for simplicity, where stop codons are gained the plugin | 
|  | 44  simply substitutes a '*' into the sequence and does not truncate the | 
|  | 45  protein. Where a stop codon is lost any new amino acids encoded by the | 
|  | 46  mutation are appended to the sequence, but the plugin does not attempt | 
|  | 47  to translate until the next downstream stop codon. Also, the protein | 
|  | 48  sequence resulting from each mutation is printed separately, no attempt | 
|  | 49  is made to apply multiple mutations to the same protein. | 
|  | 50 | 
|  | 51 =cut | 
|  | 52 | 
|  | 53 package ProteinSeqs; | 
|  | 54 | 
|  | 55 use strict; | 
|  | 56 use warnings; | 
|  | 57 | 
|  | 58 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | 
|  | 59 | 
|  | 60 sub version { | 
|  | 61     return '2.4'; | 
|  | 62 } | 
|  | 63 | 
|  | 64 sub feature_types { | 
|  | 65     return ['Transcript']; | 
|  | 66 } | 
|  | 67 | 
|  | 68 sub new { | 
|  | 69     my $class = shift; | 
|  | 70 | 
|  | 71     my $self = $class->SUPER::new(@_); | 
|  | 72 | 
|  | 73     if($self->{config}->{fork}) { | 
|  | 74       print STDERR "WARNING: Plugin ProteinSeqs is disabling forking\n" unless $self->{config}->{quiet}; | 
|  | 75       delete($self->{config}->{fork}); | 
|  | 76     } | 
|  | 77 | 
|  | 78     # use some default file names if none are supplied | 
|  | 79 | 
|  | 80     my $ref_file = $self->params->[0] || 'reference.fa'; | 
|  | 81     my $mut_file = $self->params->[1] || 'mutated.fa'; | 
|  | 82 | 
|  | 83     open $self->{ref_file}, ">$ref_file" or die "Failed to open $ref_file"; | 
|  | 84     open $self->{mut_file}, ">$mut_file" or die "Failed to open $mut_file"; | 
|  | 85 | 
|  | 86     return $self; | 
|  | 87 } | 
|  | 88 | 
|  | 89 sub run { | 
|  | 90     my ($self, $tva) = @_; | 
|  | 91 | 
|  | 92     # check if we have a mutant amino acid, if not there isn't much we can do! | 
|  | 93 | 
|  | 94     if (my $mut_aa = $tva->peptide) { | 
|  | 95 | 
|  | 96         # get the peptide coordinates | 
|  | 97 | 
|  | 98         my $tl_start = $tva->transcript_variation->translation_start; | 
|  | 99         my $tl_end = $tva->transcript_variation->translation_end; | 
|  | 100 | 
|  | 101         # and our reference sequence | 
|  | 102 | 
|  | 103         my $ref_seq = $tva->transcript_variation->_peptide; | 
|  | 104 | 
|  | 105         # splice the mutant peptide sequence into the reference sequence | 
|  | 106 | 
|  | 107         my $mut_seq = $ref_seq; | 
|  | 108 | 
|  | 109         substr($mut_seq, $tl_start-1, $tl_end - $tl_start + 1) = $mut_aa; | 
|  | 110 | 
|  | 111         # print out our reference and mutant sequences | 
|  | 112 | 
|  | 113         my $translation_id = $tva->transcript->translation->stable_id; | 
|  | 114 | 
|  | 115         # only print the reference sequence if we haven't printed it yet | 
|  | 116 | 
|  | 117         $self->print_fasta($ref_seq, $translation_id, $self->{ref_file}) | 
|  | 118             unless $self->{printed_ref}->{$translation_id}++; | 
|  | 119 | 
|  | 120         # we always print the mutated sequence as each mutation may have | 
|  | 121         # a different consequence | 
|  | 122 | 
|  | 123         $self->print_fasta($mut_seq, $tva->hgvs_protein, $self->{mut_file}); | 
|  | 124     } | 
|  | 125 | 
|  | 126     # return an empty hashref because we don't want to add | 
|  | 127     # anything to the VEP output file | 
|  | 128 | 
|  | 129     return {}; | 
|  | 130 } | 
|  | 131 | 
|  | 132 sub print_fasta { | 
|  | 133     my ($self, $peptide, $id, $fh) = @_; | 
|  | 134 | 
|  | 135     # break the sequence into 80 characters per line | 
|  | 136 | 
|  | 137     $peptide =~ s/(.{80})/$1\n/g; | 
|  | 138 | 
|  | 139     # get rid of any trailing newline | 
|  | 140 | 
|  | 141     chomp $peptide; | 
|  | 142 | 
|  | 143     # print the sequence | 
|  | 144 | 
|  | 145     print $fh ">$id\n$peptide\n"; | 
|  | 146 } | 
|  | 147 | 
|  | 148 sub STORABLE_freeze { | 
|  | 149     my ($self, $cloning) = @_; | 
|  | 150     return if $cloning; | 
|  | 151 | 
|  | 152     close $self->{ref_file}; | 
|  | 153     close $self->{mut_file}; | 
|  | 154 | 
|  | 155     delete $self->{ref_file}; | 
|  | 156     delete $self->{ref_file}; | 
|  | 157 } | 
|  | 158 | 
|  | 159 sub STORABLE_thaw { | 
|  | 160     my ($self, $cloning) = @_; | 
|  | 161     return if $cloning; | 
|  | 162 | 
|  | 163     my $ref_file = $self->params->[0] || 'reference.fa'; | 
|  | 164     my $mut_file = $self->params->[1] || 'mutated.fa'; | 
|  | 165 | 
|  | 166     open $self->{ref_file}, ">>$ref_file" or die "Failed to open $ref_file"; | 
|  | 167     open $self->{mut_file}, ">>$mut_file" or die "Failed to open $mut_file"; | 
|  | 168 } | 
|  | 169 | 
|  | 170 1; |