annotate variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariationAllele.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Variation::TranscriptVariationAllele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use Bio::EnsEMBL::Variation::TranscriptVariationAllele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $tva = Bio::EnsEMBL::Variation::TranscriptVariationAllele->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 -transcript_variation => $tv,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 -variation_feature_seq => 'A',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 -is_reference => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 print "sequence with respect to the transcript: ", $tva->feature_seq, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 print "sequence with respect to the variation feature: ", $tva->variation_feature_seq, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $tva->get_all_OverlapConsequences }), "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 print "amino acid change: ", $tva->peptide_allele_string, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 print "resulting codon: ", $tva->codon, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 print "reference codon: ", $tva->transcript_variation->get_reference_TranscriptVariationAllele->codon, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 print "PolyPhen prediction: ", $tva->polyphen_prediction, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 print "SIFT prediction: ", $tva->sift_prediction, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 A TranscriptVariationAllele object represents a single allele of a TranscriptVariation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 It provides methods that are specific to the sequence of the allele, such as codon,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 peptide etc. Methods that depend only on position (e.g. CDS start) will be found in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 the associated TranscriptVariation. Ordinarily you will not create these objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 yourself, but instead you would create a TranscriptVariation object which will then
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 construct TranscriptVariationAlleles based on the allele string of the associated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 VariationFeature.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 Note that any methods that are not specific to Transcripts will be found in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 VariationFeatureOverlapAllele superclass.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 package Bio::EnsEMBL::Variation::TranscriptVariationAllele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 use Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix qw($AA_LOOKUP);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use Bio::EnsEMBL::Variation::Utils::Sequence qw(hgvs_variant_notation format_hgvs_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(within_cds within_intron stop_lost affects_start_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use base qw(Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 our $DEBUG = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 sub new_fast {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 my ($class, $hashref) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 # swap a transcript_variation argument for a variation_feature_overlap one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 if ($hashref->{transcript_variation}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 $hashref->{variation_feature_overlap} = delete $hashref->{transcript_variation};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 # and call the superclass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 return $class->SUPER::new_fast($hashref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 =head2 transcript_variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Description: Get/set the associated TranscriptVariation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 Returntype : Bio::EnsEMBL::Variation::TranscriptVariation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Exceptions : throws if the argument is the wrong type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 sub transcript_variation {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 my ($self, $tv) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 assert_ref($tv, 'Bio::EnsEMBL::Variation::TranscriptVariation') if $tv;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 return $self->variation_feature_overlap($tv);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 =head2 variation_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 Description: Get the associated VariationFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 Returntype : Bio::EnsEMBL::Variation::VariationFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 sub variation_feature {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 return $self->transcript_variation->variation_feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 =head2 pep_allele_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 Description: Return a '/' delimited string of the reference peptide and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 peptide resulting from this allele, or a single peptide if this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 allele does not change the peptide (e.g. because it is synonymous)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 sub pep_allele_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 my $pep = $self->peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 return undef unless $pep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 my $ref_pep = $self->transcript_variation->get_reference_TranscriptVariationAllele->peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 return undef unless $ref_pep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 return $ref_pep ne $pep ? $ref_pep.'/'.$pep : $pep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 =head2 codon_allele_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Description: Return a '/' delimited string of the reference codon and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 codon resulting from this allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 sub codon_allele_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 my $codon = $self->codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 return undef unless $codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 my $ref_codon = $self->transcript_variation->get_reference_TranscriptVariationAllele->codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 return $ref_codon.'/'.$codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 =head2 display_codon_allele_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 Description: Return a '/' delimited string of the reference display_codon and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 display_codon resulting from this allele. The display_codon identifies
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 the nucleotides affected by this variant in UPPER CASE and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 nucleotides in lower case
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 sub display_codon_allele_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 my $display_codon = $self->display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 return undef unless $display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 my $ref_display_codon = $self->transcript_variation->get_reference_TranscriptVariationAllele->display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 return undef unless $ref_display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 return $ref_display_codon.'/'.$display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 =head2 peptide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 Description: Return the amino acid sequence that this allele is predicted to result in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 Returntype : string or undef if this allele is not in the CDS or is a frameshift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 sub peptide {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 my ($self, $peptide) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 $self->{peptide} = $peptide if $peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 unless ($self->{peptide}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 return undef unless $self->seq_is_unambiguous_dna;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 if (my $codon = $self->codon) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 # the codon method can set the peptide in some circumstances
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 # so check here before we try an (expensive) translation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 return $self->{peptide} if $self->{peptide};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 # translate the codon sequence to establish the peptide allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 # allow for partial codons - split the sequence into whole and partial
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 # e.g. AAAGG split into AAA and GG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 my $whole_codon = substr($codon, 0, int(length($codon) / 3) * 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 my $partial_codon = substr($codon, int(length($codon) / 3) * 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 my $pep = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 if($whole_codon) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 # for mithocondrial dna we need to to use a different codon table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 my $codon_table = $self->transcript_variation->_codon_table;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 my $codon_seq = Bio::Seq->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 -seq => $whole_codon,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 -moltype => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 -alphabet => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 $pep .= $codon_seq->translate(undef, undef, undef, $codon_table)->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 if($partial_codon) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 $pep .= 'X';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 $pep ||= '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 $self->{peptide} = $pep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 return $self->{peptide};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 =head2 codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 Description: Return the codon sequence that this allele is predicted to result in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 Returntype : string or undef if this allele is not in the CDS or is a frameshift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 sub codon {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 my ($self, $codon) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $self->{codon} = $codon if defined $codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 my $tv = $self->transcript_variation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 return undef unless $tv->translation_start && $tv->translation_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 return undef unless $self->seq_is_dna;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 unless ($self->{codon}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 # try to calculate the codon sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 my $seq = $self->feature_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 $seq = '' if $seq eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 # calculate necessary coords and lengths
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 my $codon_cds_start = $tv->translation_start * 3 - 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 my $codon_cds_end = $tv->translation_end * 3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 my $codon_len = $codon_cds_end - $codon_cds_start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 my $vf_nt_len = $tv->cds_end - $tv->cds_start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 my $allele_len = $self->seq_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 if ($allele_len != $vf_nt_len) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 if (abs($allele_len - $vf_nt_len) % 3) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 # this is a frameshift variation, we don't attempt to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 # calculate the resulting codon or peptide change as this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 # could get quite complicated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 # splice the allele sequence into the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 my $cds = $tv->_translateable_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 substr($cds, $tv->cds_start-1, $vf_nt_len) = $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 # and extract the codon sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 my $codon = substr($cds, $codon_cds_start-1, $codon_len + ($allele_len - $vf_nt_len));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 if (length($codon) < 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 $self->{codon} = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 $self->{peptide} = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $self->{codon} = $codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 return $self->{codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 =head2 display_codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 Description: Return the codon sequence that this allele is predicted to result in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 with the affected nucleotides identified in UPPER CASE and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 nucleotides in lower case
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Returntype : string or undef if this allele is not in the CDS or is a frameshift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 sub display_codon {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 unless ($self->{_display_codon}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 if ($self->codon && defined $self->transcript_variation->codon_position) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 my $display_codon = lc $self->codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 # if this allele is an indel then just return all lowercase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 if ($self->feature_seq ne '-') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 # codon_position is 1-based, while substr assumes the string starts at 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 my $pos = $self->transcript_variation->codon_position - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 my $len = length $self->feature_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 substr($display_codon, $pos, $len) = uc substr($display_codon, $pos, $len);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $self->{_display_codon} = $display_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 return $self->{_display_codon};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 =head2 polyphen_prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 Description: Return the qualitative PolyPhen-2 prediction for the effect of this allele.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 (Note that we currently only have PolyPhen predictions for variants that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 result in single amino acid substitutions in human)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 Returntype : string (one of 'probably damaging', 'possibly damaging', 'benign', 'unknown')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 if this is a non-synonymous mutation and a prediction is available, undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 otherwise
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 sub polyphen_prediction {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 my ($self, $classifier, $polyphen_prediction) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 $classifier ||= 'humvar';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 my $analysis = "polyphen_${classifier}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 $self->{$analysis}->{prediction} = $polyphen_prediction if $polyphen_prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 unless ($self->{$analysis}->{prediction}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 my ($prediction, $score) = $self->_protein_function_prediction($analysis);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 $self->{$analysis}->{score} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 $self->{$analysis}->{prediction} = $prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 return $self->{$analysis}->{prediction};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 =head2 polyphen_score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 Description: Return the PolyPhen-2 probability that this allele is deleterious (Note that we
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 currently only have PolyPhen predictions for variants that result in single
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 amino acid substitutions in human)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 Returntype : float between 0 and 1 if this is a non-synonymous mutation and a prediction is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 available, undef otherwise
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 sub polyphen_score {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 my ($self, $classifier, $polyphen_score) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 $classifier ||= 'humvar';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 my $analysis = "polyphen_${classifier}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 $self->{$analysis}->{score} = $polyphen_score if defined $polyphen_score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 unless ($self->{$analysis}->{score}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 my ($prediction, $score) = $self->_protein_function_prediction($analysis);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 $self->{$analysis}->{score} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 $self->{$analysis}->{prediction} = $prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 return $self->{$analysis}->{score};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 =head2 sift_prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 Description: Return the qualitative SIFT prediction for the effect of this allele.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 (Note that we currently only have SIFT predictions for variants that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 result in single amino acid substitutions in human)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 Returntype : string (one of 'tolerated', 'deleterious') if this is a non-synonymous
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 mutation and a prediction is available, undef otherwise
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 sub sift_prediction {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 my ($self, $sift_prediction) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 $self->{sift_prediction} = $sift_prediction if $sift_prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 unless ($self->{sift_prediction}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 my ($prediction, $score) = $self->_protein_function_prediction('sift');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 $self->{sift_score} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 $self->{sift_prediction} = $prediction unless $self->{sift_prediction};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 return $self->{sift_prediction};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 =head2 sift_score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 Description: Return the SIFT score for this allele (Note that we currently only have SIFT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 predictions for variants that result in single amino acid substitutions in human)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 Returntype : float between 0 and 1 if this is a non-synonymous mutation and a prediction is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 available, undef otherwise
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 sub sift_score {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 my ($self, $sift_score) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 $self->{sift_score} = $sift_score if defined $sift_score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 unless ($self->{sift_score}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 my ($prediction, $score) = $self->_protein_function_prediction('sift');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 $self->{sift_score} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $self->{sift_prediction} = $prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 return $self->{sift_score};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 sub _protein_function_prediction {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 my ($self, $analysis) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 # we can only get results for variants that cause a single amino acid substitution,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 # so check the peptide allele string first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 if ($self->pep_allele_string && $self->pep_allele_string =~ /^[A-Z]\/[A-Z]$/ && defined $AA_LOOKUP->{$self->peptide}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 if (my $matrix = $self->transcript_variation->_protein_function_predictions($analysis)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 my ($prediction, $score) = $matrix->get_prediction(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 $self->transcript_variation->translation_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 $self->peptide,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 return wantarray ? ($prediction, $score) : $prediction;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 =head2 hgvs_genomic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 Description: Return a string representing the genomic-level effect of this allele in HGVS format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 sub hgvs_genomic {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 return _hgvs_generic(@_,'genomic');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 =head2 hgvs_coding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 Description: Return a string representing the CDS-level effect of this allele in HGVS format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 sub hgvs_coding {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 return hgvs_transcript(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 =head2 hgvs_transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 Description: Return a string representing the CDS-level effect of this allele in HGVS format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 sub hgvs_transcript {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 my $notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 ##### set if string supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 $self->{hgvs_transcript} = $notation if defined $notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 ##### return if held
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 return $self->{hgvs_transcript} if defined $self->{hgvs_transcript} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 return $self->{hgvs_coding} if defined $self->{hgvs_coding} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 my $variation_feature_sequence = $self->variation_feature_seq() ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 ### don't try to handle odd characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 return undef if $variation_feature_sequence =~ m/[^ACGT\-]/ig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 ### no result for reference allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 return undef if $self->is_reference ==1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 ### else evaluate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 ### get reference sequence strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 my $refseq_strand = $self->transcript_variation->transcript->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 if($DEBUG ==1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 my $var_name = $self->transcript_variation->variation_feature->variation_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 print "\nHGVS transcript: Checking $var_name refseq strand => $refseq_strand seq name : " . $self->transcript_variation->transcript_stable_id() . " var strand " . $self->transcript_variation->variation_feature->strand() . " vf st " . $self->variation_feature->strand() ." seqname: " . $self->variation_feature->seqname() ." seq: " . $self->variation_feature_seq ."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 my $hgvs_notation ; ### store components of HGVS string in hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 ### vf strand is relative to transcript or transcript slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 if( $self->transcript_variation->variation_feature->strand() <0 && $refseq_strand >0 ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 $self->transcript_variation->variation_feature->strand() >0 && $refseq_strand < 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 reverse_comp(\$variation_feature_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 ### need to get ref seq from slice transcript is on for intron labelling
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 my ($slice_start, $slice_end, $slice) = $self->_var2transcript_slice_coords();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 unless($slice_start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 #warn "TVA: VF not within transcript - no HGVS\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 ### decide event type from HGVS nomenclature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 $hgvs_notation = hgvs_variant_notation( $variation_feature_sequence, ### alt_allele,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 $slice->seq(), ### using this to extract ref allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 $slice_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 $slice_end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 ### This should not happen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 unless($hgvs_notation->{'type'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 #warn "Error - not continuing; no HGVS annotation\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 ### create reference name - transcript name & seq version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 $hgvs_notation->{'ref_name'} = $self->transcript_variation->transcript_stable_id() . "." . $self->transcript_variation->transcript->version();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 ### get position relative to transcript features [use HGVS coords not variation feature coords due to dups]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 $hgvs_notation->{start} = $self->_get_cDNA_position( $hgvs_notation->{start} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 $hgvs_notation->{end} = $self->_get_cDNA_position( $hgvs_notation->{end} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 # Make sure that start is always less than end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 my ($exon_start_coord, $intron_start_offset) = $hgvs_notation->{start} =~ m/(\-?[0-9]+)\+?(\-?[0-9]+)?/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 my ($exon_end_coord, $intron_end_offset) = $hgvs_notation->{end} =~ m/(\-?[0-9]+)\+?(\-?[0-9]+)?/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 $intron_start_offset ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 $intron_end_offset ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 ($hgvs_notation->{start},$hgvs_notation->{end}) = ($hgvs_notation->{end},$hgvs_notation->{start}) if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 (($exon_start_coord + $intron_start_offset) > ($exon_end_coord + $intron_end_offset));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 if($self->transcript->cdna_coding_start()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 $hgvs_notation->{'numbering'} = "c"; ### set 'c' if transcript is coding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 $hgvs_notation->{'numbering'} = "n"; ### set 'n' if transcript non-coding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 ### generic formatting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 $self->{hgvs_transcript} = format_hgvs_string( $hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 if($DEBUG ==1){print "HGVS notation for var " . $self->transcript_variation->variation_feature->variation_name() . " from hgvs transcript : " . $self->{hgvs_transcript} . " \n";}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 return $self->{hgvs_transcript};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 =head2 hgvs_protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 Description: Return a string representing the protein-level effect of this allele in HGVS format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 Returntype : string or undef if this allele is not in the CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 sub hgvs_protein {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 my $notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 my $hgvs_notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 if($DEBUG ==1){print "\nStarting hgvs_protein with ". $self->transcript_variation->variation_feature->variation_name() . "\n"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 ### set if string supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 $self->{hgvs_protein} = $notation if defined $notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 ### return if set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 return $self->{hgvs_protein} if defined $self->{hgvs_protein} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 ### don't try to handle odd characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 return undef if $self->variation_feature_seq() =~ m/[^ACGT\-]/ig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 #### else, check viable input and create notation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 return if $self->is_reference();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 unless ($self->transcript_variation->translation_start() && $self->transcript_variation->translation_end()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 return undef ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 #### for debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 #my $var_name = $self->transcript_variation->variation_feature->variation_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 ### get reference sequence [add seq version to transcript name]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 $hgvs_notation->{ref_name} = $self->transcript_variation->transcript->translation()->display_id() . "." . $self->transcript_variation->transcript->translation()->version();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 $hgvs_notation->{'numbering'} = 'p';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 ### get default reference location [changed later in some cases eg. duplication]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 $hgvs_notation->{start} = $self->transcript_variation->translation_start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 $hgvs_notation->{end} = $self->transcript_variation->translation_end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 ## get default reference & alt peptides [changed later to hgvs format]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 $hgvs_notation->{alt} = $self->peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 $hgvs_notation->{ref} = $self->transcript_variation->get_reference_TranscriptVariationAllele->peptide;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 if(defined $hgvs_notation->{alt} && defined $hgvs_notation->{ref}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 $hgvs_notation = _clip_alleles( $hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 #### define type - types are different for protein numbering
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 $hgvs_notation = $self->_get_hgvs_protein_type($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 ##### Convert ref & alt peptides taking into account HGVS rules
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 $hgvs_notation = $self->_get_hgvs_peptides($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 ##### String formatting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 $self->{hgvs_protein} = $self->_get_hgvs_protein_format($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 return $self->{hgvs_protein} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 ### HGVS: format protein string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 sub _get_hgvs_protein_format{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 my $hgvs_notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 if ((defined $hgvs_notation->{ref} && defined $hgvs_notation->{alt} &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 $hgvs_notation->{ref} eq $hgvs_notation->{alt}) ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 (defined $hgvs_notation->{type} && $hgvs_notation->{type} eq "=")){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 ### no protein change - return transcript nomenclature with flag for neutral protein consequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 $hgvs_notation->{'hgvs'} = $self->hgvs_transcript() . "(p.=)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 return $hgvs_notation->{'hgvs'} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 ### all start with refseq name & numbering type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 $hgvs_notation->{'hgvs'} = $hgvs_notation->{'ref_name'} . ":" . $hgvs_notation->{'numbering'} . ".";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 ### handle stop_lost seperately regardless of cause by del/delins => p.XposAA1extnum_AA_to_stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 if(stop_lost($self)){ ### if deletion of stop add extX and number of new aa to alt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 $hgvs_notation->{alt} = substr($hgvs_notation->{alt}, 0, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 if($hgvs_notation->{type} eq "del"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 my $aa_til_stop = _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "del");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 if(defined $aa_til_stop){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 $hgvs_notation->{alt} .= "extX" . $aa_til_stop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 elsif($hgvs_notation->{type} eq ">"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 my $aa_til_stop = _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "subs");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 if(defined $aa_til_stop){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 $hgvs_notation->{alt} .= "extX" . $aa_til_stop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 # warn "TVA: stop loss for type $hgvs_notation->{type} not caught \n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 elsif( $hgvs_notation->{type} eq "dup"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 if($hgvs_notation->{start} < $hgvs_notation->{end}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 ### list only first and last peptides in long duplicated string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 my $ref_pep_first = substr($hgvs_notation->{alt}, 0, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 my $ref_pep_last = substr($hgvs_notation->{alt}, -3, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start} . "_" . $ref_pep_last . $hgvs_notation->{end} ."dup";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{alt} . $hgvs_notation->{start} . "dup" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 print "formating a dup $hgvs_notation->{hgvs} \n" if $DEBUG==1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 elsif($hgvs_notation->{type} eq ">"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 #### substitution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref}. $hgvs_notation->{start} . $hgvs_notation->{alt};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 elsif( $hgvs_notation->{type} eq "delins" || $hgvs_notation->{type} eq "ins" ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 #### list first and last aa in reference only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 my $ref_pep_first = substr($hgvs_notation->{ref}, 0, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 my $ref_pep_last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 if(substr($hgvs_notation->{ref}, -1, 1) eq "X"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 $ref_pep_last ="X";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 $ref_pep_last = substr($hgvs_notation->{ref}, -3, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 if($hgvs_notation->{ref} =~ /X$/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 ### For stops & add extX & distance to next stop to alt pep
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 my $aa_til_stop = _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "loss");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 if(defined $aa_til_stop){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 $hgvs_notation->{alt} .="extX" . $aa_til_stop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 if($hgvs_notation->{start} == $hgvs_notation->{end} && $hgvs_notation->{type} eq "delins"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start} . $hgvs_notation->{end} . $hgvs_notation->{type} . $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 ### correct ordering if needed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 if($hgvs_notation->{start} > $hgvs_notation->{end}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 ( $hgvs_notation->{start}, $hgvs_notation->{end}) = ($hgvs_notation->{end}, $hgvs_notation->{start} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start} . "_" . $ref_pep_last . $hgvs_notation->{end} . $hgvs_notation->{type} . $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 elsif($hgvs_notation->{type} eq "fs"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 if(defined $hgvs_notation->{alt} && $hgvs_notation->{alt} eq "X"){ ## stop gained
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 else{ ## not immediate stop - count aa until next
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 my $aa_til_stop = _stop_loss_extra_AA($self, $hgvs_notation->{start}-1, "fs");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 if($aa_til_stop ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 ### use long form if new stop found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . $hgvs_notation->{alt} . "fsX$aa_til_stop" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 ### otherwise use short form
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . "fs";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 elsif( $hgvs_notation->{type} eq "del"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 $hgvs_notation->{alt} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 if( length($hgvs_notation->{ref}) >3 ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 my $ref_pep_first = substr($hgvs_notation->{ref}, 0, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 my $ref_pep_last = substr($hgvs_notation->{ref}, -3, 3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start} . "_" . $ref_pep_last . $hgvs_notation->{end} .$hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 elsif($hgvs_notation->{start} ne $hgvs_notation->{end} ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start} . "_" . $hgvs_notation->{alt} . $hgvs_notation->{end} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 #### default to substitution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref}. $hgvs_notation->{start} . $hgvs_notation->{alt};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 if($DEBUG==1){ print "Returning protein format: $hgvs_notation->{'hgvs'}\n";}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 return $hgvs_notation->{'hgvs'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 ### HGVS: get type of variation event in protein terms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 sub _get_hgvs_protein_type{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 my $hgvs_notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 ### get allele length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 my ($ref_length, $alt_length ) = $self->_get_allele_length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 if( defined $hgvs_notation->{ref} && defined $hgvs_notation->{alt} ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 ### Run type checks on peptides if available
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 if($hgvs_notation->{alt} eq $hgvs_notation->{ref} ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 #### synonymous indicated by =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 $hgvs_notation->{type} = "=";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 elsif( length($hgvs_notation->{ref}) ==1 && length($hgvs_notation->{alt}) ==1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 $hgvs_notation->{type} = ">";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 elsif($hgvs_notation->{ref} eq "-" || $hgvs_notation->{ref} eq "") {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 $hgvs_notation->{type} = "ins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 elsif($hgvs_notation->{alt} eq "" ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 $hgvs_notation->{type} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 elsif( (length($hgvs_notation->{alt}) > length($hgvs_notation->{ref}) ) &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 $hgvs_notation->{alt} =~ / $hgvs_notation->{ref}/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 ### capture duplication event described as TTT/TTTTT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 $hgvs_notation->{type} = "dup";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 elsif( (length($hgvs_notation->{alt}) >1 && length($hgvs_notation->{ref}) ==1) ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 (length($hgvs_notation->{alt}) ==1 && length($hgvs_notation->{ref}) >1) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 $hgvs_notation->{type} = "delins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 $hgvs_notation->{type} = ">";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 elsif($ref_length ne $alt_length && ($ref_length - $alt_length)%3 !=0 ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 $hgvs_notation->{type} = "fs";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 elsif(length($self->variation_feature_seq()) >1 ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879 if($hgvs_notation->{start} == ($hgvs_notation->{end} + 1) ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 ### convention for insertions - end one less than start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 $hgvs_notation->{type} = "ins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 elsif( $hgvs_notation->{start} != $hgvs_notation->{end} ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 $hgvs_notation->{type} = "delins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 $hgvs_notation->{type} = ">";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 #print STDERR "DEBUG ".$self->variation_feature->start."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 #warn "Cannot define protein variant type [$ref_length - $alt_length]\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 return $hgvs_notation ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 ### HGVS: get reference & alternative peptide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 sub _get_hgvs_peptides{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 my $hgvs_notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 if($hgvs_notation->{type} eq "fs"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 ### ensembl alt/ref peptides not the same as HGVS alt/ref - look up seperately
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 $hgvs_notation = $self->_get_fs_peptides($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 elsif($hgvs_notation->{type} eq "ins" ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 ### HGVS ref are peptides flanking insertion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 $hgvs_notation->{ref} = $self->_get_surrounding_peptides($hgvs_notation->{start});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 if( $hgvs_notation->{alt} =~/\*/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 ## inserted bases after stop irrelevant; consider as substitution gaining stop MAINTAIN PREVIOUS BEHAVIOUR FOR NOW
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 #$hgvs_notation->{alt} =~ s/\*\w+$/\*/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 ### Additional check that inserted bases do not duplicate 3' reference sequence [set to type = dup if so]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 $hgvs_notation = $self->_check_for_peptide_duplication($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 ### Convert peptide to 3 letter code as used in HGVS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 $hgvs_notation->{ref} = Bio::SeqUtils->seq3(Bio::PrimarySeq->new(-seq => $hgvs_notation->{ref}, -id => 'ref', -alphabet => 'protein')) || "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 if( $hgvs_notation->{alt} eq "-"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 $hgvs_notation->{alt} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 $hgvs_notation->{alt} = Bio::SeqUtils->seq3(Bio::PrimarySeq->new(-seq => $hgvs_notation->{alt}, -id => 'ref', -alphabet => 'protein')) || "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 ### handle special cases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 if( affects_start_codon($self) ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 #### handle initiator loss -> probably no translation => alt allele is '?'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936 $hgvs_notation->{alt} = "?";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 $hgvs_notation->{type} = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 elsif( $hgvs_notation->{type} eq "del"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 #### partial last codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 $hgvs_notation->{alt} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944 elsif($hgvs_notation->{type} eq "fs"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 ### only quote first ref peptide for frameshift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 $hgvs_notation->{ref} = substr($hgvs_notation->{ref},0,3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 ### set stop to HGVS prefered "X"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 if(defined $hgvs_notation->{ref}){ $hgvs_notation->{ref} =~ s/Ter|Xaa/X/g; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 if(defined $hgvs_notation->{alt}){ $hgvs_notation->{alt} =~ s/Ter|Xaa/X/g; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953 return ($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957 ### HGVS: remove common peptides from alt and ref strings & shift start/end accordingly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 sub _clip_alleles{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 my $hgvs_notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 my $check_alt = $hgvs_notation->{alt} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 my $check_ref = $hgvs_notation->{ref} ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 my $check_start = $hgvs_notation->{start};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 ### strip same bases from start of string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967 for (my $p =0; $p <length ($hgvs_notation->{ref}); $p++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 my $check_next_ref = substr( $check_ref, 0, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 my $check_next_alt = substr( $check_alt, 0, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 if($check_next_ref eq "*" && $check_next_alt eq "*"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 ### stop re-created by variant - no protein change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 $hgvs_notation->{type} = "=";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 return($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978 if($check_next_ref eq $check_next_alt){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 $check_start++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 $check_ref = substr( $check_ref, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 $check_alt = substr( $check_alt, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 #### strip same bases from end of string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 for (my $q =0; $q < length ($check_ref); $q++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989 my $check_next_ref = substr( $check_ref, -1, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 my $check_next_alt = substr( $check_alt, -1, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 if($check_next_ref eq $check_next_alt){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 chop $check_ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 chop $check_alt;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 $hgvs_notation->{alt} = $check_alt;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 $hgvs_notation->{ref} = $check_ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003 ### check if clipping force type change & adjust location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 if(length ($check_ref) == 0 && length ($check_alt) >= 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 ### re-set as ins not delins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006 $hgvs_notation->{type} ="ins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 ### insertion between ref base and next => adjust next
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 if($hgvs_notation->{end} == $hgvs_notation->{start} ){$hgvs_notation->{end} = $check_start; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 # $hgvs_notation->{start} = $check_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 elsif(length ($check_ref) >=1 && length ($check_alt) == 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 ### re-set as del not delins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 $hgvs_notation->{type} ="del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 $hgvs_notation->{start} = $check_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 #### save trimmed peptide string & increased start position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018 $hgvs_notation->{start} = $check_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022 return $hgvs_notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 #### HGVS: check allele lengths to look for frameshifts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 sub _get_allele_length{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 my $ref_length = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 my $alt_length = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 my $al_string = $self->allele_string();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 my $ref_allele = (split/\//, $al_string)[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 $ref_allele =~ s/\-//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 $ref_length = length $ref_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 my $alt_allele = $self->variation_feature_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 $alt_allele =~ s/\-//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 $alt_length = length $alt_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 return ($ref_length, $alt_length );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 ### HGVS: list first different peptide [may not be first changed codon]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 sub _get_fs_peptides{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 my $hgvs_notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 ### get CDS with alt variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 my $alt_cds = $self->_get_alternate_cds();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 return undef unless defined($alt_cds);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 #### get new translation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 my $alt_trans = $alt_cds->translate()->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 ### get changed end (currently in single letter AA coding)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 my $ref_trans = $self->transcript->translate()->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 $ref_trans .= "*"; ## appending ref stop for checking purposes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 $hgvs_notation->{start} = $self->transcript_variation->translation_start() ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 if( $hgvs_notation->{start} > length $alt_trans){ ## deletion of stop, no further AA in alt seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 $hgvs_notation->{alt} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 $hgvs_notation->{type} = "del";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 return $hgvs_notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 while ($hgvs_notation->{start} <= length $alt_trans){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075 ### frame shift may result in the same AA#
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 $hgvs_notation->{ref} = substr($ref_trans, $hgvs_notation->{start}-1, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078 $hgvs_notation->{alt} = substr($alt_trans, $hgvs_notation->{start}-1, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 if($hgvs_notation->{ref} eq "*" && $hgvs_notation->{alt} eq "*"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 ### variation at stop codon, but maintains stop codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 return ($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 last if $hgvs_notation->{ref} ne $hgvs_notation->{alt};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085 $hgvs_notation->{start}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 return ($hgvs_notation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091 #### HGVS: if variant is an insertion, ref pep is initially "-", so seek AA before and after insertion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092 sub _get_surrounding_peptides{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 my $ref_pos = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096 my $ref_trans = $self->transcript->translate()->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 my $end = substr($ref_trans, $ref_pos-1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099 my $ref_string = substr($ref_trans, $ref_pos-1, 2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 return ($ref_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 #### HGVS: alternate CDS needed to check for new stop when variant disrupts 'reference' stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 sub _get_alternate_cds{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 ### get reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 my $reference_cds_seq = $self->transcript->translateable_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 return undef unless defined($self->transcript_variation->cds_start) && defined($self->transcript_variation->cds_end());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 ### get sequences upstream and downstream of variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 my $upstream_seq = substr($reference_cds_seq, 0, ($self->transcript_variation->cds_start() -1) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 my $downstream_seq = substr($reference_cds_seq, ($self->transcript_variation->cds_end() ) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 ### fix alternate allele if deletion or on opposite strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 my $alt_allele = $self->variation_feature_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 $alt_allele =~ s/\-//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123 if( $self->transcript_variation->variation_feature->strand() <0 && $self->transcript_variation->transcript->strand() >0 ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 $self->transcript_variation->variation_feature->strand() >0 && $self->transcript_variation->transcript->strand() < 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 reverse_comp(\$alt_allele) ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 ### build alternate seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129 my $alternate_seq = $upstream_seq . $alt_allele . $downstream_seq ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131 ### create seq obj with alternative allele in the CDS sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 my $alt_cds =Bio::PrimarySeq->new(-seq => $alternate_seq, -id => 'alt_cds', -alphabet => 'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 ### append UTR if available as stop may be disrupted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135 my $utr = $self->transcript_variation->transcript->three_prime_utr();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 if (defined $utr) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 ### append the UTR to the alternative CDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 $alt_cds->seq($alt_cds->seq() . $utr->seq());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142 ##warn "No UTR available for alternate CDS\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 return $alt_cds;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 ### HGVS: if inserted string is identical to 3' reference sequence, describe as duplication
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 sub _check_for_peptide_duplication{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 my $hgvs_notation= shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 ##### get reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155 my $reference_cds_seq = $self->transcript->translateable_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157 ##### get sequence upstream of variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 my $upstream_seq = substr($reference_cds_seq, 0, ($self->transcript_variation->cds_start() -1) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 ##### create translation to check against inserted peptides
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161 my $upstream_cds =Bio::PrimarySeq->new(-seq => $upstream_seq, -id => 'alt_cds', -alphabet => 'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 my $upstream_trans = $upstream_cds->translate()->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164 ## Test whether alt peptide matches the reference sequence just before the variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165 my $test_new_start = $hgvs_notation->{'start'} - length($hgvs_notation->{'alt'}) -1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166 my $test_seq = substr($upstream_trans, $test_new_start, length($hgvs_notation->{'alt'}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168 if ( $test_new_start >= 0 && $test_seq eq $hgvs_notation->{alt}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 $hgvs_notation->{type} = 'dup';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171 $hgvs_notation->{end} = $hgvs_notation->{start} -1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 $hgvs_notation->{start} -= length($hgvs_notation->{alt});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 return $hgvs_notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 #### HGVS: if a stop is lost, seek the next in transcript & count number of extra AA's
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181 sub _stop_loss_extra_AA{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 my $ref_var_pos = shift; ### first effected AA - supply for frameshifts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185 my $test = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187 return undef unless $ref_var_pos;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189 my $extra_aa;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191 ### get the sequence with variant added
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192 my $alt_cds = $self->_get_alternate_cds();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193 return undef unless defined($alt_cds);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 ### get new translation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196 my $alt_trans = $alt_cds->translate();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 my $ref_temp = $self->transcript->translate()->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199 my $ref_len = length($ref_temp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201 if($DEBUG==1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202 print "alt translated:\n" . $alt_trans->seq() . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203 print "ref translated:\n$ref_temp\n";;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206 #### Find the number of residues that are translated until a termination codon is encountered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 if ($alt_trans->seq() =~ m/\*/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208 if($DEBUG==1){print "Got $+[0] aa before stop, var event at $ref_var_pos \n";}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 if($test eq "fs" ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211 ### frame shift - count from first AA effected by variant to stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212 $extra_aa = $+[0] - $ref_var_pos;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 if($DEBUG==1){ print "Stop change ($test): found $extra_aa amino acids before fs stop [ $+[0] - peptide ref_start: $ref_var_pos )]\n";}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 $extra_aa = $+[0] - 1 - $ref_len;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 if($DEBUG==1){ print "Stop change ($test): found $extra_aa amino acids before next stop [ $+[0] - 1 -normal stop $ref_len)]\n";}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 # A special case is if the first aa is a stop codon => don't display the number of residues until the stop codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 if(defined $extra_aa && $extra_aa >0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 return $extra_aa ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227 #warn "No stop found in alternate sequence\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 =head
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234 # We haven't implemented support for these methods yet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236 sub hgvs_rna {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237 return _hgvs_generic(@_,'rna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240 sub hgvs_mitochondrial {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241 return _hgvs_generic(@_,'mitochondrial');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246 sub _hgvs_generic {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249 my $reference = pop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 my $notation = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 #ÊThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 return undef if ($reference =~ m/rna|mitochondrial/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 my $sub = qq{hgvs_$reference};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 $self->{$sub} = $notation if defined $notation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 unless ($self->{$sub}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 # Use the transcript this VF is on as the reference feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 my $reference_feature = $self->transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 # If we want genomic coordinates, the reference_feature should actually be the slice for the underlying seq_region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264 $reference_feature = $reference_feature->slice->seq_region_Slice if ($reference eq 'genomic');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 # Calculate the HGVS notation on-the-fly and pass it to the TranscriptVariation in order to distribute the result to the other alleles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267 $self->transcript_variation->$sub($self->variation_feature->get_all_hgvs_notations($reference_feature,substr($reference,0,1),undef,undef,$self->transcript_variation));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270 return $self->{$sub};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1274 ### HGVS: move variant to transcript slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1275 sub _var2transcript_slice_coords{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1277 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1279 my $ref_slice = $self->transcript->feature_Slice(); #### returns with strand same as feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280 my $tr_vf = $self->variation_feature->transfer($ref_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282 # Return undef if this VariationFeature does not fall within the supplied feature.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 return undef if (!defined $tr_vf ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 $tr_vf->start < 1 ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285 $tr_vf->end < 1 ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 $tr_vf->start > ($self->transcript->end - $self->transcript->start + 1) ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 $tr_vf->end > ($self->transcript->end - $self->transcript->start + 1));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289 return( $tr_vf->start() , $tr_vf->end(), $ref_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 ### HGVS: get variant position in transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 # intron:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297 # If the position is in an intron, the boundary position of the closest exon and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298 # a + or - offset into the intron is returned.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1299 # Ordered by genome forward not 5' -> 3'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1301 # upstream:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1302 # If the position is 5' of the start codon, it is reported relative to the start codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1303 # (-1 being the last nucleotide before the 'A' of ATG).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1305 #downstream:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1306 # If the position is 3' pf the stop codon, it is reported with a '*' prefix and the offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307 # from the start codon (*1 being the first nucleotide after the last position of the stop codon)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309 sub _get_cDNA_position {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312 my $position = shift; ### start or end of variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314 my $transcript = $self->transcript();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315 my $strand = $transcript->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1317 #### TranscriptVariation start/stop coord relative to transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318 #### Switch to chromosome coordinates taking into account strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319 $position = ( $strand > 0 ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320 ( $self->transcript->start() + $position - 1 ) :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 ( $self->transcript->end() - $position + 1));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 # Get all exons and sort them in positional order
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326 my @exons = sort {$a->start() <=> $b->start()} @{$transcript->get_all_Exons()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327 my $n_exons = scalar(@exons);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1329 my $cdna_position;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1330 # Loop over the exons and get the coordinates of the variation in exon+intron notation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1331 for (my $i=0; $i<$n_exons; $i++) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333 # Skip if the start point is beyond this exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334 next if ($position > $exons[$i]->end());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 # EXONIC: If the start coordinate is within this exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338 if ($position >= $exons[$i]->start()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 # Get the cDNA start coordinate of the exon and add the number of nucleotides from the exon boundary to the variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340 # If the transcript is in the opposite direction, count from the end instead
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 $cdna_position = $exons[$i]->cdna_start($transcript) + ( $strand > 0 ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342 ( $position - $exons[$i]->start ) :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343 ( $exons[$i]->end() - $position )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 last; #### last exon checked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347 ## INTRONIC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348 # Else the start coordinate is between this exon and the previous one, determine which one is closest and get coordinates relative to that one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1351 my $updist = ($position - $exons[$i-1]->end());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1352 my $downdist = ($exons[$i]->start() - $position);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354 # If the distance to the upstream exon is the shortest, or equal and in the positive orientation, use that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1355 if ($updist < $downdist || ($updist == $downdist && $strand >= 0)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357 # If the orientation is reversed, we should use the cDNA start and a '-' offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358 $cdna_position = ($strand >= 0 ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 $exons[$i-1]->cdna_end($transcript) . '+' :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360 $exons[$i-1]->cdna_start($transcript) . '-')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361 . $updist;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363 # Else if downstream is shortest...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 # If the orientation is reversed, we should use the cDNA end and a '+' offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366 $cdna_position = ($strand >= 0 ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367 $exons[$i]->cdna_start($transcript) . '-' :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 $exons[$i]->cdna_end($transcript) . '+')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369 . $downdist;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 last; ## last exon checked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1375 # Shift the position to make it relative to the start & stop codons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1376 my $start_codon = $transcript->cdna_coding_start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377 my $stop_codon = $transcript->cdna_coding_end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379 # Disassemble the cDNA coordinate into the exon and intron parts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380 ### just built this now taking it appart again
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381 my ($cdna_coord, $intron_offset) = $cdna_position =~ m/([0-9]+)([\+\-][0-9]+)?/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 # Start by correcting for the stop codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385 if (defined($stop_codon) && $cdna_coord > $stop_codon) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386 # Get the offset from the stop codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387 $cdna_coord -= $stop_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388 # Prepend a * to indicate the position is in the 3' UTR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389 $cdna_coord = '*' . $cdna_coord;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 elsif (defined($start_codon)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392 # If the position is beyond the start codon, add 1 to get the correct offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 $cdna_coord += ($cdna_coord >= $start_codon);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 # Subtract the position of the start codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 $cdna_coord -= $start_codon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 # Re-assemble the cDNA position [ return exon num & offset & direction for intron eg. 142+363]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399 $cdna_position = $cdna_coord . (defined($intron_offset) ? $intron_offset : '');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400 return $cdna_position;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406