comparison variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariation.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Variation::TranscriptVariation
24
25 =head1 SYNOPSIS
26
27 use Bio::EnsEMBL::Variation::TranscriptVariation;
28
29 my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
30 -transcript => $transcript,
31 -variation_feature => $var_feat
32 );
33
34 print "consequence type: ", (join ",", @{$tv->consequence_type}), "\n";
35 print "cdna coords: ", $tv->cdna_start, '-', $tv->cdna_end, "\n";
36 print "cds coords: ", $tv->cds_start, '-', $tv->cds_end, "\n";
37 print "pep coords: ", $tv->translation_start, '-',$tv->translation_end, "\n";
38 print "amino acid change: ", $tv->pep_allele_string, "\n";
39 print "codon change: ", $tv->codons, "\n";
40 print "allele sequences: ", (join ",", map { $_->variation_feature_seq }
41 @{ $tv->get_all_TranscriptVariationAlleles }, "\n";
42
43 =head1 DESCRIPTION
44
45 A TranscriptVariation object represents a variation feature which is in close
46 proximity to an Ensembl transcript. A TranscriptVariation object has several
47 attributes which define the relationship of the variation to the transcript.
48
49 =cut
50
51 package Bio::EnsEMBL::Variation::TranscriptVariation;
52
53 use strict;
54 use warnings;
55
56 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
57 use Bio::EnsEMBL::Variation::TranscriptVariationAllele;
58 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap within_cds);
59 use Bio::EnsEMBL::Variation::BaseTranscriptVariation;
60
61 use base qw(Bio::EnsEMBL::Variation::BaseTranscriptVariation Bio::EnsEMBL::Variation::VariationFeatureOverlap);
62
63 =head2 new
64
65 Arg [-TRANSCRIPT] :
66 The Bio::EnsEMBL::Transcript associated with the given VariationFeature
67
68 Arg [-VARIATION_FEATURE] :
69 The Bio::EnsEMBL::VariationFeature associated with the given Transcript
70
71 Arg [-ADAPTOR] :
72 A Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor
73
74 Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
75 A flag indiciating if ambiguous single nucleotide alleles should be disambiguated
76 when constructing the TranscriptVariationAllele objects, e.g. a Variationfeature
77 with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
78 ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
79 allowed longer alleles with potentially many ambiguous bases.
80
81 Example :
82 my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
83 -transcript => $transcript,
84 -variation_feature => $var_feat
85 );
86
87 Description: Constructs a new TranscriptVariation instance given a VariationFeature
88 and a Transcript, most of the work is done in the VariationFeatureOverlap
89 superclass - see there for more documentation.
90 Returntype : A new Bio::EnsEMBL::Variation::TranscriptVariation instance
91 Exceptions : throws unless both VARIATION_FEATURE and TRANSCRIPT are supplied
92 Status : At Risk
93
94 =cut
95
96 sub new {
97 my $class = shift;
98
99 my %args = @_;
100
101 # swap a '-transcript' argument for a '-feature' one for the superclass
102
103 for my $arg (keys %args) {
104 if (lc($arg) eq '-transcript') {
105 $args{'-feature'} = delete $args{$arg};
106 }
107 }
108
109 # call the superclass constructor
110 my $self = $class->SUPER::new(%args) || return undef;
111
112 # rebless the alleles from vfoas to tvas
113 map { bless $_, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele' }
114 @{ $self->get_all_TranscriptVariationAlleles };
115
116 return $self;
117 }
118
119 sub get_TranscriptVariationAllele_for_allele_seq {
120 my ($self, $allele_seq) = @_;
121 return $self->SUPER::get_VariationFeatureOverlapAllele_for_allele_seq($allele_seq);
122 }
123
124 =head2 add_TranscriptVariationAllele
125
126 Arg [1] : A Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
127 Description: Add an allele to this TranscriptVariation
128 Returntype : none
129 Exceptions : throws if the argument is not the expected type
130 Status : At Risk
131
132 =cut
133
134 sub add_TranscriptVariationAllele {
135 my ($self, $tva) = @_;
136 assert_ref($tva, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele');
137 return $self->SUPER::add_VariationFeatureOverlapAllele($tva);
138 }
139
140 =head2 get_reference_TranscriptVariationAllele
141
142 Description: Get the object representing the reference allele of this TranscriptVariation
143 Returntype : Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
144 Exceptions : none
145 Status : At Risk
146
147 =cut
148
149 sub get_reference_TranscriptVariationAllele {
150 my $self = shift;
151 return $self->SUPER::get_reference_VariationFeatureOverlapAllele(@_);
152 }
153
154 =head2 get_all_alternate_TranscriptVariationAlleles
155
156 Description: Get a list of the alternate alleles of this TranscriptVariation
157 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
158 Exceptions : none
159 Status : At Risk
160
161 =cut
162
163 sub get_all_alternate_TranscriptVariationAlleles {
164 my $self = shift;
165 return $self->SUPER::get_all_alternate_VariationFeatureOverlapAlleles(@_);
166 }
167
168 =head2 get_all_TranscriptVariationAlleles
169
170 Description: Get a list of the all the alleles, both reference and alternate, of
171 this TranscriptVariation
172 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
173 Exceptions : none
174 Status : At Risk
175
176 =cut
177
178 sub get_all_TranscriptVariationAlleles {
179 my $self = shift;
180 return $self->SUPER::get_all_VariationFeatureOverlapAlleles(@_);
181 }
182
183
184 =head2 cdna_allele_string
185
186 Description: Return a '/' delimited string of the alleles of this variation with respect
187 to the associated transcript
188 Returntype : string
189 Exceptions : None
190 Caller : general
191 Status : Stable
192
193 =cut
194
195 sub cdna_allele_string {
196 my $self = shift;
197
198 unless ($self->{_cdna_allele_string}) {
199 $self->{_cdna_allele_string} = join '/', map { $_->feature_seq } @{ $self->get_all_TranscriptVariationAlleles };
200 }
201
202 return $self->{_cdna_allele_string};
203 }
204
205 =head2 pep_allele_string
206
207 Description: Return a '/' delimited string of amino acid codes representing
208 all the possible changes made to the peptide by this variation
209 Returntype : string
210 Exceptions : None
211 Caller : general
212 Status : Stable
213
214 =cut
215
216 sub pep_allele_string {
217 my $self = shift;
218
219 unless ($self->{_pep_allele_string}) {
220
221 my @peptides = grep { defined } map { $_->peptide } @{ $self->get_all_TranscriptVariationAlleles };
222
223 $self->{_pep_allele_string} = join '/', @peptides;
224 }
225
226 return $self->{_pep_allele_string};
227 }
228
229 =head2 codons
230
231 Description: Return a '/' delimited string of all possible codon sequences.
232 The variant sequence within the codon will be capitalised while
233 the rest of the codon sequence will be in lower case
234 Returntype : string
235 Exceptions : None
236 Caller : general
237 Status : Stable
238
239 =cut
240
241 sub codons {
242 my $self = shift;
243
244 unless ($self->{_display_codon_allele_string}) {
245
246 my @display_codons = grep { defined } map { $_->display_codon } @{ $self->get_all_TranscriptVariationAlleles };
247
248 $self->{_display_codon_allele_string} = join '/', @display_codons;
249 }
250
251 return $self->{_display_codon_allele_string};
252 }
253
254 =head2 codon_position
255
256 Description: For variations that fall in the CDS, returns the base in the
257 codon that this variation falls in
258 Returntype : int - 1, 2 or 3
259 Exceptions : None
260 Caller : general
261 Status : At Risk
262
263 =cut
264
265 sub codon_position {
266 my $self = shift;
267
268 unless ($self->{_codon_position}) {
269
270 my $cdna_start = $self->cdna_start;
271
272 my $tran_cdna_start = $self->transcript->cdna_coding_start;
273
274 # we need to take into account the exon phase
275 my $exon_phase = $self->transcript->start_Exon->phase;
276
277 my $phase_offset = $exon_phase > 0 ? $exon_phase : 0;
278
279 if (defined $cdna_start && defined $tran_cdna_start) {
280 $self->{_codon_position} = (($cdna_start - $tran_cdna_start + $phase_offset) % 3) + 1;
281 }
282 }
283
284 return $self->{_codon_position};
285 }
286
287 =head2 affects_cds
288
289 Description: Check if any of this TranscriptVariation's alleles lie within
290 the CDS of the Transcript
291 Returntype : boolean
292 Exceptions : None
293 Caller : general
294 Status : At Risk
295
296 =cut
297
298 sub affects_cds {
299 my $self = shift;
300 return scalar grep { within_cds($_) } @{ $self->get_all_alternate_TranscriptVariationAlleles };
301 }
302
303 =head2 affects_peptide
304
305 Description: Check if any of this TranscriptVariation's alleles change the
306 resultant peptide sequence
307 Returntype : boolean
308 Exceptions : None
309 Caller : general
310 Status : At Risk
311
312 =cut
313
314 sub affects_peptide {
315 my $self = shift;
316 return scalar grep { $_->SO_term =~ /stop|non_syn|frameshift|inframe|initiator/ } map {@{$_->get_all_OverlapConsequences}} @{ $self->get_all_alternate_TranscriptVariationAlleles };
317 }
318
319
320 sub _protein_function_predictions {
321
322 my ($self, $analysis) = @_;
323
324 my $tran = $self->transcript;
325
326 my $matrix = $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis};
327
328 unless ($matrix || exists($tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis})) {
329 my $pfpma = $self->{adaptor}->db->get_ProteinFunctionPredictionMatrixAdaptor;
330
331 $matrix = $pfpma->fetch_by_analysis_translation_md5($analysis, $self->_translation_md5);
332
333 $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis} = $matrix;
334 }
335
336 return $matrix;
337 }
338
339 =head2 hgvs_genomic
340
341 Description: Return the strings representing the genomic-level effect of each of the alleles
342 of this variation in HGVS format
343 Returntype : hashref where the key is the allele sequence and then value is the HGVS string
344 Exceptions : none
345 Status : At Risk
346
347 =cut
348
349 sub hgvs_genomic {
350 return _hgvs_generic(@_,'genomic');
351 }
352
353 =head2 hgvs_coding
354
355 Description: Return the strings representing the CDS-level effect of each of the alleles
356 of this variation in HGVS format
357 Returntype : hashref where the key is the allele sequence and then value is the HGVS string
358 Exceptions : none
359 Status : At Risk
360
361 =cut
362
363 sub hgvs_coding {
364 deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.');
365 return _hgvs_generic(@_,'transcript');
366 }
367
368 =head2 hgvs_transcript
369
370 Description: Return the strings representing the CDS-level effect of each of the alleles
371 of this variation in HGVS format
372 Returntype : hashref where the key is the allele sequence and then value is the HGVS string
373 Exceptions : none
374 Status : At Risk
375
376 =cut
377
378 sub hgvs_transcript {
379 return _hgvs_generic(@_,'transcript');
380 }
381
382 =head2 hgvs_protein
383
384 Description: Return the strings representing the protein-level effect of each of the alleles
385 of this variation in HGVS format
386 Returntype : hashref where the key is the allele sequence and then value is the HGVS string
387 Exceptions : none
388 Status : At Risk
389
390 =cut
391
392 sub hgvs_protein {
393 return _hgvs_generic(@_,'protein');
394 }
395
396 =head
397
398 # We haven't implemented support for these methods yet
399
400 sub hgvs_rna {
401 return _hgvs_generic(@_,'rna');
402 }
403
404 sub hgvs_mitochondrial {
405 return _hgvs_generic(@_,'mitochondrial');
406 }
407 =cut
408
409 sub _hgvs_generic {
410 my $self = shift;
411 my $reference = pop;
412 my $hgvs = shift;
413
414 #ÊThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these
415 return undef if ($reference =~ m/rna|mitochondrial/);
416
417 # The HGVS subroutine
418 my $sub = qq{hgvs_$reference};
419
420 # Loop over the TranscriptVariationAllele objects associated with this TranscriptVariation
421 foreach my $tv_allele (@{ $self->get_all_alternate_TranscriptVariationAlleles }) {
422
423 #ÊIf an HGVS hash was supplied and the allele exists as key, set the HGVS notation for this allele
424 if (defined($hgvs)) {
425 my $notation = $hgvs->{$tv_allele->variation_feature_seq()};
426 $tv_allele->$sub($notation) if defined $notation;
427 }
428 # Else, add the HGVS notation for this allele to the HGVS hash
429 else {
430 $hgvs->{$tv_allele->variation_feature_seq()} = $tv_allele->$sub();
431 }
432 }
433
434 return $hgvs;
435 }
436
437 sub _prefetch_for_vep {
438 my $self = shift;
439
440 $self->cdna_coords;
441 $self->cds_coords;
442 $self->translation_coords;
443 $self->pep_allele_string;
444 }
445
446
447 1;