Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariation.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Variation::TranscriptVariation | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Variation::TranscriptVariation; | |
28 | |
29 my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new( | |
30 -transcript => $transcript, | |
31 -variation_feature => $var_feat | |
32 ); | |
33 | |
34 print "consequence type: ", (join ",", @{$tv->consequence_type}), "\n"; | |
35 print "cdna coords: ", $tv->cdna_start, '-', $tv->cdna_end, "\n"; | |
36 print "cds coords: ", $tv->cds_start, '-', $tv->cds_end, "\n"; | |
37 print "pep coords: ", $tv->translation_start, '-',$tv->translation_end, "\n"; | |
38 print "amino acid change: ", $tv->pep_allele_string, "\n"; | |
39 print "codon change: ", $tv->codons, "\n"; | |
40 print "allele sequences: ", (join ",", map { $_->variation_feature_seq } | |
41 @{ $tv->get_all_TranscriptVariationAlleles }, "\n"; | |
42 | |
43 =head1 DESCRIPTION | |
44 | |
45 A TranscriptVariation object represents a variation feature which is in close | |
46 proximity to an Ensembl transcript. A TranscriptVariation object has several | |
47 attributes which define the relationship of the variation to the transcript. | |
48 | |
49 =cut | |
50 | |
51 package Bio::EnsEMBL::Variation::TranscriptVariation; | |
52 | |
53 use strict; | |
54 use warnings; | |
55 | |
56 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref); | |
57 use Bio::EnsEMBL::Variation::TranscriptVariationAllele; | |
58 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap within_cds); | |
59 use Bio::EnsEMBL::Variation::BaseTranscriptVariation; | |
60 | |
61 use base qw(Bio::EnsEMBL::Variation::BaseTranscriptVariation Bio::EnsEMBL::Variation::VariationFeatureOverlap); | |
62 | |
63 =head2 new | |
64 | |
65 Arg [-TRANSCRIPT] : | |
66 The Bio::EnsEMBL::Transcript associated with the given VariationFeature | |
67 | |
68 Arg [-VARIATION_FEATURE] : | |
69 The Bio::EnsEMBL::VariationFeature associated with the given Transcript | |
70 | |
71 Arg [-ADAPTOR] : | |
72 A Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor | |
73 | |
74 Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] : | |
75 A flag indiciating if ambiguous single nucleotide alleles should be disambiguated | |
76 when constructing the TranscriptVariationAllele objects, e.g. a Variationfeature | |
77 with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit | |
78 ourselves to single nucleotide alleles to avoid the combinatorial explosion if we | |
79 allowed longer alleles with potentially many ambiguous bases. | |
80 | |
81 Example : | |
82 my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new( | |
83 -transcript => $transcript, | |
84 -variation_feature => $var_feat | |
85 ); | |
86 | |
87 Description: Constructs a new TranscriptVariation instance given a VariationFeature | |
88 and a Transcript, most of the work is done in the VariationFeatureOverlap | |
89 superclass - see there for more documentation. | |
90 Returntype : A new Bio::EnsEMBL::Variation::TranscriptVariation instance | |
91 Exceptions : throws unless both VARIATION_FEATURE and TRANSCRIPT are supplied | |
92 Status : At Risk | |
93 | |
94 =cut | |
95 | |
96 sub new { | |
97 my $class = shift; | |
98 | |
99 my %args = @_; | |
100 | |
101 # swap a '-transcript' argument for a '-feature' one for the superclass | |
102 | |
103 for my $arg (keys %args) { | |
104 if (lc($arg) eq '-transcript') { | |
105 $args{'-feature'} = delete $args{$arg}; | |
106 } | |
107 } | |
108 | |
109 # call the superclass constructor | |
110 my $self = $class->SUPER::new(%args) || return undef; | |
111 | |
112 # rebless the alleles from vfoas to tvas | |
113 map { bless $_, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele' } | |
114 @{ $self->get_all_TranscriptVariationAlleles }; | |
115 | |
116 return $self; | |
117 } | |
118 | |
119 sub get_TranscriptVariationAllele_for_allele_seq { | |
120 my ($self, $allele_seq) = @_; | |
121 return $self->SUPER::get_VariationFeatureOverlapAllele_for_allele_seq($allele_seq); | |
122 } | |
123 | |
124 =head2 add_TranscriptVariationAllele | |
125 | |
126 Arg [1] : A Bio::EnsEMBL::Variation::TranscriptVariationAllele instance | |
127 Description: Add an allele to this TranscriptVariation | |
128 Returntype : none | |
129 Exceptions : throws if the argument is not the expected type | |
130 Status : At Risk | |
131 | |
132 =cut | |
133 | |
134 sub add_TranscriptVariationAllele { | |
135 my ($self, $tva) = @_; | |
136 assert_ref($tva, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele'); | |
137 return $self->SUPER::add_VariationFeatureOverlapAllele($tva); | |
138 } | |
139 | |
140 =head2 get_reference_TranscriptVariationAllele | |
141 | |
142 Description: Get the object representing the reference allele of this TranscriptVariation | |
143 Returntype : Bio::EnsEMBL::Variation::TranscriptVariationAllele instance | |
144 Exceptions : none | |
145 Status : At Risk | |
146 | |
147 =cut | |
148 | |
149 sub get_reference_TranscriptVariationAllele { | |
150 my $self = shift; | |
151 return $self->SUPER::get_reference_VariationFeatureOverlapAllele(@_); | |
152 } | |
153 | |
154 =head2 get_all_alternate_TranscriptVariationAlleles | |
155 | |
156 Description: Get a list of the alternate alleles of this TranscriptVariation | |
157 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects | |
158 Exceptions : none | |
159 Status : At Risk | |
160 | |
161 =cut | |
162 | |
163 sub get_all_alternate_TranscriptVariationAlleles { | |
164 my $self = shift; | |
165 return $self->SUPER::get_all_alternate_VariationFeatureOverlapAlleles(@_); | |
166 } | |
167 | |
168 =head2 get_all_TranscriptVariationAlleles | |
169 | |
170 Description: Get a list of the all the alleles, both reference and alternate, of | |
171 this TranscriptVariation | |
172 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects | |
173 Exceptions : none | |
174 Status : At Risk | |
175 | |
176 =cut | |
177 | |
178 sub get_all_TranscriptVariationAlleles { | |
179 my $self = shift; | |
180 return $self->SUPER::get_all_VariationFeatureOverlapAlleles(@_); | |
181 } | |
182 | |
183 | |
184 =head2 cdna_allele_string | |
185 | |
186 Description: Return a '/' delimited string of the alleles of this variation with respect | |
187 to the associated transcript | |
188 Returntype : string | |
189 Exceptions : None | |
190 Caller : general | |
191 Status : Stable | |
192 | |
193 =cut | |
194 | |
195 sub cdna_allele_string { | |
196 my $self = shift; | |
197 | |
198 unless ($self->{_cdna_allele_string}) { | |
199 $self->{_cdna_allele_string} = join '/', map { $_->feature_seq } @{ $self->get_all_TranscriptVariationAlleles }; | |
200 } | |
201 | |
202 return $self->{_cdna_allele_string}; | |
203 } | |
204 | |
205 =head2 pep_allele_string | |
206 | |
207 Description: Return a '/' delimited string of amino acid codes representing | |
208 all the possible changes made to the peptide by this variation | |
209 Returntype : string | |
210 Exceptions : None | |
211 Caller : general | |
212 Status : Stable | |
213 | |
214 =cut | |
215 | |
216 sub pep_allele_string { | |
217 my $self = shift; | |
218 | |
219 unless ($self->{_pep_allele_string}) { | |
220 | |
221 my @peptides = grep { defined } map { $_->peptide } @{ $self->get_all_TranscriptVariationAlleles }; | |
222 | |
223 $self->{_pep_allele_string} = join '/', @peptides; | |
224 } | |
225 | |
226 return $self->{_pep_allele_string}; | |
227 } | |
228 | |
229 =head2 codons | |
230 | |
231 Description: Return a '/' delimited string of all possible codon sequences. | |
232 The variant sequence within the codon will be capitalised while | |
233 the rest of the codon sequence will be in lower case | |
234 Returntype : string | |
235 Exceptions : None | |
236 Caller : general | |
237 Status : Stable | |
238 | |
239 =cut | |
240 | |
241 sub codons { | |
242 my $self = shift; | |
243 | |
244 unless ($self->{_display_codon_allele_string}) { | |
245 | |
246 my @display_codons = grep { defined } map { $_->display_codon } @{ $self->get_all_TranscriptVariationAlleles }; | |
247 | |
248 $self->{_display_codon_allele_string} = join '/', @display_codons; | |
249 } | |
250 | |
251 return $self->{_display_codon_allele_string}; | |
252 } | |
253 | |
254 =head2 codon_position | |
255 | |
256 Description: For variations that fall in the CDS, returns the base in the | |
257 codon that this variation falls in | |
258 Returntype : int - 1, 2 or 3 | |
259 Exceptions : None | |
260 Caller : general | |
261 Status : At Risk | |
262 | |
263 =cut | |
264 | |
265 sub codon_position { | |
266 my $self = shift; | |
267 | |
268 unless ($self->{_codon_position}) { | |
269 | |
270 my $cdna_start = $self->cdna_start; | |
271 | |
272 my $tran_cdna_start = $self->transcript->cdna_coding_start; | |
273 | |
274 # we need to take into account the exon phase | |
275 my $exon_phase = $self->transcript->start_Exon->phase; | |
276 | |
277 my $phase_offset = $exon_phase > 0 ? $exon_phase : 0; | |
278 | |
279 if (defined $cdna_start && defined $tran_cdna_start) { | |
280 $self->{_codon_position} = (($cdna_start - $tran_cdna_start + $phase_offset) % 3) + 1; | |
281 } | |
282 } | |
283 | |
284 return $self->{_codon_position}; | |
285 } | |
286 | |
287 =head2 affects_cds | |
288 | |
289 Description: Check if any of this TranscriptVariation's alleles lie within | |
290 the CDS of the Transcript | |
291 Returntype : boolean | |
292 Exceptions : None | |
293 Caller : general | |
294 Status : At Risk | |
295 | |
296 =cut | |
297 | |
298 sub affects_cds { | |
299 my $self = shift; | |
300 return scalar grep { within_cds($_) } @{ $self->get_all_alternate_TranscriptVariationAlleles }; | |
301 } | |
302 | |
303 =head2 affects_peptide | |
304 | |
305 Description: Check if any of this TranscriptVariation's alleles change the | |
306 resultant peptide sequence | |
307 Returntype : boolean | |
308 Exceptions : None | |
309 Caller : general | |
310 Status : At Risk | |
311 | |
312 =cut | |
313 | |
314 sub affects_peptide { | |
315 my $self = shift; | |
316 return scalar grep { $_->SO_term =~ /stop|non_syn|frameshift|inframe|initiator/ } map {@{$_->get_all_OverlapConsequences}} @{ $self->get_all_alternate_TranscriptVariationAlleles }; | |
317 } | |
318 | |
319 | |
320 sub _protein_function_predictions { | |
321 | |
322 my ($self, $analysis) = @_; | |
323 | |
324 my $tran = $self->transcript; | |
325 | |
326 my $matrix = $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis}; | |
327 | |
328 unless ($matrix || exists($tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis})) { | |
329 my $pfpma = $self->{adaptor}->db->get_ProteinFunctionPredictionMatrixAdaptor; | |
330 | |
331 $matrix = $pfpma->fetch_by_analysis_translation_md5($analysis, $self->_translation_md5); | |
332 | |
333 $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis} = $matrix; | |
334 } | |
335 | |
336 return $matrix; | |
337 } | |
338 | |
339 =head2 hgvs_genomic | |
340 | |
341 Description: Return the strings representing the genomic-level effect of each of the alleles | |
342 of this variation in HGVS format | |
343 Returntype : hashref where the key is the allele sequence and then value is the HGVS string | |
344 Exceptions : none | |
345 Status : At Risk | |
346 | |
347 =cut | |
348 | |
349 sub hgvs_genomic { | |
350 return _hgvs_generic(@_,'genomic'); | |
351 } | |
352 | |
353 =head2 hgvs_coding | |
354 | |
355 Description: Return the strings representing the CDS-level effect of each of the alleles | |
356 of this variation in HGVS format | |
357 Returntype : hashref where the key is the allele sequence and then value is the HGVS string | |
358 Exceptions : none | |
359 Status : At Risk | |
360 | |
361 =cut | |
362 | |
363 sub hgvs_coding { | |
364 deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.'); | |
365 return _hgvs_generic(@_,'transcript'); | |
366 } | |
367 | |
368 =head2 hgvs_transcript | |
369 | |
370 Description: Return the strings representing the CDS-level effect of each of the alleles | |
371 of this variation in HGVS format | |
372 Returntype : hashref where the key is the allele sequence and then value is the HGVS string | |
373 Exceptions : none | |
374 Status : At Risk | |
375 | |
376 =cut | |
377 | |
378 sub hgvs_transcript { | |
379 return _hgvs_generic(@_,'transcript'); | |
380 } | |
381 | |
382 =head2 hgvs_protein | |
383 | |
384 Description: Return the strings representing the protein-level effect of each of the alleles | |
385 of this variation in HGVS format | |
386 Returntype : hashref where the key is the allele sequence and then value is the HGVS string | |
387 Exceptions : none | |
388 Status : At Risk | |
389 | |
390 =cut | |
391 | |
392 sub hgvs_protein { | |
393 return _hgvs_generic(@_,'protein'); | |
394 } | |
395 | |
396 =head | |
397 | |
398 # We haven't implemented support for these methods yet | |
399 | |
400 sub hgvs_rna { | |
401 return _hgvs_generic(@_,'rna'); | |
402 } | |
403 | |
404 sub hgvs_mitochondrial { | |
405 return _hgvs_generic(@_,'mitochondrial'); | |
406 } | |
407 =cut | |
408 | |
409 sub _hgvs_generic { | |
410 my $self = shift; | |
411 my $reference = pop; | |
412 my $hgvs = shift; | |
413 | |
414 #ÊThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these | |
415 return undef if ($reference =~ m/rna|mitochondrial/); | |
416 | |
417 # The HGVS subroutine | |
418 my $sub = qq{hgvs_$reference}; | |
419 | |
420 # Loop over the TranscriptVariationAllele objects associated with this TranscriptVariation | |
421 foreach my $tv_allele (@{ $self->get_all_alternate_TranscriptVariationAlleles }) { | |
422 | |
423 #ÊIf an HGVS hash was supplied and the allele exists as key, set the HGVS notation for this allele | |
424 if (defined($hgvs)) { | |
425 my $notation = $hgvs->{$tv_allele->variation_feature_seq()}; | |
426 $tv_allele->$sub($notation) if defined $notation; | |
427 } | |
428 # Else, add the HGVS notation for this allele to the HGVS hash | |
429 else { | |
430 $hgvs->{$tv_allele->variation_feature_seq()} = $tv_allele->$sub(); | |
431 } | |
432 } | |
433 | |
434 return $hgvs; | |
435 } | |
436 | |
437 sub _prefetch_for_vep { | |
438 my $self = shift; | |
439 | |
440 $self->cdna_coords; | |
441 $self->cds_coords; | |
442 $self->translation_coords; | |
443 $self->pep_allele_string; | |
444 } | |
445 | |
446 | |
447 1; |