comparison variant_effect_predictor/Bio/EnsEMBL/Variation/Utils/VariationEffect.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Variation::Utils::VariationEffect
24
25 =head1 DESCRIPTION
26
27 This module defines a set of predicate subroutines that check the effect of a
28 Bio::EnsEMBL::Variation::VariationFeature on some other Bio::EnsEMBL::Feature.
29 All of these predicates take a VariationFeatureOverlapAllele as their first and
30 only argument and return a true or false value depending on whether the effect
31 being checked for holds or not. The link between these predicates and the
32 specific effect is configured in the Bio::EnsEMBL::Variation::Utils::Config
33 module and a list of OverlapConsequence objects that represent a link between,
34 for example, a Sequence Ontology consequence term, and the predicate that
35 checks for it is provided in the Bio::EnsEMBL::Variation::Utils::Constants
36 module. If you want to add a new consequence you should write a predicate in
37 this module and then add an entry in the configuration file.
38
39 =cut
40
41 package Bio::EnsEMBL::Variation::Utils::VariationEffect;
42
43 use strict;
44 use warnings;
45
46 use base qw(Exporter);
47
48 our @EXPORT_OK = qw(overlap within_cds MAX_DISTANCE_FROM_TRANSCRIPT within_intron stop_lost affects_start_codon $UPSTREAM_DISTANCE $DOWNSTREAM_DISTANCE);
49
50 use constant MAX_DISTANCE_FROM_TRANSCRIPT => 5000;
51
52 our $UPSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
53 our $DOWNSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
54
55 #package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
56
57 sub overlap {
58 my ( $f1_start, $f1_end, $f2_start, $f2_end ) = @_;
59
60 return ( ($f1_end >= $f2_start) and ($f1_start <= $f2_end) );
61 }
62
63 sub within_feature {
64 my ($bvfoa, $feat) = @_;
65 my $bvf = $bvfoa->base_variation_feature;
66 $feat = $bvfoa->feature unless defined($feat);
67
68 return overlap(
69 $bvf->start,
70 $bvf->end,
71 $feat->start,
72 $feat->end
73 );
74 }
75
76 sub partial_overlap_feature {
77 my ($bvfoa, $feat) = @_;
78 my $bvf = $bvfoa->base_variation_feature;
79 $feat = $bvfoa->feature unless defined($feat);
80
81 return (
82 within_feature(@_) and
83 (not complete_overlap_feature(@_)) and
84 (($bvf->end > $feat->end) or ($bvf->start < $feat->start))
85 );
86 }
87
88 sub complete_within_feature {
89 my ($bvfoa, $feat) = @_;
90 my $bvf = $bvfoa->base_variation_feature;
91 $feat = $bvfoa->feature unless defined($feat);
92
93 return (
94 ($bvf->start >= $feat->start) and
95 ($bvf->end <= $feat->end)
96 );
97 }
98
99 sub complete_overlap_feature {
100 my ($bvfoa, $feat) = @_;
101 my $bvf = $bvfoa->base_variation_feature;
102 $feat = $bvfoa->feature unless defined($feat);
103
104 return (
105 ($bvf->start <= $feat->start) and
106 ($bvf->end >= $feat->end)
107 );
108 }
109
110 sub deletion {
111 my $bvfoa = shift;
112
113 my $bvf = $bvfoa->base_variation_feature;
114
115 # sequence variant will have alleles
116 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
117 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
118 return (
119 (defined($ref_allele) && ($alt_allele eq '') and $ref_allele) or
120 $bvf->allele_string =~ /deletion/i
121 );
122 }
123
124 # structural variant depends on class
125 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
126 return (
127 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'deletion') or
128 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /deletion/i) or
129 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i)
130 );
131 }
132
133 else { return 0; }
134 }
135
136 sub insertion {
137 my $bvfoa = shift;
138
139 my $bvf = $bvfoa->base_variation_feature;
140
141 # sequence variant will have alleles
142 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
143 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
144 return (
145 (defined($ref_allele) && ($ref_allele eq '') and $alt_allele) or
146 $bvf->allele_string =~ /insertion/i
147 );
148 }
149
150 # structural variant depends on class
151 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
152 return (
153 duplication($bvfoa) or
154 tandem_duplication($bvfoa) or
155 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'insertion') or
156 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /insertion/i) or
157 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i)
158 );
159 }
160
161 else { return 0; }
162 }
163
164 sub copy_number_gain {
165 my $bvfoa = shift;
166
167 return (duplication($bvfoa) or tandem_duplication($bvfoa) or $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i);
168 }
169
170 sub copy_number_loss {
171 my $bvfoa = shift;
172
173 return $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i;
174 }
175
176 sub duplication {
177 my $bvfoa = shift;
178
179 return (
180 (
181 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'duplication') or
182 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /duplication/i)
183 ) and
184 (not tandem_duplication($bvfoa))
185 );
186 }
187
188 sub tandem_duplication {
189 my $bvfoa = shift;
190
191 my $bvf = $bvfoa->base_variation_feature;
192
193 # for sequence variants, check sequence vs ref
194 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
195 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
196
197 return 0 unless $ref_allele and $alt_allele;
198 return 0 unless
199 length($alt_allele) > length($ref_allele) and
200 length($alt_allele) % length($ref_allele) == 0;
201
202 my $copies = length($alt_allele) / length($ref_allele);
203
204 return $alt_allele eq $ref_allele x $copies;
205 }
206
207 # structural variant depends on class
208 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
209 return (
210 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'tandem_duplication') or
211 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /tandem_duplication/i)
212 );
213 }
214 }
215
216 sub feature_ablation {
217 my $bvfoa = shift;
218
219 return (deletion($bvfoa) and complete_overlap_feature($bvfoa));
220 }
221
222 sub feature_amplification {
223 my $bvfoa = shift;
224
225 return (copy_number_gain($bvfoa) && complete_overlap_feature($bvfoa));
226 }
227
228 sub feature_elongation {
229 my $bvfoa = shift;
230
231 return (
232 complete_within_feature($bvfoa) and
233 (copy_number_gain($bvfoa) or insertion($bvfoa)) and
234 not(
235 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and
236 (inframe_insertion($bvfoa) or stop_lost($bvfoa))
237 )
238 );
239 }
240
241 sub feature_truncation {
242 my $bvfoa = shift;
243
244 return (
245 (partial_overlap_feature($bvfoa) or complete_within_feature($bvfoa)) and
246 (copy_number_loss($bvfoa) or deletion($bvfoa)) and
247 not(
248 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and
249 (inframe_deletion($bvfoa) or stop_gained($bvfoa))
250 )
251 );
252 }
253
254 #sub transcript_fusion {
255 # #my $bvfoa = shift;
256 # #my $bvf = $bvfoa->base_variation_feature;
257 #
258 # return 0;
259 #
260 # #my $transcripts = $bvf->_get_overlapping_Transcripts();
261 #}
262
263 sub _before_start {
264 my ($bvf, $feat, $dist) = @_;
265
266 return ( ($bvf->end >= ($feat->start - $dist)) and
267 ($bvf->end < $feat->start) );
268 }
269
270 sub _after_end {
271 my ($bvf, $feat, $dist) = @_;
272 return ( ($bvf->start <= ($feat->end + $dist))
273 and ($bvf->start > $feat->end) );
274 }
275
276 sub _upstream {
277 my ($bvf, $feat, $dist) = @_;
278 return $feat->strand == 1 ?
279 _before_start($bvf, $feat, $dist) :
280 _after_end($bvf, $feat, $dist);
281 }
282
283 sub _downstream {
284 my ($bvf, $feat, $dist) = @_;
285 return $feat->strand == 1 ?
286 _after_end($bvf, $feat, $dist) :
287 _before_start($bvf, $feat, $dist);
288 }
289
290 #package Bio::EnsEMBL::Variation::TranscriptVariationAllele;
291
292 sub upstream {
293 my $vfoa = shift;
294 my $bvf = $vfoa->base_variation_feature;
295 my $feat = $vfoa->feature;
296
297 return _upstream($bvf, $feat, $UPSTREAM_DISTANCE);
298 }
299
300 sub downstream {
301 my $vfoa = shift;
302 my $bvf = $vfoa->base_variation_feature;
303 my $feat = $vfoa->feature;
304
305 return _downstream($bvf, $feat, $DOWNSTREAM_DISTANCE);
306 }
307
308 sub affects_transcript {
309 my ($bvf, $tran) = @_;
310
311 return 0 unless $tran->isa('Bio::EnsEMBL::Transcript');
312
313 return overlap(
314 $bvf->start,
315 $bvf->end,
316 $tran->start - 5000,
317 $tran->end + 5000
318 );
319 }
320
321 sub within_transcript {
322 my $bvfoa = shift;
323 return within_feature($bvfoa);
324 }
325
326 sub within_nmd_transcript {
327 my $bvfoa = shift;
328 my $tran = $bvfoa->transcript;
329
330 return ( within_transcript($bvfoa) and ($tran->biotype eq 'nonsense_mediated_decay') );
331 }
332
333 sub within_non_coding_gene {
334 my $bvfoa = shift;
335 my $tran = $bvfoa->transcript;
336
337 return ( within_transcript($bvfoa) and (not $tran->translation) and (not within_mature_miRNA($bvfoa)));
338 }
339
340 sub non_coding_exon_variant {
341 my $bvfoa = shift;
342
343 return 0 unless within_non_coding_gene($bvfoa);
344
345 my $bvf = $bvfoa->base_variation_feature;
346 my $exons = $bvfoa->base_variation_feature_overlap->_exons;
347
348 if(scalar grep {overlap($bvf->start, $bvf->end, $_->start, $_->end)} @$exons) {
349 return 1;
350 }
351 else {
352 return 0;
353 }
354 }
355
356 sub within_miRNA {
357 my $bvfoa = shift;
358 my $tran = $bvfoa->transcript;
359
360 # don't call this for now
361
362 return 0;
363
364 return ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') );
365 }
366
367 sub within_mature_miRNA {
368 my $bvfoa = shift;
369
370 my $bvfo = $bvfoa->base_variation_feature_overlap;
371 my $bvf = $bvfoa->base_variation_feature;
372 my $tran = $bvfoa->transcript;
373
374 return 0 unless ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') );
375
376 my ($attribute) = @{ $tran->get_all_Attributes('miRNA') };
377
378 if (defined $attribute && $attribute->value =~ /(\d+)-(\d+)/) {
379 for my $coord ($bvfo->_mapper->cdna2genomic($1, $2, $tran->strand)) {
380 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
381 if (overlap(
382 $bvf->start,
383 $bvf->end,
384 $coord->start,
385 $coord->end) ) {
386 return 1;
387 }
388 }
389 }
390 }
391
392 return 0;
393 }
394
395 sub donor_splice_site {
396 my $bvfoa = shift;
397 my $tran = $bvfoa->transcript;
398
399 return $tran->strand == 1 ?
400 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site} :
401 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site};
402 }
403
404 sub acceptor_splice_site {
405 my $bvfoa = shift;
406 my $tran = $bvfoa->transcript;
407
408 return $tran->strand == 1 ?
409 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site} :
410 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site};
411 }
412
413 sub essential_splice_site {
414 my $bvfoa = shift;
415
416 return ( acceptor_splice_site($bvfoa) or donor_splice_site($bvfoa) );
417 }
418
419 sub splice_region {
420 my $bvfoa = shift;
421
422 return 0 if donor_splice_site($bvfoa);
423 return 0 if acceptor_splice_site($bvfoa);
424 return 0 if essential_splice_site($bvfoa);
425
426 return $bvfoa->base_variation_feature_overlap->_intron_effects->{splice_region};
427 }
428
429 sub within_intron {
430 my $bvfoa = shift;
431
432 return $bvfoa->base_variation_feature_overlap->_intron_effects->{intronic};
433 }
434
435 sub within_cds {
436 my $bvfoa = shift;
437 my $bvf = $bvfoa->base_variation_feature;
438 my $tran = $bvfoa->transcript;
439 my $bvfo = $bvfoa->base_variation_feature_overlap;
440
441 my $cds_coords = $bvfoa->base_variation_feature_overlap->cds_coords;
442
443 if (@$cds_coords > 0) {
444 for my $coord (@$cds_coords) {
445 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
446 if ($coord->end > 0 && $coord->start <= length($bvfo->_translateable_seq)) {
447 return 1;
448 }
449 }
450 }
451 }
452
453 # we also need to check if the vf is in a frameshift intron within the CDS
454
455 if (defined $tran->translation &&
456 $bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) {
457
458 return overlap(
459 $bvf->start,
460 $bvf->end,
461 $tran->coding_region_start,
462 $tran->coding_region_end,
463 );
464 }
465
466 return 0;
467 }
468
469 sub within_cdna {
470 my $bvfoa = shift;
471 my $bvf = $bvfoa->base_variation_feature;
472 my $tran = $bvfoa->transcript;
473 my $bvfo = $bvfoa->base_variation_feature_overlap;
474
475 my $cdna_coords = $bvfo->cdna_coords;
476
477 if (@$cdna_coords > 0) {
478 for my $coord (@$cdna_coords) {
479 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
480 if ($coord->end > 0 && $coord->start <= $tran->length) {
481 return 1;
482 }
483 }
484 }
485 }
486
487 # we also need to check if the vf is in a frameshift intron within the cDNA
488
489 if ($bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) {
490 return within_transcript($bvfoa);
491 }
492
493 return 0;
494 }
495
496 sub _before_coding {
497 my ($bvf, $tran) = @_;
498 return 0 unless defined $tran->translation;
499
500 my $bvf_s = $bvf->start;
501 my $bvf_e = $bvf->end;
502 my $t_s = $tran->start;
503 my $cds_s = $tran->coding_region_start;
504
505 # we need to special case insertions just before the CDS start
506 if ($bvf_s == $bvf_e+1 && $bvf_s == $cds_s) {
507 return 1;
508 }
509
510 return overlap($bvf_s, $bvf_e, $t_s, $cds_s-1);
511 }
512
513 sub _after_coding {
514 my ($bvf, $tran) = @_;
515 return 0 unless defined $tran->translation;
516
517 my $bvf_s = $bvf->start;
518 my $bvf_e = $bvf->end;
519 my $t_e = $tran->end;
520 my $cds_e = $tran->coding_region_end;
521
522 # we need to special case insertions just after the CDS end
523 if ($bvf_s == $bvf_e+1 && $bvf_e == $cds_e) {
524 return 1;
525 }
526
527 return overlap($bvf_s, $bvf_e, $cds_e+1, $t_e);
528 }
529
530 sub within_5_prime_utr {
531 my $bvfoa = shift;
532 my $bvf = $bvfoa->base_variation_feature;
533 my $tran = $bvfoa->transcript;
534
535 my $five_prime_of_coding =
536 $tran->strand == 1 ?
537 _before_coding($bvf, $tran) :
538 _after_coding($bvf, $tran);
539
540 return ( $five_prime_of_coding and within_cdna($bvfoa) );
541 }
542
543 sub within_3_prime_utr {
544 my $bvfoa = shift;
545 my $bvf = $bvfoa->base_variation_feature;
546 my $tran = $bvfoa->transcript;
547
548 my $three_prime_of_coding =
549 $tran->strand == 1 ?
550 _after_coding($bvf, $tran) :
551 _before_coding($bvf, $tran);
552
553 return ( $three_prime_of_coding and within_cdna($bvfoa) );
554 }
555
556 sub complex_indel {
557 my $bvfoa = shift;
558 my $bvf = $bvfoa->base_variation_feature;
559
560 # pass the no_db flag to var_class to ensure we don't rely on the database for it
561 # as it may not have been set at this stage in the pipeline
562 my $class = $bvf->var_class(1);
563
564 return 0 unless $class =~ /insertion|deletion|indel/;
565
566 return @{ $bvfoa->base_variation_feature_overlap->cds_coords } > 1;
567 }
568
569 sub _get_peptide_alleles {
570 my $bvfoa = shift;
571 my $bvfo = $bvfoa->base_variation_feature_overlap;
572
573 return () if frameshift($bvfoa);
574
575 my $alt_pep = $bvfoa->peptide;
576
577 return () unless defined $alt_pep;
578
579 my $ref_pep = $bvfo->get_reference_TranscriptVariationAllele->peptide;
580
581 return () unless defined $ref_pep;
582
583 $ref_pep = '' if $ref_pep eq '-';
584 $alt_pep = '' if $alt_pep eq '-';
585
586 return ($ref_pep, $alt_pep);
587 }
588
589 sub _get_codon_alleles {
590 my $bvfoa = shift;
591 my $bvfo = $bvfoa->base_variation_feature_overlap;
592
593 return () if frameshift($bvfoa);
594
595 my $alt_codon = $bvfoa->codon;
596
597 return () unless defined $alt_codon;
598
599 my $ref_codon = $bvfo->get_reference_TranscriptVariationAllele->codon;
600
601 return () unless defined $ref_codon;
602
603 $ref_codon = '' if $ref_codon eq '-';
604 $alt_codon = '' if $alt_codon eq '-';
605
606 return ($ref_codon, $alt_codon);
607 }
608
609 sub _get_alleles {
610 my $bvfoa = shift;
611 my $bvfo = $bvfoa->base_variation_feature_overlap;
612
613 my $ref_tva = $bvfo->get_reference_VariationFeatureOverlapAllele;
614
615 return () unless defined ($ref_tva);
616
617 my $ref_allele = $ref_tva->variation_feature_seq;
618 my $alt_allele = $bvfoa->variation_feature_seq;
619
620 return () unless defined($ref_allele) and defined($alt_allele);
621
622 $ref_allele = '' if $ref_allele eq '-';
623 $alt_allele = '' if $alt_allele eq '-';
624
625 return ($ref_allele, $alt_allele);
626 }
627
628 sub stop_retained {
629 my $bvfoa = shift;
630
631 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
632
633 return 0 unless $ref_pep;
634
635 return ( $alt_pep =~ /\*/ && $ref_pep =~ /\*/ );
636 }
637
638 sub affects_start_codon {
639 my $bvfoa = shift;
640 my $bvfo = $bvfoa->base_variation_feature_overlap;
641
642 # sequence variant
643 if($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptVariation')) {
644 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
645
646 return 0 unless $ref_pep;
647
648 return ( ($bvfo->translation_start == 1) and (substr($ref_pep,0,1) ne substr($alt_pep,0,1)) );
649 }
650
651 # structural variant
652 elsif($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariation')) {
653 my $tr = $bvfo->transcript;
654 my $bvf = $bvfo->base_variation_feature;
655
656 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end);
657 return 0 unless defined($tr_crs) && defined($tr_cre);
658
659 if($tr->strand == 1) {
660 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end);
661 }
662 else {
663 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end);
664 }
665 }
666
667 else {
668 return 0;
669 }
670 }
671
672 sub synonymous_variant {
673 my $bvfoa = shift;
674
675 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
676
677 return 0 unless $ref_pep;
678
679 return ( ($alt_pep eq $ref_pep) and (not stop_retained($bvfoa) and ($alt_pep !~ /X/) and ($ref_pep !~ /X/)) );
680 }
681
682 sub missense_variant {
683 my $bvfoa = shift;
684
685 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
686
687 return 0 unless defined $ref_pep;
688
689 return 0 if affects_start_codon($bvfoa);
690 return 0 if stop_lost($bvfoa);
691 return 0 if stop_gained($bvfoa);
692 return 0 if partial_codon($bvfoa);
693
694 return 0 if inframe_deletion($bvfoa);
695 return 0 if inframe_insertion($bvfoa);
696
697 return ( $ref_pep ne $alt_pep );
698 }
699
700 sub inframe_insertion {
701 my $bvfoa = shift;
702
703 # sequence variant
704 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
705 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa);
706
707 return 0 unless defined $ref_codon;
708
709 return (
710 (length($alt_codon) > length ($ref_codon)) &&
711 ( ($alt_codon =~ /^\Q$ref_codon\E/) || ($alt_codon =~ /\Q$ref_codon\E$/) )
712 );
713 }
714
715 # structural variant
716 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
717
718 # TO BE DONE, NO WAY OF KNOWING WHAT SEQUENCE IS INSERTED YET
719 return 0;
720
721 # must be an insertion
722 return 0 unless insertion($bvfoa);
723
724 my $bvfo = $bvfoa->base_variation_feature_overlap;
725
726 my $cds_coords = $bvfo->cds_coords;
727
728 if(scalar @$cds_coords == 1) {
729
730 # wholly within exon
731 if($cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
732 1;
733 }
734 }
735
736 # variant partially overlaps
737 else {
738 return 0;
739 }
740 }
741
742 else {
743 return 0;
744 }
745 }
746
747 sub inframe_deletion {
748 my $bvfoa = shift;
749
750 # sequence variant
751 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
752 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa);
753
754 return 0 unless defined $ref_codon;
755
756 return (
757 (length($alt_codon) < length ($ref_codon)) &&
758 ( ($ref_codon =~ /^\Q$alt_codon\E/) || ($ref_codon =~ /\Q$alt_codon\E$/) )
759 );
760 }
761
762 # structural variant
763 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
764
765 # must be a deletion
766 return 0 unless deletion($bvfoa);
767
768 my $bvfo = $bvfoa->base_variation_feature_overlap;
769 my $cds_coords = $bvfo->cds_coords;
770 my $exons = $bvfo->_exons;
771 my $bvf = $bvfo->base_variation_feature;
772
773 # in exon
774 return (
775 scalar @$cds_coords == 1 and
776 $cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate') and
777 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and
778 $bvf->length() % 3 == 0
779 );
780 }
781
782 else {
783 return 0;
784 }
785 }
786
787 sub stop_gained {
788 my $bvfoa = shift;
789 my $bvfo = $bvfoa->base_variation_feature_overlap;
790
791 return 0 unless $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele');
792
793 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
794
795 return 0 unless defined $ref_pep;
796
797 return ( ($alt_pep =~ /\*/) and ($ref_pep !~ /\*/) );
798 }
799
800 sub stop_lost {
801 my $bvfoa = shift;
802
803 # sequence variant
804 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
805 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
806
807 return 0 unless defined $ref_pep;
808
809 return ( ($alt_pep !~ /\*/) and ($ref_pep =~ /\*/) );
810 }
811
812 # structural variant
813 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
814 return 0 unless deletion($bvfoa);
815
816 my $tr = $bvfoa->transcript;
817 my $bvf = $bvfoa->base_variation_feature;
818
819 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end);
820 return 0 unless defined($tr_crs) && defined($tr_cre);
821
822 if($tr->strand == 1) {
823 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end);
824 }
825 else {
826 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end);
827 }
828 }
829
830 else {
831 return 0;
832 }
833 }
834
835 sub frameshift {
836 my $bvfoa = shift;
837
838 # sequence variant
839 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
840
841 return 0 if partial_codon($bvfoa);
842
843 my $bvfo = $bvfoa->base_variation_feature_overlap;
844
845 return 0 unless defined $bvfo->cds_start && defined $bvfo->cds_end;
846
847 my $var_len = $bvfo->cds_end - $bvfo->cds_start + 1;
848
849 my $allele_len = $bvfoa->seq_length;
850
851 # if the allele length is undefined then we can't call a frameshift
852
853 return 0 unless defined $allele_len;
854
855 return abs( $allele_len - $var_len ) % 3;
856 }
857
858 # structural variant
859 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
860 my $bvf = $bvfoa->base_variation_feature;
861 my $exons = $bvfoa->base_variation_feature_overlap->_exons;
862
863 return (
864 (
865 deletion($bvfoa) or
866 copy_number_loss($bvfoa)
867 ) and
868 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and
869 $bvf->length % 3 != 0
870 );
871
872 # TODO INSERTIONS
873 }
874
875 else {
876 return 0;
877 }
878 }
879
880 sub partial_codon {
881 my $bvfoa = shift;
882
883 my $bvfo = $bvfoa->base_variation_feature_overlap;
884
885 return 0 unless defined $bvfo->translation_start;
886
887 my $cds_length = length $bvfo->_translateable_seq;
888
889 my $codon_cds_start = ($bvfo->translation_start * 3) - 2;
890
891 my $last_codon_length = $cds_length - ($codon_cds_start - 1);
892
893 return ( $last_codon_length < 3 and $last_codon_length > 0 );
894 }
895
896 sub coding_unknown {
897 my $bvfoa = shift;
898
899 # sequence variant
900 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
901 return (within_cds($bvfoa) and ((not $bvfoa->peptide) or (not _get_peptide_alleles($bvfoa)) or ($bvfoa->peptide =~ /X/)) and (not (frameshift($bvfoa) or inframe_deletion($bvfoa))));
902 }
903
904 # structural variant
905 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
906 return (within_cds($bvfoa) and not(inframe_insertion($bvfoa) or inframe_deletion($bvfoa) or frameshift($bvfoa)));
907 }
908
909 else {
910 return 0;
911 }
912 }
913
914 #package Bio::EnsEMBL::Variation::RegulatoryFeatureVariationAllele;
915
916 sub within_regulatory_feature {
917 my $rfva = shift;
918 return within_feature($rfva);
919 }
920
921 #package Bio::EnsEMBL::Variation::ExternalFeatureVariationAllele;
922
923 sub within_external_feature {
924 my $efva = shift;
925 return (within_feature($efva) and (not within_miRNA_target_site($efva)));
926 }
927
928 #sub within_miRNA_target_site {
929 # my $efva = shift;
930 #
931 # my $fset = $efva->variation_feature_overlap->feature->feature_set;
932 #
933 # return ($fset && $fset->name eq 'miRanda miRNA targets');
934 #}
935
936 #package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele;
937
938 #sub within_motif_feature {
939 # my $mfva = shift;
940 # return (
941 # within_feature($mfva) and
942 # !increased_binding_affinity($mfva) and
943 # !decreased_binding_affinity($mfva)
944 # );
945 #}
946
947 sub within_motif_feature {
948 my $mfva = shift;
949 return within_feature($mfva);
950 }
951
952 #sub increased_binding_affinity {
953 # my $mfva = shift;
954 # my $change = $mfva->binding_affinity_change;
955 # return (within_feature($mfva) and (defined $change) and ($change > 0));
956 #}
957 #
958 #sub decreased_binding_affinity {
959 # my $mfva = shift;
960 # my $change = $mfva->binding_affinity_change;
961 # return (within_feature($mfva) and (defined $change) and ($change < 0));
962 #}
963
964 sub contains_entire_feature {
965 my $vfo = shift;
966
967 my $bvf = $vfo->base_variation_feature;
968 my $feat = $vfo->feature;
969
970 return ( ($bvf->start <= $feat->start) && ($bvf->end >= $feat->end) );
971 }
972
973 1;
974