0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Variation::Utils::VariationEffect
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 This module defines a set of predicate subroutines that check the effect of a
|
|
28 Bio::EnsEMBL::Variation::VariationFeature on some other Bio::EnsEMBL::Feature.
|
|
29 All of these predicates take a VariationFeatureOverlapAllele as their first and
|
|
30 only argument and return a true or false value depending on whether the effect
|
|
31 being checked for holds or not. The link between these predicates and the
|
|
32 specific effect is configured in the Bio::EnsEMBL::Variation::Utils::Config
|
|
33 module and a list of OverlapConsequence objects that represent a link between,
|
|
34 for example, a Sequence Ontology consequence term, and the predicate that
|
|
35 checks for it is provided in the Bio::EnsEMBL::Variation::Utils::Constants
|
|
36 module. If you want to add a new consequence you should write a predicate in
|
|
37 this module and then add an entry in the configuration file.
|
|
38
|
|
39 =cut
|
|
40
|
|
41 package Bio::EnsEMBL::Variation::Utils::VariationEffect;
|
|
42
|
|
43 use strict;
|
|
44 use warnings;
|
|
45
|
|
46 use base qw(Exporter);
|
|
47
|
|
48 our @EXPORT_OK = qw(overlap within_cds MAX_DISTANCE_FROM_TRANSCRIPT within_intron stop_lost affects_start_codon $UPSTREAM_DISTANCE $DOWNSTREAM_DISTANCE);
|
|
49
|
|
50 use constant MAX_DISTANCE_FROM_TRANSCRIPT => 5000;
|
|
51
|
|
52 our $UPSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
|
|
53 our $DOWNSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
|
|
54
|
|
55 #package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
|
|
56
|
|
57 sub overlap {
|
|
58 my ( $f1_start, $f1_end, $f2_start, $f2_end ) = @_;
|
|
59
|
|
60 return ( ($f1_end >= $f2_start) and ($f1_start <= $f2_end) );
|
|
61 }
|
|
62
|
|
63 sub within_feature {
|
|
64 my ($bvfoa, $feat) = @_;
|
|
65 my $bvf = $bvfoa->base_variation_feature;
|
|
66 $feat = $bvfoa->feature unless defined($feat);
|
|
67
|
|
68 return overlap(
|
|
69 $bvf->start,
|
|
70 $bvf->end,
|
|
71 $feat->start,
|
|
72 $feat->end
|
|
73 );
|
|
74 }
|
|
75
|
|
76 sub partial_overlap_feature {
|
|
77 my ($bvfoa, $feat) = @_;
|
|
78 my $bvf = $bvfoa->base_variation_feature;
|
|
79 $feat = $bvfoa->feature unless defined($feat);
|
|
80
|
|
81 return (
|
|
82 within_feature(@_) and
|
|
83 (not complete_overlap_feature(@_)) and
|
|
84 (($bvf->end > $feat->end) or ($bvf->start < $feat->start))
|
|
85 );
|
|
86 }
|
|
87
|
|
88 sub complete_within_feature {
|
|
89 my ($bvfoa, $feat) = @_;
|
|
90 my $bvf = $bvfoa->base_variation_feature;
|
|
91 $feat = $bvfoa->feature unless defined($feat);
|
|
92
|
|
93 return (
|
|
94 ($bvf->start >= $feat->start) and
|
|
95 ($bvf->end <= $feat->end)
|
|
96 );
|
|
97 }
|
|
98
|
|
99 sub complete_overlap_feature {
|
|
100 my ($bvfoa, $feat) = @_;
|
|
101 my $bvf = $bvfoa->base_variation_feature;
|
|
102 $feat = $bvfoa->feature unless defined($feat);
|
|
103
|
|
104 return (
|
|
105 ($bvf->start <= $feat->start) and
|
|
106 ($bvf->end >= $feat->end)
|
|
107 );
|
|
108 }
|
|
109
|
|
110 sub deletion {
|
|
111 my $bvfoa = shift;
|
|
112
|
|
113 my $bvf = $bvfoa->base_variation_feature;
|
|
114
|
|
115 # sequence variant will have alleles
|
|
116 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
|
|
117 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
|
|
118 return (
|
|
119 (defined($ref_allele) && ($alt_allele eq '') and $ref_allele) or
|
|
120 $bvf->allele_string =~ /deletion/i
|
|
121 );
|
|
122 }
|
|
123
|
|
124 # structural variant depends on class
|
|
125 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
|
|
126 return (
|
|
127 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'deletion') or
|
|
128 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /deletion/i) or
|
|
129 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i)
|
|
130 );
|
|
131 }
|
|
132
|
|
133 else { return 0; }
|
|
134 }
|
|
135
|
|
136 sub insertion {
|
|
137 my $bvfoa = shift;
|
|
138
|
|
139 my $bvf = $bvfoa->base_variation_feature;
|
|
140
|
|
141 # sequence variant will have alleles
|
|
142 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
|
|
143 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
|
|
144 return (
|
|
145 (defined($ref_allele) && ($ref_allele eq '') and $alt_allele) or
|
|
146 $bvf->allele_string =~ /insertion/i
|
|
147 );
|
|
148 }
|
|
149
|
|
150 # structural variant depends on class
|
|
151 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
|
|
152 return (
|
|
153 duplication($bvfoa) or
|
|
154 tandem_duplication($bvfoa) or
|
|
155 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'insertion') or
|
|
156 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /insertion/i) or
|
|
157 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i)
|
|
158 );
|
|
159 }
|
|
160
|
|
161 else { return 0; }
|
|
162 }
|
|
163
|
|
164 sub copy_number_gain {
|
|
165 my $bvfoa = shift;
|
|
166
|
|
167 return (duplication($bvfoa) or tandem_duplication($bvfoa) or $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i);
|
|
168 }
|
|
169
|
|
170 sub copy_number_loss {
|
|
171 my $bvfoa = shift;
|
|
172
|
|
173 return $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i;
|
|
174 }
|
|
175
|
|
176 sub duplication {
|
|
177 my $bvfoa = shift;
|
|
178
|
|
179 return (
|
|
180 (
|
|
181 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'duplication') or
|
|
182 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /duplication/i)
|
|
183 ) and
|
|
184 (not tandem_duplication($bvfoa))
|
|
185 );
|
|
186 }
|
|
187
|
|
188 sub tandem_duplication {
|
|
189 my $bvfoa = shift;
|
|
190
|
|
191 my $bvf = $bvfoa->base_variation_feature;
|
|
192
|
|
193 # for sequence variants, check sequence vs ref
|
|
194 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
|
|
195 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa);
|
|
196
|
|
197 return 0 unless $ref_allele and $alt_allele;
|
|
198 return 0 unless
|
|
199 length($alt_allele) > length($ref_allele) and
|
|
200 length($alt_allele) % length($ref_allele) == 0;
|
|
201
|
|
202 my $copies = length($alt_allele) / length($ref_allele);
|
|
203
|
|
204 return $alt_allele eq $ref_allele x $copies;
|
|
205 }
|
|
206
|
|
207 # structural variant depends on class
|
|
208 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
|
|
209 return (
|
|
210 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'tandem_duplication') or
|
|
211 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /tandem_duplication/i)
|
|
212 );
|
|
213 }
|
|
214 }
|
|
215
|
|
216 sub feature_ablation {
|
|
217 my $bvfoa = shift;
|
|
218
|
|
219 return (deletion($bvfoa) and complete_overlap_feature($bvfoa));
|
|
220 }
|
|
221
|
|
222 sub feature_amplification {
|
|
223 my $bvfoa = shift;
|
|
224
|
|
225 return (copy_number_gain($bvfoa) && complete_overlap_feature($bvfoa));
|
|
226 }
|
|
227
|
|
228 sub feature_elongation {
|
|
229 my $bvfoa = shift;
|
|
230
|
|
231 return (
|
|
232 complete_within_feature($bvfoa) and
|
|
233 (copy_number_gain($bvfoa) or insertion($bvfoa)) and
|
|
234 not(
|
|
235 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and
|
|
236 (inframe_insertion($bvfoa) or stop_lost($bvfoa))
|
|
237 )
|
|
238 );
|
|
239 }
|
|
240
|
|
241 sub feature_truncation {
|
|
242 my $bvfoa = shift;
|
|
243
|
|
244 return (
|
|
245 (partial_overlap_feature($bvfoa) or complete_within_feature($bvfoa)) and
|
|
246 (copy_number_loss($bvfoa) or deletion($bvfoa)) and
|
|
247 not(
|
|
248 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and
|
|
249 (inframe_deletion($bvfoa) or stop_gained($bvfoa))
|
|
250 )
|
|
251 );
|
|
252 }
|
|
253
|
|
254 #sub transcript_fusion {
|
|
255 # #my $bvfoa = shift;
|
|
256 # #my $bvf = $bvfoa->base_variation_feature;
|
|
257 #
|
|
258 # return 0;
|
|
259 #
|
|
260 # #my $transcripts = $bvf->_get_overlapping_Transcripts();
|
|
261 #}
|
|
262
|
|
263 sub _before_start {
|
|
264 my ($bvf, $feat, $dist) = @_;
|
|
265
|
|
266 return ( ($bvf->end >= ($feat->start - $dist)) and
|
|
267 ($bvf->end < $feat->start) );
|
|
268 }
|
|
269
|
|
270 sub _after_end {
|
|
271 my ($bvf, $feat, $dist) = @_;
|
|
272 return ( ($bvf->start <= ($feat->end + $dist))
|
|
273 and ($bvf->start > $feat->end) );
|
|
274 }
|
|
275
|
|
276 sub _upstream {
|
|
277 my ($bvf, $feat, $dist) = @_;
|
|
278 return $feat->strand == 1 ?
|
|
279 _before_start($bvf, $feat, $dist) :
|
|
280 _after_end($bvf, $feat, $dist);
|
|
281 }
|
|
282
|
|
283 sub _downstream {
|
|
284 my ($bvf, $feat, $dist) = @_;
|
|
285 return $feat->strand == 1 ?
|
|
286 _after_end($bvf, $feat, $dist) :
|
|
287 _before_start($bvf, $feat, $dist);
|
|
288 }
|
|
289
|
|
290 #package Bio::EnsEMBL::Variation::TranscriptVariationAllele;
|
|
291
|
|
292 sub upstream {
|
|
293 my $vfoa = shift;
|
|
294 my $bvf = $vfoa->base_variation_feature;
|
|
295 my $feat = $vfoa->feature;
|
|
296
|
|
297 return _upstream($bvf, $feat, $UPSTREAM_DISTANCE);
|
|
298 }
|
|
299
|
|
300 sub downstream {
|
|
301 my $vfoa = shift;
|
|
302 my $bvf = $vfoa->base_variation_feature;
|
|
303 my $feat = $vfoa->feature;
|
|
304
|
|
305 return _downstream($bvf, $feat, $DOWNSTREAM_DISTANCE);
|
|
306 }
|
|
307
|
|
308 sub affects_transcript {
|
|
309 my ($bvf, $tran) = @_;
|
|
310
|
|
311 return 0 unless $tran->isa('Bio::EnsEMBL::Transcript');
|
|
312
|
|
313 return overlap(
|
|
314 $bvf->start,
|
|
315 $bvf->end,
|
|
316 $tran->start - 5000,
|
|
317 $tran->end + 5000
|
|
318 );
|
|
319 }
|
|
320
|
|
321 sub within_transcript {
|
|
322 my $bvfoa = shift;
|
|
323 return within_feature($bvfoa);
|
|
324 }
|
|
325
|
|
326 sub within_nmd_transcript {
|
|
327 my $bvfoa = shift;
|
|
328 my $tran = $bvfoa->transcript;
|
|
329
|
|
330 return ( within_transcript($bvfoa) and ($tran->biotype eq 'nonsense_mediated_decay') );
|
|
331 }
|
|
332
|
|
333 sub within_non_coding_gene {
|
|
334 my $bvfoa = shift;
|
|
335 my $tran = $bvfoa->transcript;
|
|
336
|
|
337 return ( within_transcript($bvfoa) and (not $tran->translation) and (not within_mature_miRNA($bvfoa)));
|
|
338 }
|
|
339
|
|
340 sub non_coding_exon_variant {
|
|
341 my $bvfoa = shift;
|
|
342
|
|
343 return 0 unless within_non_coding_gene($bvfoa);
|
|
344
|
|
345 my $bvf = $bvfoa->base_variation_feature;
|
|
346 my $exons = $bvfoa->base_variation_feature_overlap->_exons;
|
|
347
|
|
348 if(scalar grep {overlap($bvf->start, $bvf->end, $_->start, $_->end)} @$exons) {
|
|
349 return 1;
|
|
350 }
|
|
351 else {
|
|
352 return 0;
|
|
353 }
|
|
354 }
|
|
355
|
|
356 sub within_miRNA {
|
|
357 my $bvfoa = shift;
|
|
358 my $tran = $bvfoa->transcript;
|
|
359
|
|
360 # don't call this for now
|
|
361
|
|
362 return 0;
|
|
363
|
|
364 return ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') );
|
|
365 }
|
|
366
|
|
367 sub within_mature_miRNA {
|
|
368 my $bvfoa = shift;
|
|
369
|
|
370 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
371 my $bvf = $bvfoa->base_variation_feature;
|
|
372 my $tran = $bvfoa->transcript;
|
|
373
|
|
374 return 0 unless ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') );
|
|
375
|
|
376 my ($attribute) = @{ $tran->get_all_Attributes('miRNA') };
|
|
377
|
|
378 if (defined $attribute && $attribute->value =~ /(\d+)-(\d+)/) {
|
|
379 for my $coord ($bvfo->_mapper->cdna2genomic($1, $2, $tran->strand)) {
|
|
380 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
381 if (overlap(
|
|
382 $bvf->start,
|
|
383 $bvf->end,
|
|
384 $coord->start,
|
|
385 $coord->end) ) {
|
|
386 return 1;
|
|
387 }
|
|
388 }
|
|
389 }
|
|
390 }
|
|
391
|
|
392 return 0;
|
|
393 }
|
|
394
|
|
395 sub donor_splice_site {
|
|
396 my $bvfoa = shift;
|
|
397 my $tran = $bvfoa->transcript;
|
|
398
|
|
399 return $tran->strand == 1 ?
|
|
400 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site} :
|
|
401 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site};
|
|
402 }
|
|
403
|
|
404 sub acceptor_splice_site {
|
|
405 my $bvfoa = shift;
|
|
406 my $tran = $bvfoa->transcript;
|
|
407
|
|
408 return $tran->strand == 1 ?
|
|
409 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site} :
|
|
410 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site};
|
|
411 }
|
|
412
|
|
413 sub essential_splice_site {
|
|
414 my $bvfoa = shift;
|
|
415
|
|
416 return ( acceptor_splice_site($bvfoa) or donor_splice_site($bvfoa) );
|
|
417 }
|
|
418
|
|
419 sub splice_region {
|
|
420 my $bvfoa = shift;
|
|
421
|
|
422 return 0 if donor_splice_site($bvfoa);
|
|
423 return 0 if acceptor_splice_site($bvfoa);
|
|
424 return 0 if essential_splice_site($bvfoa);
|
|
425
|
|
426 return $bvfoa->base_variation_feature_overlap->_intron_effects->{splice_region};
|
|
427 }
|
|
428
|
|
429 sub within_intron {
|
|
430 my $bvfoa = shift;
|
|
431
|
|
432 return $bvfoa->base_variation_feature_overlap->_intron_effects->{intronic};
|
|
433 }
|
|
434
|
|
435 sub within_cds {
|
|
436 my $bvfoa = shift;
|
|
437 my $bvf = $bvfoa->base_variation_feature;
|
|
438 my $tran = $bvfoa->transcript;
|
|
439 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
440
|
|
441 my $cds_coords = $bvfoa->base_variation_feature_overlap->cds_coords;
|
|
442
|
|
443 if (@$cds_coords > 0) {
|
|
444 for my $coord (@$cds_coords) {
|
|
445 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
446 if ($coord->end > 0 && $coord->start <= length($bvfo->_translateable_seq)) {
|
|
447 return 1;
|
|
448 }
|
|
449 }
|
|
450 }
|
|
451 }
|
|
452
|
|
453 # we also need to check if the vf is in a frameshift intron within the CDS
|
|
454
|
|
455 if (defined $tran->translation &&
|
|
456 $bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) {
|
|
457
|
|
458 return overlap(
|
|
459 $bvf->start,
|
|
460 $bvf->end,
|
|
461 $tran->coding_region_start,
|
|
462 $tran->coding_region_end,
|
|
463 );
|
|
464 }
|
|
465
|
|
466 return 0;
|
|
467 }
|
|
468
|
|
469 sub within_cdna {
|
|
470 my $bvfoa = shift;
|
|
471 my $bvf = $bvfoa->base_variation_feature;
|
|
472 my $tran = $bvfoa->transcript;
|
|
473 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
474
|
|
475 my $cdna_coords = $bvfo->cdna_coords;
|
|
476
|
|
477 if (@$cdna_coords > 0) {
|
|
478 for my $coord (@$cdna_coords) {
|
|
479 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
480 if ($coord->end > 0 && $coord->start <= $tran->length) {
|
|
481 return 1;
|
|
482 }
|
|
483 }
|
|
484 }
|
|
485 }
|
|
486
|
|
487 # we also need to check if the vf is in a frameshift intron within the cDNA
|
|
488
|
|
489 if ($bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) {
|
|
490 return within_transcript($bvfoa);
|
|
491 }
|
|
492
|
|
493 return 0;
|
|
494 }
|
|
495
|
|
496 sub _before_coding {
|
|
497 my ($bvf, $tran) = @_;
|
|
498 return 0 unless defined $tran->translation;
|
|
499
|
|
500 my $bvf_s = $bvf->start;
|
|
501 my $bvf_e = $bvf->end;
|
|
502 my $t_s = $tran->start;
|
|
503 my $cds_s = $tran->coding_region_start;
|
|
504
|
|
505 # we need to special case insertions just before the CDS start
|
|
506 if ($bvf_s == $bvf_e+1 && $bvf_s == $cds_s) {
|
|
507 return 1;
|
|
508 }
|
|
509
|
|
510 return overlap($bvf_s, $bvf_e, $t_s, $cds_s-1);
|
|
511 }
|
|
512
|
|
513 sub _after_coding {
|
|
514 my ($bvf, $tran) = @_;
|
|
515 return 0 unless defined $tran->translation;
|
|
516
|
|
517 my $bvf_s = $bvf->start;
|
|
518 my $bvf_e = $bvf->end;
|
|
519 my $t_e = $tran->end;
|
|
520 my $cds_e = $tran->coding_region_end;
|
|
521
|
|
522 # we need to special case insertions just after the CDS end
|
|
523 if ($bvf_s == $bvf_e+1 && $bvf_e == $cds_e) {
|
|
524 return 1;
|
|
525 }
|
|
526
|
|
527 return overlap($bvf_s, $bvf_e, $cds_e+1, $t_e);
|
|
528 }
|
|
529
|
|
530 sub within_5_prime_utr {
|
|
531 my $bvfoa = shift;
|
|
532 my $bvf = $bvfoa->base_variation_feature;
|
|
533 my $tran = $bvfoa->transcript;
|
|
534
|
|
535 my $five_prime_of_coding =
|
|
536 $tran->strand == 1 ?
|
|
537 _before_coding($bvf, $tran) :
|
|
538 _after_coding($bvf, $tran);
|
|
539
|
|
540 return ( $five_prime_of_coding and within_cdna($bvfoa) );
|
|
541 }
|
|
542
|
|
543 sub within_3_prime_utr {
|
|
544 my $bvfoa = shift;
|
|
545 my $bvf = $bvfoa->base_variation_feature;
|
|
546 my $tran = $bvfoa->transcript;
|
|
547
|
|
548 my $three_prime_of_coding =
|
|
549 $tran->strand == 1 ?
|
|
550 _after_coding($bvf, $tran) :
|
|
551 _before_coding($bvf, $tran);
|
|
552
|
|
553 return ( $three_prime_of_coding and within_cdna($bvfoa) );
|
|
554 }
|
|
555
|
|
556 sub complex_indel {
|
|
557 my $bvfoa = shift;
|
|
558 my $bvf = $bvfoa->base_variation_feature;
|
|
559
|
|
560 # pass the no_db flag to var_class to ensure we don't rely on the database for it
|
|
561 # as it may not have been set at this stage in the pipeline
|
|
562 my $class = $bvf->var_class(1);
|
|
563
|
|
564 return 0 unless $class =~ /insertion|deletion|indel/;
|
|
565
|
|
566 return @{ $bvfoa->base_variation_feature_overlap->cds_coords } > 1;
|
|
567 }
|
|
568
|
|
569 sub _get_peptide_alleles {
|
|
570 my $bvfoa = shift;
|
|
571 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
572
|
|
573 return () if frameshift($bvfoa);
|
|
574
|
|
575 my $alt_pep = $bvfoa->peptide;
|
|
576
|
|
577 return () unless defined $alt_pep;
|
|
578
|
|
579 my $ref_pep = $bvfo->get_reference_TranscriptVariationAllele->peptide;
|
|
580
|
|
581 return () unless defined $ref_pep;
|
|
582
|
|
583 $ref_pep = '' if $ref_pep eq '-';
|
|
584 $alt_pep = '' if $alt_pep eq '-';
|
|
585
|
|
586 return ($ref_pep, $alt_pep);
|
|
587 }
|
|
588
|
|
589 sub _get_codon_alleles {
|
|
590 my $bvfoa = shift;
|
|
591 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
592
|
|
593 return () if frameshift($bvfoa);
|
|
594
|
|
595 my $alt_codon = $bvfoa->codon;
|
|
596
|
|
597 return () unless defined $alt_codon;
|
|
598
|
|
599 my $ref_codon = $bvfo->get_reference_TranscriptVariationAllele->codon;
|
|
600
|
|
601 return () unless defined $ref_codon;
|
|
602
|
|
603 $ref_codon = '' if $ref_codon eq '-';
|
|
604 $alt_codon = '' if $alt_codon eq '-';
|
|
605
|
|
606 return ($ref_codon, $alt_codon);
|
|
607 }
|
|
608
|
|
609 sub _get_alleles {
|
|
610 my $bvfoa = shift;
|
|
611 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
612
|
|
613 my $ref_tva = $bvfo->get_reference_VariationFeatureOverlapAllele;
|
|
614
|
|
615 return () unless defined ($ref_tva);
|
|
616
|
|
617 my $ref_allele = $ref_tva->variation_feature_seq;
|
|
618 my $alt_allele = $bvfoa->variation_feature_seq;
|
|
619
|
|
620 return () unless defined($ref_allele) and defined($alt_allele);
|
|
621
|
|
622 $ref_allele = '' if $ref_allele eq '-';
|
|
623 $alt_allele = '' if $alt_allele eq '-';
|
|
624
|
|
625 return ($ref_allele, $alt_allele);
|
|
626 }
|
|
627
|
|
628 sub stop_retained {
|
|
629 my $bvfoa = shift;
|
|
630
|
|
631 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
632
|
|
633 return 0 unless $ref_pep;
|
|
634
|
|
635 return ( $alt_pep =~ /\*/ && $ref_pep =~ /\*/ );
|
|
636 }
|
|
637
|
|
638 sub affects_start_codon {
|
|
639 my $bvfoa = shift;
|
|
640 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
641
|
|
642 # sequence variant
|
|
643 if($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptVariation')) {
|
|
644 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
645
|
|
646 return 0 unless $ref_pep;
|
|
647
|
|
648 return ( ($bvfo->translation_start == 1) and (substr($ref_pep,0,1) ne substr($alt_pep,0,1)) );
|
|
649 }
|
|
650
|
|
651 # structural variant
|
|
652 elsif($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariation')) {
|
|
653 my $tr = $bvfo->transcript;
|
|
654 my $bvf = $bvfo->base_variation_feature;
|
|
655
|
|
656 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end);
|
|
657 return 0 unless defined($tr_crs) && defined($tr_cre);
|
|
658
|
|
659 if($tr->strand == 1) {
|
|
660 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end);
|
|
661 }
|
|
662 else {
|
|
663 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end);
|
|
664 }
|
|
665 }
|
|
666
|
|
667 else {
|
|
668 return 0;
|
|
669 }
|
|
670 }
|
|
671
|
|
672 sub synonymous_variant {
|
|
673 my $bvfoa = shift;
|
|
674
|
|
675 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
676
|
|
677 return 0 unless $ref_pep;
|
|
678
|
|
679 return ( ($alt_pep eq $ref_pep) and (not stop_retained($bvfoa) and ($alt_pep !~ /X/) and ($ref_pep !~ /X/)) );
|
|
680 }
|
|
681
|
|
682 sub missense_variant {
|
|
683 my $bvfoa = shift;
|
|
684
|
|
685 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
686
|
|
687 return 0 unless defined $ref_pep;
|
|
688
|
|
689 return 0 if affects_start_codon($bvfoa);
|
|
690 return 0 if stop_lost($bvfoa);
|
|
691 return 0 if stop_gained($bvfoa);
|
|
692 return 0 if partial_codon($bvfoa);
|
|
693
|
|
694 return 0 if inframe_deletion($bvfoa);
|
|
695 return 0 if inframe_insertion($bvfoa);
|
|
696
|
|
697 return ( $ref_pep ne $alt_pep );
|
|
698 }
|
|
699
|
|
700 sub inframe_insertion {
|
|
701 my $bvfoa = shift;
|
|
702
|
|
703 # sequence variant
|
|
704 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
|
|
705 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa);
|
|
706
|
|
707 return 0 unless defined $ref_codon;
|
|
708
|
|
709 return (
|
|
710 (length($alt_codon) > length ($ref_codon)) &&
|
|
711 ( ($alt_codon =~ /^\Q$ref_codon\E/) || ($alt_codon =~ /\Q$ref_codon\E$/) )
|
|
712 );
|
|
713 }
|
|
714
|
|
715 # structural variant
|
|
716 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
|
|
717
|
|
718 # TO BE DONE, NO WAY OF KNOWING WHAT SEQUENCE IS INSERTED YET
|
|
719 return 0;
|
|
720
|
|
721 # must be an insertion
|
|
722 return 0 unless insertion($bvfoa);
|
|
723
|
|
724 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
725
|
|
726 my $cds_coords = $bvfo->cds_coords;
|
|
727
|
|
728 if(scalar @$cds_coords == 1) {
|
|
729
|
|
730 # wholly within exon
|
|
731 if($cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
732 1;
|
|
733 }
|
|
734 }
|
|
735
|
|
736 # variant partially overlaps
|
|
737 else {
|
|
738 return 0;
|
|
739 }
|
|
740 }
|
|
741
|
|
742 else {
|
|
743 return 0;
|
|
744 }
|
|
745 }
|
|
746
|
|
747 sub inframe_deletion {
|
|
748 my $bvfoa = shift;
|
|
749
|
|
750 # sequence variant
|
|
751 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
|
|
752 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa);
|
|
753
|
|
754 return 0 unless defined $ref_codon;
|
|
755
|
|
756 return (
|
|
757 (length($alt_codon) < length ($ref_codon)) &&
|
|
758 ( ($ref_codon =~ /^\Q$alt_codon\E/) || ($ref_codon =~ /\Q$alt_codon\E$/) )
|
|
759 );
|
|
760 }
|
|
761
|
|
762 # structural variant
|
|
763 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
|
|
764
|
|
765 # must be a deletion
|
|
766 return 0 unless deletion($bvfoa);
|
|
767
|
|
768 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
769 my $cds_coords = $bvfo->cds_coords;
|
|
770 my $exons = $bvfo->_exons;
|
|
771 my $bvf = $bvfo->base_variation_feature;
|
|
772
|
|
773 # in exon
|
|
774 return (
|
|
775 scalar @$cds_coords == 1 and
|
|
776 $cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate') and
|
|
777 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and
|
|
778 $bvf->length() % 3 == 0
|
|
779 );
|
|
780 }
|
|
781
|
|
782 else {
|
|
783 return 0;
|
|
784 }
|
|
785 }
|
|
786
|
|
787 sub stop_gained {
|
|
788 my $bvfoa = shift;
|
|
789 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
790
|
|
791 return 0 unless $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele');
|
|
792
|
|
793 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
794
|
|
795 return 0 unless defined $ref_pep;
|
|
796
|
|
797 return ( ($alt_pep =~ /\*/) and ($ref_pep !~ /\*/) );
|
|
798 }
|
|
799
|
|
800 sub stop_lost {
|
|
801 my $bvfoa = shift;
|
|
802
|
|
803 # sequence variant
|
|
804 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
|
|
805 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa);
|
|
806
|
|
807 return 0 unless defined $ref_pep;
|
|
808
|
|
809 return ( ($alt_pep !~ /\*/) and ($ref_pep =~ /\*/) );
|
|
810 }
|
|
811
|
|
812 # structural variant
|
|
813 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
|
|
814 return 0 unless deletion($bvfoa);
|
|
815
|
|
816 my $tr = $bvfoa->transcript;
|
|
817 my $bvf = $bvfoa->base_variation_feature;
|
|
818
|
|
819 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end);
|
|
820 return 0 unless defined($tr_crs) && defined($tr_cre);
|
|
821
|
|
822 if($tr->strand == 1) {
|
|
823 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end);
|
|
824 }
|
|
825 else {
|
|
826 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end);
|
|
827 }
|
|
828 }
|
|
829
|
|
830 else {
|
|
831 return 0;
|
|
832 }
|
|
833 }
|
|
834
|
|
835 sub frameshift {
|
|
836 my $bvfoa = shift;
|
|
837
|
|
838 # sequence variant
|
|
839 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
|
|
840
|
|
841 return 0 if partial_codon($bvfoa);
|
|
842
|
|
843 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
844
|
|
845 return 0 unless defined $bvfo->cds_start && defined $bvfo->cds_end;
|
|
846
|
|
847 my $var_len = $bvfo->cds_end - $bvfo->cds_start + 1;
|
|
848
|
|
849 my $allele_len = $bvfoa->seq_length;
|
|
850
|
|
851 # if the allele length is undefined then we can't call a frameshift
|
|
852
|
|
853 return 0 unless defined $allele_len;
|
|
854
|
|
855 return abs( $allele_len - $var_len ) % 3;
|
|
856 }
|
|
857
|
|
858 # structural variant
|
|
859 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
|
|
860 my $bvf = $bvfoa->base_variation_feature;
|
|
861 my $exons = $bvfoa->base_variation_feature_overlap->_exons;
|
|
862
|
|
863 return (
|
|
864 (
|
|
865 deletion($bvfoa) or
|
|
866 copy_number_loss($bvfoa)
|
|
867 ) and
|
|
868 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and
|
|
869 $bvf->length % 3 != 0
|
|
870 );
|
|
871
|
|
872 # TODO INSERTIONS
|
|
873 }
|
|
874
|
|
875 else {
|
|
876 return 0;
|
|
877 }
|
|
878 }
|
|
879
|
|
880 sub partial_codon {
|
|
881 my $bvfoa = shift;
|
|
882
|
|
883 my $bvfo = $bvfoa->base_variation_feature_overlap;
|
|
884
|
|
885 return 0 unless defined $bvfo->translation_start;
|
|
886
|
|
887 my $cds_length = length $bvfo->_translateable_seq;
|
|
888
|
|
889 my $codon_cds_start = ($bvfo->translation_start * 3) - 2;
|
|
890
|
|
891 my $last_codon_length = $cds_length - ($codon_cds_start - 1);
|
|
892
|
|
893 return ( $last_codon_length < 3 and $last_codon_length > 0 );
|
|
894 }
|
|
895
|
|
896 sub coding_unknown {
|
|
897 my $bvfoa = shift;
|
|
898
|
|
899 # sequence variant
|
|
900 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
|
|
901 return (within_cds($bvfoa) and ((not $bvfoa->peptide) or (not _get_peptide_alleles($bvfoa)) or ($bvfoa->peptide =~ /X/)) and (not (frameshift($bvfoa) or inframe_deletion($bvfoa))));
|
|
902 }
|
|
903
|
|
904 # structural variant
|
|
905 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
|
|
906 return (within_cds($bvfoa) and not(inframe_insertion($bvfoa) or inframe_deletion($bvfoa) or frameshift($bvfoa)));
|
|
907 }
|
|
908
|
|
909 else {
|
|
910 return 0;
|
|
911 }
|
|
912 }
|
|
913
|
|
914 #package Bio::EnsEMBL::Variation::RegulatoryFeatureVariationAllele;
|
|
915
|
|
916 sub within_regulatory_feature {
|
|
917 my $rfva = shift;
|
|
918 return within_feature($rfva);
|
|
919 }
|
|
920
|
|
921 #package Bio::EnsEMBL::Variation::ExternalFeatureVariationAllele;
|
|
922
|
|
923 sub within_external_feature {
|
|
924 my $efva = shift;
|
|
925 return (within_feature($efva) and (not within_miRNA_target_site($efva)));
|
|
926 }
|
|
927
|
|
928 #sub within_miRNA_target_site {
|
|
929 # my $efva = shift;
|
|
930 #
|
|
931 # my $fset = $efva->variation_feature_overlap->feature->feature_set;
|
|
932 #
|
|
933 # return ($fset && $fset->name eq 'miRanda miRNA targets');
|
|
934 #}
|
|
935
|
|
936 #package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele;
|
|
937
|
|
938 #sub within_motif_feature {
|
|
939 # my $mfva = shift;
|
|
940 # return (
|
|
941 # within_feature($mfva) and
|
|
942 # !increased_binding_affinity($mfva) and
|
|
943 # !decreased_binding_affinity($mfva)
|
|
944 # );
|
|
945 #}
|
|
946
|
|
947 sub within_motif_feature {
|
|
948 my $mfva = shift;
|
|
949 return within_feature($mfva);
|
|
950 }
|
|
951
|
|
952 #sub increased_binding_affinity {
|
|
953 # my $mfva = shift;
|
|
954 # my $change = $mfva->binding_affinity_change;
|
|
955 # return (within_feature($mfva) and (defined $change) and ($change > 0));
|
|
956 #}
|
|
957 #
|
|
958 #sub decreased_binding_affinity {
|
|
959 # my $mfva = shift;
|
|
960 # my $change = $mfva->binding_affinity_change;
|
|
961 # return (within_feature($mfva) and (defined $change) and ($change < 0));
|
|
962 #}
|
|
963
|
|
964 sub contains_entire_feature {
|
|
965 my $vfo = shift;
|
|
966
|
|
967 my $bvf = $vfo->base_variation_feature;
|
|
968 my $feat = $vfo->feature;
|
|
969
|
|
970 return ( ($bvf->start <= $feat->start) && ($bvf->end >= $feat->end) );
|
|
971 }
|
|
972
|
|
973 1;
|
|
974
|