Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/Utils/VariationEffect.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::Variation::Utils::VariationEffect | |
| 24 | |
| 25 =head1 DESCRIPTION | |
| 26 | |
| 27 This module defines a set of predicate subroutines that check the effect of a | |
| 28 Bio::EnsEMBL::Variation::VariationFeature on some other Bio::EnsEMBL::Feature. | |
| 29 All of these predicates take a VariationFeatureOverlapAllele as their first and | |
| 30 only argument and return a true or false value depending on whether the effect | |
| 31 being checked for holds or not. The link between these predicates and the | |
| 32 specific effect is configured in the Bio::EnsEMBL::Variation::Utils::Config | |
| 33 module and a list of OverlapConsequence objects that represent a link between, | |
| 34 for example, a Sequence Ontology consequence term, and the predicate that | |
| 35 checks for it is provided in the Bio::EnsEMBL::Variation::Utils::Constants | |
| 36 module. If you want to add a new consequence you should write a predicate in | |
| 37 this module and then add an entry in the configuration file. | |
| 38 | |
| 39 =cut | |
| 40 | |
| 41 package Bio::EnsEMBL::Variation::Utils::VariationEffect; | |
| 42 | |
| 43 use strict; | |
| 44 use warnings; | |
| 45 | |
| 46 use base qw(Exporter); | |
| 47 | |
| 48 our @EXPORT_OK = qw(overlap within_cds MAX_DISTANCE_FROM_TRANSCRIPT within_intron stop_lost affects_start_codon $UPSTREAM_DISTANCE $DOWNSTREAM_DISTANCE); | |
| 49 | |
| 50 use constant MAX_DISTANCE_FROM_TRANSCRIPT => 5000; | |
| 51 | |
| 52 our $UPSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT; | |
| 53 our $DOWNSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT; | |
| 54 | |
| 55 #package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; | |
| 56 | |
| 57 sub overlap { | |
| 58 my ( $f1_start, $f1_end, $f2_start, $f2_end ) = @_; | |
| 59 | |
| 60 return ( ($f1_end >= $f2_start) and ($f1_start <= $f2_end) ); | |
| 61 } | |
| 62 | |
| 63 sub within_feature { | |
| 64 my ($bvfoa, $feat) = @_; | |
| 65 my $bvf = $bvfoa->base_variation_feature; | |
| 66 $feat = $bvfoa->feature unless defined($feat); | |
| 67 | |
| 68 return overlap( | |
| 69 $bvf->start, | |
| 70 $bvf->end, | |
| 71 $feat->start, | |
| 72 $feat->end | |
| 73 ); | |
| 74 } | |
| 75 | |
| 76 sub partial_overlap_feature { | |
| 77 my ($bvfoa, $feat) = @_; | |
| 78 my $bvf = $bvfoa->base_variation_feature; | |
| 79 $feat = $bvfoa->feature unless defined($feat); | |
| 80 | |
| 81 return ( | |
| 82 within_feature(@_) and | |
| 83 (not complete_overlap_feature(@_)) and | |
| 84 (($bvf->end > $feat->end) or ($bvf->start < $feat->start)) | |
| 85 ); | |
| 86 } | |
| 87 | |
| 88 sub complete_within_feature { | |
| 89 my ($bvfoa, $feat) = @_; | |
| 90 my $bvf = $bvfoa->base_variation_feature; | |
| 91 $feat = $bvfoa->feature unless defined($feat); | |
| 92 | |
| 93 return ( | |
| 94 ($bvf->start >= $feat->start) and | |
| 95 ($bvf->end <= $feat->end) | |
| 96 ); | |
| 97 } | |
| 98 | |
| 99 sub complete_overlap_feature { | |
| 100 my ($bvfoa, $feat) = @_; | |
| 101 my $bvf = $bvfoa->base_variation_feature; | |
| 102 $feat = $bvfoa->feature unless defined($feat); | |
| 103 | |
| 104 return ( | |
| 105 ($bvf->start <= $feat->start) and | |
| 106 ($bvf->end >= $feat->end) | |
| 107 ); | |
| 108 } | |
| 109 | |
| 110 sub deletion { | |
| 111 my $bvfoa = shift; | |
| 112 | |
| 113 my $bvf = $bvfoa->base_variation_feature; | |
| 114 | |
| 115 # sequence variant will have alleles | |
| 116 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { | |
| 117 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); | |
| 118 return ( | |
| 119 (defined($ref_allele) && ($alt_allele eq '') and $ref_allele) or | |
| 120 $bvf->allele_string =~ /deletion/i | |
| 121 ); | |
| 122 } | |
| 123 | |
| 124 # structural variant depends on class | |
| 125 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { | |
| 126 return ( | |
| 127 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'deletion') or | |
| 128 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /deletion/i) or | |
| 129 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i) | |
| 130 ); | |
| 131 } | |
| 132 | |
| 133 else { return 0; } | |
| 134 } | |
| 135 | |
| 136 sub insertion { | |
| 137 my $bvfoa = shift; | |
| 138 | |
| 139 my $bvf = $bvfoa->base_variation_feature; | |
| 140 | |
| 141 # sequence variant will have alleles | |
| 142 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { | |
| 143 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); | |
| 144 return ( | |
| 145 (defined($ref_allele) && ($ref_allele eq '') and $alt_allele) or | |
| 146 $bvf->allele_string =~ /insertion/i | |
| 147 ); | |
| 148 } | |
| 149 | |
| 150 # structural variant depends on class | |
| 151 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { | |
| 152 return ( | |
| 153 duplication($bvfoa) or | |
| 154 tandem_duplication($bvfoa) or | |
| 155 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'insertion') or | |
| 156 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /insertion/i) or | |
| 157 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i) | |
| 158 ); | |
| 159 } | |
| 160 | |
| 161 else { return 0; } | |
| 162 } | |
| 163 | |
| 164 sub copy_number_gain { | |
| 165 my $bvfoa = shift; | |
| 166 | |
| 167 return (duplication($bvfoa) or tandem_duplication($bvfoa) or $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i); | |
| 168 } | |
| 169 | |
| 170 sub copy_number_loss { | |
| 171 my $bvfoa = shift; | |
| 172 | |
| 173 return $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i; | |
| 174 } | |
| 175 | |
| 176 sub duplication { | |
| 177 my $bvfoa = shift; | |
| 178 | |
| 179 return ( | |
| 180 ( | |
| 181 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'duplication') or | |
| 182 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /duplication/i) | |
| 183 ) and | |
| 184 (not tandem_duplication($bvfoa)) | |
| 185 ); | |
| 186 } | |
| 187 | |
| 188 sub tandem_duplication { | |
| 189 my $bvfoa = shift; | |
| 190 | |
| 191 my $bvf = $bvfoa->base_variation_feature; | |
| 192 | |
| 193 # for sequence variants, check sequence vs ref | |
| 194 if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { | |
| 195 my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); | |
| 196 | |
| 197 return 0 unless $ref_allele and $alt_allele; | |
| 198 return 0 unless | |
| 199 length($alt_allele) > length($ref_allele) and | |
| 200 length($alt_allele) % length($ref_allele) == 0; | |
| 201 | |
| 202 my $copies = length($alt_allele) / length($ref_allele); | |
| 203 | |
| 204 return $alt_allele eq $ref_allele x $copies; | |
| 205 } | |
| 206 | |
| 207 # structural variant depends on class | |
| 208 if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { | |
| 209 return ( | |
| 210 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'tandem_duplication') or | |
| 211 ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /tandem_duplication/i) | |
| 212 ); | |
| 213 } | |
| 214 } | |
| 215 | |
| 216 sub feature_ablation { | |
| 217 my $bvfoa = shift; | |
| 218 | |
| 219 return (deletion($bvfoa) and complete_overlap_feature($bvfoa)); | |
| 220 } | |
| 221 | |
| 222 sub feature_amplification { | |
| 223 my $bvfoa = shift; | |
| 224 | |
| 225 return (copy_number_gain($bvfoa) && complete_overlap_feature($bvfoa)); | |
| 226 } | |
| 227 | |
| 228 sub feature_elongation { | |
| 229 my $bvfoa = shift; | |
| 230 | |
| 231 return ( | |
| 232 complete_within_feature($bvfoa) and | |
| 233 (copy_number_gain($bvfoa) or insertion($bvfoa)) and | |
| 234 not( | |
| 235 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and | |
| 236 (inframe_insertion($bvfoa) or stop_lost($bvfoa)) | |
| 237 ) | |
| 238 ); | |
| 239 } | |
| 240 | |
| 241 sub feature_truncation { | |
| 242 my $bvfoa = shift; | |
| 243 | |
| 244 return ( | |
| 245 (partial_overlap_feature($bvfoa) or complete_within_feature($bvfoa)) and | |
| 246 (copy_number_loss($bvfoa) or deletion($bvfoa)) and | |
| 247 not( | |
| 248 $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and | |
| 249 (inframe_deletion($bvfoa) or stop_gained($bvfoa)) | |
| 250 ) | |
| 251 ); | |
| 252 } | |
| 253 | |
| 254 #sub transcript_fusion { | |
| 255 # #my $bvfoa = shift; | |
| 256 # #my $bvf = $bvfoa->base_variation_feature; | |
| 257 # | |
| 258 # return 0; | |
| 259 # | |
| 260 # #my $transcripts = $bvf->_get_overlapping_Transcripts(); | |
| 261 #} | |
| 262 | |
| 263 sub _before_start { | |
| 264 my ($bvf, $feat, $dist) = @_; | |
| 265 | |
| 266 return ( ($bvf->end >= ($feat->start - $dist)) and | |
| 267 ($bvf->end < $feat->start) ); | |
| 268 } | |
| 269 | |
| 270 sub _after_end { | |
| 271 my ($bvf, $feat, $dist) = @_; | |
| 272 return ( ($bvf->start <= ($feat->end + $dist)) | |
| 273 and ($bvf->start > $feat->end) ); | |
| 274 } | |
| 275 | |
| 276 sub _upstream { | |
| 277 my ($bvf, $feat, $dist) = @_; | |
| 278 return $feat->strand == 1 ? | |
| 279 _before_start($bvf, $feat, $dist) : | |
| 280 _after_end($bvf, $feat, $dist); | |
| 281 } | |
| 282 | |
| 283 sub _downstream { | |
| 284 my ($bvf, $feat, $dist) = @_; | |
| 285 return $feat->strand == 1 ? | |
| 286 _after_end($bvf, $feat, $dist) : | |
| 287 _before_start($bvf, $feat, $dist); | |
| 288 } | |
| 289 | |
| 290 #package Bio::EnsEMBL::Variation::TranscriptVariationAllele; | |
| 291 | |
| 292 sub upstream { | |
| 293 my $vfoa = shift; | |
| 294 my $bvf = $vfoa->base_variation_feature; | |
| 295 my $feat = $vfoa->feature; | |
| 296 | |
| 297 return _upstream($bvf, $feat, $UPSTREAM_DISTANCE); | |
| 298 } | |
| 299 | |
| 300 sub downstream { | |
| 301 my $vfoa = shift; | |
| 302 my $bvf = $vfoa->base_variation_feature; | |
| 303 my $feat = $vfoa->feature; | |
| 304 | |
| 305 return _downstream($bvf, $feat, $DOWNSTREAM_DISTANCE); | |
| 306 } | |
| 307 | |
| 308 sub affects_transcript { | |
| 309 my ($bvf, $tran) = @_; | |
| 310 | |
| 311 return 0 unless $tran->isa('Bio::EnsEMBL::Transcript'); | |
| 312 | |
| 313 return overlap( | |
| 314 $bvf->start, | |
| 315 $bvf->end, | |
| 316 $tran->start - 5000, | |
| 317 $tran->end + 5000 | |
| 318 ); | |
| 319 } | |
| 320 | |
| 321 sub within_transcript { | |
| 322 my $bvfoa = shift; | |
| 323 return within_feature($bvfoa); | |
| 324 } | |
| 325 | |
| 326 sub within_nmd_transcript { | |
| 327 my $bvfoa = shift; | |
| 328 my $tran = $bvfoa->transcript; | |
| 329 | |
| 330 return ( within_transcript($bvfoa) and ($tran->biotype eq 'nonsense_mediated_decay') ); | |
| 331 } | |
| 332 | |
| 333 sub within_non_coding_gene { | |
| 334 my $bvfoa = shift; | |
| 335 my $tran = $bvfoa->transcript; | |
| 336 | |
| 337 return ( within_transcript($bvfoa) and (not $tran->translation) and (not within_mature_miRNA($bvfoa))); | |
| 338 } | |
| 339 | |
| 340 sub non_coding_exon_variant { | |
| 341 my $bvfoa = shift; | |
| 342 | |
| 343 return 0 unless within_non_coding_gene($bvfoa); | |
| 344 | |
| 345 my $bvf = $bvfoa->base_variation_feature; | |
| 346 my $exons = $bvfoa->base_variation_feature_overlap->_exons; | |
| 347 | |
| 348 if(scalar grep {overlap($bvf->start, $bvf->end, $_->start, $_->end)} @$exons) { | |
| 349 return 1; | |
| 350 } | |
| 351 else { | |
| 352 return 0; | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 sub within_miRNA { | |
| 357 my $bvfoa = shift; | |
| 358 my $tran = $bvfoa->transcript; | |
| 359 | |
| 360 # don't call this for now | |
| 361 | |
| 362 return 0; | |
| 363 | |
| 364 return ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') ); | |
| 365 } | |
| 366 | |
| 367 sub within_mature_miRNA { | |
| 368 my $bvfoa = shift; | |
| 369 | |
| 370 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 371 my $bvf = $bvfoa->base_variation_feature; | |
| 372 my $tran = $bvfoa->transcript; | |
| 373 | |
| 374 return 0 unless ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') ); | |
| 375 | |
| 376 my ($attribute) = @{ $tran->get_all_Attributes('miRNA') }; | |
| 377 | |
| 378 if (defined $attribute && $attribute->value =~ /(\d+)-(\d+)/) { | |
| 379 for my $coord ($bvfo->_mapper->cdna2genomic($1, $2, $tran->strand)) { | |
| 380 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
| 381 if (overlap( | |
| 382 $bvf->start, | |
| 383 $bvf->end, | |
| 384 $coord->start, | |
| 385 $coord->end) ) { | |
| 386 return 1; | |
| 387 } | |
| 388 } | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 return 0; | |
| 393 } | |
| 394 | |
| 395 sub donor_splice_site { | |
| 396 my $bvfoa = shift; | |
| 397 my $tran = $bvfoa->transcript; | |
| 398 | |
| 399 return $tran->strand == 1 ? | |
| 400 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site} : | |
| 401 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site}; | |
| 402 } | |
| 403 | |
| 404 sub acceptor_splice_site { | |
| 405 my $bvfoa = shift; | |
| 406 my $tran = $bvfoa->transcript; | |
| 407 | |
| 408 return $tran->strand == 1 ? | |
| 409 $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site} : | |
| 410 $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site}; | |
| 411 } | |
| 412 | |
| 413 sub essential_splice_site { | |
| 414 my $bvfoa = shift; | |
| 415 | |
| 416 return ( acceptor_splice_site($bvfoa) or donor_splice_site($bvfoa) ); | |
| 417 } | |
| 418 | |
| 419 sub splice_region { | |
| 420 my $bvfoa = shift; | |
| 421 | |
| 422 return 0 if donor_splice_site($bvfoa); | |
| 423 return 0 if acceptor_splice_site($bvfoa); | |
| 424 return 0 if essential_splice_site($bvfoa); | |
| 425 | |
| 426 return $bvfoa->base_variation_feature_overlap->_intron_effects->{splice_region}; | |
| 427 } | |
| 428 | |
| 429 sub within_intron { | |
| 430 my $bvfoa = shift; | |
| 431 | |
| 432 return $bvfoa->base_variation_feature_overlap->_intron_effects->{intronic}; | |
| 433 } | |
| 434 | |
| 435 sub within_cds { | |
| 436 my $bvfoa = shift; | |
| 437 my $bvf = $bvfoa->base_variation_feature; | |
| 438 my $tran = $bvfoa->transcript; | |
| 439 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 440 | |
| 441 my $cds_coords = $bvfoa->base_variation_feature_overlap->cds_coords; | |
| 442 | |
| 443 if (@$cds_coords > 0) { | |
| 444 for my $coord (@$cds_coords) { | |
| 445 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
| 446 if ($coord->end > 0 && $coord->start <= length($bvfo->_translateable_seq)) { | |
| 447 return 1; | |
| 448 } | |
| 449 } | |
| 450 } | |
| 451 } | |
| 452 | |
| 453 # we also need to check if the vf is in a frameshift intron within the CDS | |
| 454 | |
| 455 if (defined $tran->translation && | |
| 456 $bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) { | |
| 457 | |
| 458 return overlap( | |
| 459 $bvf->start, | |
| 460 $bvf->end, | |
| 461 $tran->coding_region_start, | |
| 462 $tran->coding_region_end, | |
| 463 ); | |
| 464 } | |
| 465 | |
| 466 return 0; | |
| 467 } | |
| 468 | |
| 469 sub within_cdna { | |
| 470 my $bvfoa = shift; | |
| 471 my $bvf = $bvfoa->base_variation_feature; | |
| 472 my $tran = $bvfoa->transcript; | |
| 473 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 474 | |
| 475 my $cdna_coords = $bvfo->cdna_coords; | |
| 476 | |
| 477 if (@$cdna_coords > 0) { | |
| 478 for my $coord (@$cdna_coords) { | |
| 479 if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
| 480 if ($coord->end > 0 && $coord->start <= $tran->length) { | |
| 481 return 1; | |
| 482 } | |
| 483 } | |
| 484 } | |
| 485 } | |
| 486 | |
| 487 # we also need to check if the vf is in a frameshift intron within the cDNA | |
| 488 | |
| 489 if ($bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) { | |
| 490 return within_transcript($bvfoa); | |
| 491 } | |
| 492 | |
| 493 return 0; | |
| 494 } | |
| 495 | |
| 496 sub _before_coding { | |
| 497 my ($bvf, $tran) = @_; | |
| 498 return 0 unless defined $tran->translation; | |
| 499 | |
| 500 my $bvf_s = $bvf->start; | |
| 501 my $bvf_e = $bvf->end; | |
| 502 my $t_s = $tran->start; | |
| 503 my $cds_s = $tran->coding_region_start; | |
| 504 | |
| 505 # we need to special case insertions just before the CDS start | |
| 506 if ($bvf_s == $bvf_e+1 && $bvf_s == $cds_s) { | |
| 507 return 1; | |
| 508 } | |
| 509 | |
| 510 return overlap($bvf_s, $bvf_e, $t_s, $cds_s-1); | |
| 511 } | |
| 512 | |
| 513 sub _after_coding { | |
| 514 my ($bvf, $tran) = @_; | |
| 515 return 0 unless defined $tran->translation; | |
| 516 | |
| 517 my $bvf_s = $bvf->start; | |
| 518 my $bvf_e = $bvf->end; | |
| 519 my $t_e = $tran->end; | |
| 520 my $cds_e = $tran->coding_region_end; | |
| 521 | |
| 522 # we need to special case insertions just after the CDS end | |
| 523 if ($bvf_s == $bvf_e+1 && $bvf_e == $cds_e) { | |
| 524 return 1; | |
| 525 } | |
| 526 | |
| 527 return overlap($bvf_s, $bvf_e, $cds_e+1, $t_e); | |
| 528 } | |
| 529 | |
| 530 sub within_5_prime_utr { | |
| 531 my $bvfoa = shift; | |
| 532 my $bvf = $bvfoa->base_variation_feature; | |
| 533 my $tran = $bvfoa->transcript; | |
| 534 | |
| 535 my $five_prime_of_coding = | |
| 536 $tran->strand == 1 ? | |
| 537 _before_coding($bvf, $tran) : | |
| 538 _after_coding($bvf, $tran); | |
| 539 | |
| 540 return ( $five_prime_of_coding and within_cdna($bvfoa) ); | |
| 541 } | |
| 542 | |
| 543 sub within_3_prime_utr { | |
| 544 my $bvfoa = shift; | |
| 545 my $bvf = $bvfoa->base_variation_feature; | |
| 546 my $tran = $bvfoa->transcript; | |
| 547 | |
| 548 my $three_prime_of_coding = | |
| 549 $tran->strand == 1 ? | |
| 550 _after_coding($bvf, $tran) : | |
| 551 _before_coding($bvf, $tran); | |
| 552 | |
| 553 return ( $three_prime_of_coding and within_cdna($bvfoa) ); | |
| 554 } | |
| 555 | |
| 556 sub complex_indel { | |
| 557 my $bvfoa = shift; | |
| 558 my $bvf = $bvfoa->base_variation_feature; | |
| 559 | |
| 560 # pass the no_db flag to var_class to ensure we don't rely on the database for it | |
| 561 # as it may not have been set at this stage in the pipeline | |
| 562 my $class = $bvf->var_class(1); | |
| 563 | |
| 564 return 0 unless $class =~ /insertion|deletion|indel/; | |
| 565 | |
| 566 return @{ $bvfoa->base_variation_feature_overlap->cds_coords } > 1; | |
| 567 } | |
| 568 | |
| 569 sub _get_peptide_alleles { | |
| 570 my $bvfoa = shift; | |
| 571 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 572 | |
| 573 return () if frameshift($bvfoa); | |
| 574 | |
| 575 my $alt_pep = $bvfoa->peptide; | |
| 576 | |
| 577 return () unless defined $alt_pep; | |
| 578 | |
| 579 my $ref_pep = $bvfo->get_reference_TranscriptVariationAllele->peptide; | |
| 580 | |
| 581 return () unless defined $ref_pep; | |
| 582 | |
| 583 $ref_pep = '' if $ref_pep eq '-'; | |
| 584 $alt_pep = '' if $alt_pep eq '-'; | |
| 585 | |
| 586 return ($ref_pep, $alt_pep); | |
| 587 } | |
| 588 | |
| 589 sub _get_codon_alleles { | |
| 590 my $bvfoa = shift; | |
| 591 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 592 | |
| 593 return () if frameshift($bvfoa); | |
| 594 | |
| 595 my $alt_codon = $bvfoa->codon; | |
| 596 | |
| 597 return () unless defined $alt_codon; | |
| 598 | |
| 599 my $ref_codon = $bvfo->get_reference_TranscriptVariationAllele->codon; | |
| 600 | |
| 601 return () unless defined $ref_codon; | |
| 602 | |
| 603 $ref_codon = '' if $ref_codon eq '-'; | |
| 604 $alt_codon = '' if $alt_codon eq '-'; | |
| 605 | |
| 606 return ($ref_codon, $alt_codon); | |
| 607 } | |
| 608 | |
| 609 sub _get_alleles { | |
| 610 my $bvfoa = shift; | |
| 611 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 612 | |
| 613 my $ref_tva = $bvfo->get_reference_VariationFeatureOverlapAllele; | |
| 614 | |
| 615 return () unless defined ($ref_tva); | |
| 616 | |
| 617 my $ref_allele = $ref_tva->variation_feature_seq; | |
| 618 my $alt_allele = $bvfoa->variation_feature_seq; | |
| 619 | |
| 620 return () unless defined($ref_allele) and defined($alt_allele); | |
| 621 | |
| 622 $ref_allele = '' if $ref_allele eq '-'; | |
| 623 $alt_allele = '' if $alt_allele eq '-'; | |
| 624 | |
| 625 return ($ref_allele, $alt_allele); | |
| 626 } | |
| 627 | |
| 628 sub stop_retained { | |
| 629 my $bvfoa = shift; | |
| 630 | |
| 631 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 632 | |
| 633 return 0 unless $ref_pep; | |
| 634 | |
| 635 return ( $alt_pep =~ /\*/ && $ref_pep =~ /\*/ ); | |
| 636 } | |
| 637 | |
| 638 sub affects_start_codon { | |
| 639 my $bvfoa = shift; | |
| 640 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 641 | |
| 642 # sequence variant | |
| 643 if($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptVariation')) { | |
| 644 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 645 | |
| 646 return 0 unless $ref_pep; | |
| 647 | |
| 648 return ( ($bvfo->translation_start == 1) and (substr($ref_pep,0,1) ne substr($alt_pep,0,1)) ); | |
| 649 } | |
| 650 | |
| 651 # structural variant | |
| 652 elsif($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariation')) { | |
| 653 my $tr = $bvfo->transcript; | |
| 654 my $bvf = $bvfo->base_variation_feature; | |
| 655 | |
| 656 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end); | |
| 657 return 0 unless defined($tr_crs) && defined($tr_cre); | |
| 658 | |
| 659 if($tr->strand == 1) { | |
| 660 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end); | |
| 661 } | |
| 662 else { | |
| 663 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end); | |
| 664 } | |
| 665 } | |
| 666 | |
| 667 else { | |
| 668 return 0; | |
| 669 } | |
| 670 } | |
| 671 | |
| 672 sub synonymous_variant { | |
| 673 my $bvfoa = shift; | |
| 674 | |
| 675 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 676 | |
| 677 return 0 unless $ref_pep; | |
| 678 | |
| 679 return ( ($alt_pep eq $ref_pep) and (not stop_retained($bvfoa) and ($alt_pep !~ /X/) and ($ref_pep !~ /X/)) ); | |
| 680 } | |
| 681 | |
| 682 sub missense_variant { | |
| 683 my $bvfoa = shift; | |
| 684 | |
| 685 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 686 | |
| 687 return 0 unless defined $ref_pep; | |
| 688 | |
| 689 return 0 if affects_start_codon($bvfoa); | |
| 690 return 0 if stop_lost($bvfoa); | |
| 691 return 0 if stop_gained($bvfoa); | |
| 692 return 0 if partial_codon($bvfoa); | |
| 693 | |
| 694 return 0 if inframe_deletion($bvfoa); | |
| 695 return 0 if inframe_insertion($bvfoa); | |
| 696 | |
| 697 return ( $ref_pep ne $alt_pep ); | |
| 698 } | |
| 699 | |
| 700 sub inframe_insertion { | |
| 701 my $bvfoa = shift; | |
| 702 | |
| 703 # sequence variant | |
| 704 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { | |
| 705 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa); | |
| 706 | |
| 707 return 0 unless defined $ref_codon; | |
| 708 | |
| 709 return ( | |
| 710 (length($alt_codon) > length ($ref_codon)) && | |
| 711 ( ($alt_codon =~ /^\Q$ref_codon\E/) || ($alt_codon =~ /\Q$ref_codon\E$/) ) | |
| 712 ); | |
| 713 } | |
| 714 | |
| 715 # structural variant | |
| 716 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { | |
| 717 | |
| 718 # TO BE DONE, NO WAY OF KNOWING WHAT SEQUENCE IS INSERTED YET | |
| 719 return 0; | |
| 720 | |
| 721 # must be an insertion | |
| 722 return 0 unless insertion($bvfoa); | |
| 723 | |
| 724 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 725 | |
| 726 my $cds_coords = $bvfo->cds_coords; | |
| 727 | |
| 728 if(scalar @$cds_coords == 1) { | |
| 729 | |
| 730 # wholly within exon | |
| 731 if($cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
| 732 1; | |
| 733 } | |
| 734 } | |
| 735 | |
| 736 # variant partially overlaps | |
| 737 else { | |
| 738 return 0; | |
| 739 } | |
| 740 } | |
| 741 | |
| 742 else { | |
| 743 return 0; | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 sub inframe_deletion { | |
| 748 my $bvfoa = shift; | |
| 749 | |
| 750 # sequence variant | |
| 751 if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { | |
| 752 my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa); | |
| 753 | |
| 754 return 0 unless defined $ref_codon; | |
| 755 | |
| 756 return ( | |
| 757 (length($alt_codon) < length ($ref_codon)) && | |
| 758 ( ($ref_codon =~ /^\Q$alt_codon\E/) || ($ref_codon =~ /\Q$alt_codon\E$/) ) | |
| 759 ); | |
| 760 } | |
| 761 | |
| 762 # structural variant | |
| 763 elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { | |
| 764 | |
| 765 # must be a deletion | |
| 766 return 0 unless deletion($bvfoa); | |
| 767 | |
| 768 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 769 my $cds_coords = $bvfo->cds_coords; | |
| 770 my $exons = $bvfo->_exons; | |
| 771 my $bvf = $bvfo->base_variation_feature; | |
| 772 | |
| 773 # in exon | |
| 774 return ( | |
| 775 scalar @$cds_coords == 1 and | |
| 776 $cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate') and | |
| 777 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and | |
| 778 $bvf->length() % 3 == 0 | |
| 779 ); | |
| 780 } | |
| 781 | |
| 782 else { | |
| 783 return 0; | |
| 784 } | |
| 785 } | |
| 786 | |
| 787 sub stop_gained { | |
| 788 my $bvfoa = shift; | |
| 789 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 790 | |
| 791 return 0 unless $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele'); | |
| 792 | |
| 793 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 794 | |
| 795 return 0 unless defined $ref_pep; | |
| 796 | |
| 797 return ( ($alt_pep =~ /\*/) and ($ref_pep !~ /\*/) ); | |
| 798 } | |
| 799 | |
| 800 sub stop_lost { | |
| 801 my $bvfoa = shift; | |
| 802 | |
| 803 # sequence variant | |
| 804 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { | |
| 805 my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); | |
| 806 | |
| 807 return 0 unless defined $ref_pep; | |
| 808 | |
| 809 return ( ($alt_pep !~ /\*/) and ($ref_pep =~ /\*/) ); | |
| 810 } | |
| 811 | |
| 812 # structural variant | |
| 813 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { | |
| 814 return 0 unless deletion($bvfoa); | |
| 815 | |
| 816 my $tr = $bvfoa->transcript; | |
| 817 my $bvf = $bvfoa->base_variation_feature; | |
| 818 | |
| 819 my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end); | |
| 820 return 0 unless defined($tr_crs) && defined($tr_cre); | |
| 821 | |
| 822 if($tr->strand == 1) { | |
| 823 return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end); | |
| 824 } | |
| 825 else { | |
| 826 return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end); | |
| 827 } | |
| 828 } | |
| 829 | |
| 830 else { | |
| 831 return 0; | |
| 832 } | |
| 833 } | |
| 834 | |
| 835 sub frameshift { | |
| 836 my $bvfoa = shift; | |
| 837 | |
| 838 # sequence variant | |
| 839 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { | |
| 840 | |
| 841 return 0 if partial_codon($bvfoa); | |
| 842 | |
| 843 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 844 | |
| 845 return 0 unless defined $bvfo->cds_start && defined $bvfo->cds_end; | |
| 846 | |
| 847 my $var_len = $bvfo->cds_end - $bvfo->cds_start + 1; | |
| 848 | |
| 849 my $allele_len = $bvfoa->seq_length; | |
| 850 | |
| 851 # if the allele length is undefined then we can't call a frameshift | |
| 852 | |
| 853 return 0 unless defined $allele_len; | |
| 854 | |
| 855 return abs( $allele_len - $var_len ) % 3; | |
| 856 } | |
| 857 | |
| 858 # structural variant | |
| 859 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { | |
| 860 my $bvf = $bvfoa->base_variation_feature; | |
| 861 my $exons = $bvfoa->base_variation_feature_overlap->_exons; | |
| 862 | |
| 863 return ( | |
| 864 ( | |
| 865 deletion($bvfoa) or | |
| 866 copy_number_loss($bvfoa) | |
| 867 ) and | |
| 868 scalar grep {complete_within_feature($bvfoa, $_)} @$exons and | |
| 869 $bvf->length % 3 != 0 | |
| 870 ); | |
| 871 | |
| 872 # TODO INSERTIONS | |
| 873 } | |
| 874 | |
| 875 else { | |
| 876 return 0; | |
| 877 } | |
| 878 } | |
| 879 | |
| 880 sub partial_codon { | |
| 881 my $bvfoa = shift; | |
| 882 | |
| 883 my $bvfo = $bvfoa->base_variation_feature_overlap; | |
| 884 | |
| 885 return 0 unless defined $bvfo->translation_start; | |
| 886 | |
| 887 my $cds_length = length $bvfo->_translateable_seq; | |
| 888 | |
| 889 my $codon_cds_start = ($bvfo->translation_start * 3) - 2; | |
| 890 | |
| 891 my $last_codon_length = $cds_length - ($codon_cds_start - 1); | |
| 892 | |
| 893 return ( $last_codon_length < 3 and $last_codon_length > 0 ); | |
| 894 } | |
| 895 | |
| 896 sub coding_unknown { | |
| 897 my $bvfoa = shift; | |
| 898 | |
| 899 # sequence variant | |
| 900 if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { | |
| 901 return (within_cds($bvfoa) and ((not $bvfoa->peptide) or (not _get_peptide_alleles($bvfoa)) or ($bvfoa->peptide =~ /X/)) and (not (frameshift($bvfoa) or inframe_deletion($bvfoa)))); | |
| 902 } | |
| 903 | |
| 904 # structural variant | |
| 905 elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { | |
| 906 return (within_cds($bvfoa) and not(inframe_insertion($bvfoa) or inframe_deletion($bvfoa) or frameshift($bvfoa))); | |
| 907 } | |
| 908 | |
| 909 else { | |
| 910 return 0; | |
| 911 } | |
| 912 } | |
| 913 | |
| 914 #package Bio::EnsEMBL::Variation::RegulatoryFeatureVariationAllele; | |
| 915 | |
| 916 sub within_regulatory_feature { | |
| 917 my $rfva = shift; | |
| 918 return within_feature($rfva); | |
| 919 } | |
| 920 | |
| 921 #package Bio::EnsEMBL::Variation::ExternalFeatureVariationAllele; | |
| 922 | |
| 923 sub within_external_feature { | |
| 924 my $efva = shift; | |
| 925 return (within_feature($efva) and (not within_miRNA_target_site($efva))); | |
| 926 } | |
| 927 | |
| 928 #sub within_miRNA_target_site { | |
| 929 # my $efva = shift; | |
| 930 # | |
| 931 # my $fset = $efva->variation_feature_overlap->feature->feature_set; | |
| 932 # | |
| 933 # return ($fset && $fset->name eq 'miRanda miRNA targets'); | |
| 934 #} | |
| 935 | |
| 936 #package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele; | |
| 937 | |
| 938 #sub within_motif_feature { | |
| 939 # my $mfva = shift; | |
| 940 # return ( | |
| 941 # within_feature($mfva) and | |
| 942 # !increased_binding_affinity($mfva) and | |
| 943 # !decreased_binding_affinity($mfva) | |
| 944 # ); | |
| 945 #} | |
| 946 | |
| 947 sub within_motif_feature { | |
| 948 my $mfva = shift; | |
| 949 return within_feature($mfva); | |
| 950 } | |
| 951 | |
| 952 #sub increased_binding_affinity { | |
| 953 # my $mfva = shift; | |
| 954 # my $change = $mfva->binding_affinity_change; | |
| 955 # return (within_feature($mfva) and (defined $change) and ($change > 0)); | |
| 956 #} | |
| 957 # | |
| 958 #sub decreased_binding_affinity { | |
| 959 # my $mfva = shift; | |
| 960 # my $change = $mfva->binding_affinity_change; | |
| 961 # return (within_feature($mfva) and (defined $change) and ($change < 0)); | |
| 962 #} | |
| 963 | |
| 964 sub contains_entire_feature { | |
| 965 my $vfo = shift; | |
| 966 | |
| 967 my $bvf = $vfo->base_variation_feature; | |
| 968 my $feat = $vfo->feature; | |
| 969 | |
| 970 return ( ($bvf->start <= $feat->start) && ($bvf->end >= $feat->end) ); | |
| 971 } | |
| 972 | |
| 973 1; | |
| 974 |
