Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Variation/Utils/VariationEffect.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/Utils/VariationEffect.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,974 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::Variation::Utils::VariationEffect + +=head1 DESCRIPTION + +This module defines a set of predicate subroutines that check the effect of a +Bio::EnsEMBL::Variation::VariationFeature on some other Bio::EnsEMBL::Feature. +All of these predicates take a VariationFeatureOverlapAllele as their first and +only argument and return a true or false value depending on whether the effect +being checked for holds or not. The link between these predicates and the +specific effect is configured in the Bio::EnsEMBL::Variation::Utils::Config +module and a list of OverlapConsequence objects that represent a link between, +for example, a Sequence Ontology consequence term, and the predicate that +checks for it is provided in the Bio::EnsEMBL::Variation::Utils::Constants +module. If you want to add a new consequence you should write a predicate in +this module and then add an entry in the configuration file. + +=cut + +package Bio::EnsEMBL::Variation::Utils::VariationEffect; + +use strict; +use warnings; + +use base qw(Exporter); + +our @EXPORT_OK = qw(overlap within_cds MAX_DISTANCE_FROM_TRANSCRIPT within_intron stop_lost affects_start_codon $UPSTREAM_DISTANCE $DOWNSTREAM_DISTANCE); + +use constant MAX_DISTANCE_FROM_TRANSCRIPT => 5000; + +our $UPSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT; +our $DOWNSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT; + +#package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; + +sub overlap { + my ( $f1_start, $f1_end, $f2_start, $f2_end ) = @_; + + return ( ($f1_end >= $f2_start) and ($f1_start <= $f2_end) ); +} + +sub within_feature { + my ($bvfoa, $feat) = @_; + my $bvf = $bvfoa->base_variation_feature; + $feat = $bvfoa->feature unless defined($feat); + + return overlap( + $bvf->start, + $bvf->end, + $feat->start, + $feat->end + ); +} + +sub partial_overlap_feature { + my ($bvfoa, $feat) = @_; + my $bvf = $bvfoa->base_variation_feature; + $feat = $bvfoa->feature unless defined($feat); + + return ( + within_feature(@_) and + (not complete_overlap_feature(@_)) and + (($bvf->end > $feat->end) or ($bvf->start < $feat->start)) + ); +} + +sub complete_within_feature { + my ($bvfoa, $feat) = @_; + my $bvf = $bvfoa->base_variation_feature; + $feat = $bvfoa->feature unless defined($feat); + + return ( + ($bvf->start >= $feat->start) and + ($bvf->end <= $feat->end) + ); +} + +sub complete_overlap_feature { + my ($bvfoa, $feat) = @_; + my $bvf = $bvfoa->base_variation_feature; + $feat = $bvfoa->feature unless defined($feat); + + return ( + ($bvf->start <= $feat->start) and + ($bvf->end >= $feat->end) + ); +} + +sub deletion { + my $bvfoa = shift; + + my $bvf = $bvfoa->base_variation_feature; + + # sequence variant will have alleles + if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { + my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); + return ( + (defined($ref_allele) && ($alt_allele eq '') and $ref_allele) or + $bvf->allele_string =~ /deletion/i + ); + } + + # structural variant depends on class + if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { + return ( + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'deletion') or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /deletion/i) or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i) + ); + } + + else { return 0; } +} + +sub insertion { + my $bvfoa = shift; + + my $bvf = $bvfoa->base_variation_feature; + + # sequence variant will have alleles + if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { + my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); + return ( + (defined($ref_allele) && ($ref_allele eq '') and $alt_allele) or + $bvf->allele_string =~ /insertion/i + ); + } + + # structural variant depends on class + if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { + return ( + duplication($bvfoa) or + tandem_duplication($bvfoa) or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'insertion') or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /insertion/i) or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i) + ); + } + + else { return 0; } +} + +sub copy_number_gain { + my $bvfoa = shift; + + return (duplication($bvfoa) or tandem_duplication($bvfoa) or $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /gain/i); +} + +sub copy_number_loss { + my $bvfoa = shift; + + return $bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /loss/i; +} + +sub duplication { + my $bvfoa = shift; + + return ( + ( + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'duplication') or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /duplication/i) + ) and + (not tandem_duplication($bvfoa)) + ); +} + +sub tandem_duplication { + my $bvfoa = shift; + + my $bvf = $bvfoa->base_variation_feature; + + # for sequence variants, check sequence vs ref + if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { + my ($ref_allele, $alt_allele) = _get_alleles($bvfoa); + + return 0 unless $ref_allele and $alt_allele; + return 0 unless + length($alt_allele) > length($ref_allele) and + length($alt_allele) % length($ref_allele) == 0; + + my $copies = length($alt_allele) / length($ref_allele); + + return $alt_allele eq $ref_allele x $copies; + } + + # structural variant depends on class + if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { + return ( + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) eq 'tandem_duplication') or + ($bvfoa->base_variation_feature->class_SO_term(undef, 1) =~ /tandem_duplication/i) + ); + } +} + +sub feature_ablation { + my $bvfoa = shift; + + return (deletion($bvfoa) and complete_overlap_feature($bvfoa)); +} + +sub feature_amplification { + my $bvfoa = shift; + + return (copy_number_gain($bvfoa) && complete_overlap_feature($bvfoa)); +} + +sub feature_elongation { + my $bvfoa = shift; + + return ( + complete_within_feature($bvfoa) and + (copy_number_gain($bvfoa) or insertion($bvfoa)) and + not( + $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and + (inframe_insertion($bvfoa) or stop_lost($bvfoa)) + ) + ); +} + +sub feature_truncation { + my $bvfoa = shift; + + return ( + (partial_overlap_feature($bvfoa) or complete_within_feature($bvfoa)) and + (copy_number_loss($bvfoa) or deletion($bvfoa)) and + not( + $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele') and + (inframe_deletion($bvfoa) or stop_gained($bvfoa)) + ) + ); +} + +#sub transcript_fusion { +# #my $bvfoa = shift; +# #my $bvf = $bvfoa->base_variation_feature; +# +# return 0; +# +# #my $transcripts = $bvf->_get_overlapping_Transcripts(); +#} + +sub _before_start { + my ($bvf, $feat, $dist) = @_; + + return ( ($bvf->end >= ($feat->start - $dist)) and + ($bvf->end < $feat->start) ); +} + +sub _after_end { + my ($bvf, $feat, $dist) = @_; + return ( ($bvf->start <= ($feat->end + $dist)) + and ($bvf->start > $feat->end) ); +} + +sub _upstream { + my ($bvf, $feat, $dist) = @_; + return $feat->strand == 1 ? + _before_start($bvf, $feat, $dist) : + _after_end($bvf, $feat, $dist); +} + +sub _downstream { + my ($bvf, $feat, $dist) = @_; + return $feat->strand == 1 ? + _after_end($bvf, $feat, $dist) : + _before_start($bvf, $feat, $dist); +} + +#package Bio::EnsEMBL::Variation::TranscriptVariationAllele; + +sub upstream { + my $vfoa = shift; + my $bvf = $vfoa->base_variation_feature; + my $feat = $vfoa->feature; + + return _upstream($bvf, $feat, $UPSTREAM_DISTANCE); +} + +sub downstream { + my $vfoa = shift; + my $bvf = $vfoa->base_variation_feature; + my $feat = $vfoa->feature; + + return _downstream($bvf, $feat, $DOWNSTREAM_DISTANCE); +} + +sub affects_transcript { + my ($bvf, $tran) = @_; + + return 0 unless $tran->isa('Bio::EnsEMBL::Transcript'); + + return overlap( + $bvf->start, + $bvf->end, + $tran->start - 5000, + $tran->end + 5000 + ); +} + +sub within_transcript { + my $bvfoa = shift; + return within_feature($bvfoa); +} + +sub within_nmd_transcript { + my $bvfoa = shift; + my $tran = $bvfoa->transcript; + + return ( within_transcript($bvfoa) and ($tran->biotype eq 'nonsense_mediated_decay') ); +} + +sub within_non_coding_gene { + my $bvfoa = shift; + my $tran = $bvfoa->transcript; + + return ( within_transcript($bvfoa) and (not $tran->translation) and (not within_mature_miRNA($bvfoa))); +} + +sub non_coding_exon_variant { + my $bvfoa = shift; + + return 0 unless within_non_coding_gene($bvfoa); + + my $bvf = $bvfoa->base_variation_feature; + my $exons = $bvfoa->base_variation_feature_overlap->_exons; + + if(scalar grep {overlap($bvf->start, $bvf->end, $_->start, $_->end)} @$exons) { + return 1; + } + else { + return 0; + } +} + +sub within_miRNA { + my $bvfoa = shift; + my $tran = $bvfoa->transcript; + + # don't call this for now + + return 0; + + return ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') ); +} + +sub within_mature_miRNA { + my $bvfoa = shift; + + my $bvfo = $bvfoa->base_variation_feature_overlap; + my $bvf = $bvfoa->base_variation_feature; + my $tran = $bvfoa->transcript; + + return 0 unless ( within_transcript($bvfoa) and ($tran->biotype eq 'miRNA') ); + + my ($attribute) = @{ $tran->get_all_Attributes('miRNA') }; + + if (defined $attribute && $attribute->value =~ /(\d+)-(\d+)/) { + for my $coord ($bvfo->_mapper->cdna2genomic($1, $2, $tran->strand)) { + if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { + if (overlap( + $bvf->start, + $bvf->end, + $coord->start, + $coord->end) ) { + return 1; + } + } + } + } + + return 0; +} + +sub donor_splice_site { + my $bvfoa = shift; + my $tran = $bvfoa->transcript; + + return $tran->strand == 1 ? + $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site} : + $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site}; +} + +sub acceptor_splice_site { + my $bvfoa = shift; + my $tran = $bvfoa->transcript; + + return $tran->strand == 1 ? + $bvfoa->base_variation_feature_overlap->_intron_effects->{end_splice_site} : + $bvfoa->base_variation_feature_overlap->_intron_effects->{start_splice_site}; +} + +sub essential_splice_site { + my $bvfoa = shift; + + return ( acceptor_splice_site($bvfoa) or donor_splice_site($bvfoa) ); +} + +sub splice_region { + my $bvfoa = shift; + + return 0 if donor_splice_site($bvfoa); + return 0 if acceptor_splice_site($bvfoa); + return 0 if essential_splice_site($bvfoa); + + return $bvfoa->base_variation_feature_overlap->_intron_effects->{splice_region}; +} + +sub within_intron { + my $bvfoa = shift; + + return $bvfoa->base_variation_feature_overlap->_intron_effects->{intronic}; +} + +sub within_cds { + my $bvfoa = shift; + my $bvf = $bvfoa->base_variation_feature; + my $tran = $bvfoa->transcript; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + my $cds_coords = $bvfoa->base_variation_feature_overlap->cds_coords; + + if (@$cds_coords > 0) { + for my $coord (@$cds_coords) { + if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { + if ($coord->end > 0 && $coord->start <= length($bvfo->_translateable_seq)) { + return 1; + } + } + } + } + + # we also need to check if the vf is in a frameshift intron within the CDS + + if (defined $tran->translation && + $bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) { + + return overlap( + $bvf->start, + $bvf->end, + $tran->coding_region_start, + $tran->coding_region_end, + ); + } + + return 0; +} + +sub within_cdna { + my $bvfoa = shift; + my $bvf = $bvfoa->base_variation_feature; + my $tran = $bvfoa->transcript; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + my $cdna_coords = $bvfo->cdna_coords; + + if (@$cdna_coords > 0) { + for my $coord (@$cdna_coords) { + if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { + if ($coord->end > 0 && $coord->start <= $tran->length) { + return 1; + } + } + } + } + + # we also need to check if the vf is in a frameshift intron within the cDNA + + if ($bvfoa->base_variation_feature_overlap->_intron_effects->{within_frameshift_intron}) { + return within_transcript($bvfoa); + } + + return 0; +} + +sub _before_coding { + my ($bvf, $tran) = @_; + return 0 unless defined $tran->translation; + + my $bvf_s = $bvf->start; + my $bvf_e = $bvf->end; + my $t_s = $tran->start; + my $cds_s = $tran->coding_region_start; + + # we need to special case insertions just before the CDS start + if ($bvf_s == $bvf_e+1 && $bvf_s == $cds_s) { + return 1; + } + + return overlap($bvf_s, $bvf_e, $t_s, $cds_s-1); +} + +sub _after_coding { + my ($bvf, $tran) = @_; + return 0 unless defined $tran->translation; + + my $bvf_s = $bvf->start; + my $bvf_e = $bvf->end; + my $t_e = $tran->end; + my $cds_e = $tran->coding_region_end; + + # we need to special case insertions just after the CDS end + if ($bvf_s == $bvf_e+1 && $bvf_e == $cds_e) { + return 1; + } + + return overlap($bvf_s, $bvf_e, $cds_e+1, $t_e); +} + +sub within_5_prime_utr { + my $bvfoa = shift; + my $bvf = $bvfoa->base_variation_feature; + my $tran = $bvfoa->transcript; + + my $five_prime_of_coding = + $tran->strand == 1 ? + _before_coding($bvf, $tran) : + _after_coding($bvf, $tran); + + return ( $five_prime_of_coding and within_cdna($bvfoa) ); +} + +sub within_3_prime_utr { + my $bvfoa = shift; + my $bvf = $bvfoa->base_variation_feature; + my $tran = $bvfoa->transcript; + + my $three_prime_of_coding = + $tran->strand == 1 ? + _after_coding($bvf, $tran) : + _before_coding($bvf, $tran); + + return ( $three_prime_of_coding and within_cdna($bvfoa) ); +} + +sub complex_indel { + my $bvfoa = shift; + my $bvf = $bvfoa->base_variation_feature; + + # pass the no_db flag to var_class to ensure we don't rely on the database for it + # as it may not have been set at this stage in the pipeline + my $class = $bvf->var_class(1); + + return 0 unless $class =~ /insertion|deletion|indel/; + + return @{ $bvfoa->base_variation_feature_overlap->cds_coords } > 1; +} + +sub _get_peptide_alleles { + my $bvfoa = shift; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + return () if frameshift($bvfoa); + + my $alt_pep = $bvfoa->peptide; + + return () unless defined $alt_pep; + + my $ref_pep = $bvfo->get_reference_TranscriptVariationAllele->peptide; + + return () unless defined $ref_pep; + + $ref_pep = '' if $ref_pep eq '-'; + $alt_pep = '' if $alt_pep eq '-'; + + return ($ref_pep, $alt_pep); +} + +sub _get_codon_alleles { + my $bvfoa = shift; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + return () if frameshift($bvfoa); + + my $alt_codon = $bvfoa->codon; + + return () unless defined $alt_codon; + + my $ref_codon = $bvfo->get_reference_TranscriptVariationAllele->codon; + + return () unless defined $ref_codon; + + $ref_codon = '' if $ref_codon eq '-'; + $alt_codon = '' if $alt_codon eq '-'; + + return ($ref_codon, $alt_codon); +} + +sub _get_alleles { + my $bvfoa = shift; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + my $ref_tva = $bvfo->get_reference_VariationFeatureOverlapAllele; + + return () unless defined ($ref_tva); + + my $ref_allele = $ref_tva->variation_feature_seq; + my $alt_allele = $bvfoa->variation_feature_seq; + + return () unless defined($ref_allele) and defined($alt_allele); + + $ref_allele = '' if $ref_allele eq '-'; + $alt_allele = '' if $alt_allele eq '-'; + + return ($ref_allele, $alt_allele); +} + +sub stop_retained { + my $bvfoa = shift; + + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless $ref_pep; + + return ( $alt_pep =~ /\*/ && $ref_pep =~ /\*/ ); +} + +sub affects_start_codon { + my $bvfoa = shift; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + # sequence variant + if($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptVariation')) { + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless $ref_pep; + + return ( ($bvfo->translation_start == 1) and (substr($ref_pep,0,1) ne substr($alt_pep,0,1)) ); + } + + # structural variant + elsif($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariation')) { + my $tr = $bvfo->transcript; + my $bvf = $bvfo->base_variation_feature; + + my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end); + return 0 unless defined($tr_crs) && defined($tr_cre); + + if($tr->strand == 1) { + return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end); + } + else { + return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end); + } + } + + else { + return 0; + } +} + +sub synonymous_variant { + my $bvfoa = shift; + + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless $ref_pep; + + return ( ($alt_pep eq $ref_pep) and (not stop_retained($bvfoa) and ($alt_pep !~ /X/) and ($ref_pep !~ /X/)) ); +} + +sub missense_variant { + my $bvfoa = shift; + + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless defined $ref_pep; + + return 0 if affects_start_codon($bvfoa); + return 0 if stop_lost($bvfoa); + return 0 if stop_gained($bvfoa); + return 0 if partial_codon($bvfoa); + + return 0 if inframe_deletion($bvfoa); + return 0 if inframe_insertion($bvfoa); + + return ( $ref_pep ne $alt_pep ); +} + +sub inframe_insertion { + my $bvfoa = shift; + + # sequence variant + if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { + my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa); + + return 0 unless defined $ref_codon; + + return ( + (length($alt_codon) > length ($ref_codon)) && + ( ($alt_codon =~ /^\Q$ref_codon\E/) || ($alt_codon =~ /\Q$ref_codon\E$/) ) + ); + } + + # structural variant + elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { + + # TO BE DONE, NO WAY OF KNOWING WHAT SEQUENCE IS INSERTED YET + return 0; + + # must be an insertion + return 0 unless insertion($bvfoa); + + my $bvfo = $bvfoa->base_variation_feature_overlap; + + my $cds_coords = $bvfo->cds_coords; + + if(scalar @$cds_coords == 1) { + + # wholly within exon + if($cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate')) { + 1; + } + } + + # variant partially overlaps + else { + return 0; + } + } + + else { + return 0; + } +} + +sub inframe_deletion { + my $bvfoa = shift; + + # sequence variant + if($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { + my ($ref_codon, $alt_codon) = _get_codon_alleles($bvfoa); + + return 0 unless defined $ref_codon; + + return ( + (length($alt_codon) < length ($ref_codon)) && + ( ($ref_codon =~ /^\Q$alt_codon\E/) || ($ref_codon =~ /\Q$alt_codon\E$/) ) + ); + } + + # structural variant + elsif($bvfoa->base_variation_feature->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) { + + # must be a deletion + return 0 unless deletion($bvfoa); + + my $bvfo = $bvfoa->base_variation_feature_overlap; + my $cds_coords = $bvfo->cds_coords; + my $exons = $bvfo->_exons; + my $bvf = $bvfo->base_variation_feature; + + # in exon + return ( + scalar @$cds_coords == 1 and + $cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate') and + scalar grep {complete_within_feature($bvfoa, $_)} @$exons and + $bvf->length() % 3 == 0 + ); + } + + else { + return 0; + } +} + +sub stop_gained { + my $bvfoa = shift; + my $bvfo = $bvfoa->base_variation_feature_overlap; + + return 0 unless $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele'); + + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless defined $ref_pep; + + return ( ($alt_pep =~ /\*/) and ($ref_pep !~ /\*/) ); +} + +sub stop_lost { + my $bvfoa = shift; + + # sequence variant + if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { + my ($ref_pep, $alt_pep) = _get_peptide_alleles($bvfoa); + + return 0 unless defined $ref_pep; + + return ( ($alt_pep !~ /\*/) and ($ref_pep =~ /\*/) ); + } + + # structural variant + elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { + return 0 unless deletion($bvfoa); + + my $tr = $bvfoa->transcript; + my $bvf = $bvfoa->base_variation_feature; + + my ($tr_crs, $tr_cre) = ($tr->coding_region_start, $tr->coding_region_end); + return 0 unless defined($tr_crs) && defined($tr_cre); + + if($tr->strand == 1) { + return overlap($tr_cre-2, $tr_cre, $bvf->start, $bvf->end); + } + else { + return overlap($tr_crs, $tr_crs + 2, $bvf->start, $bvf->end); + } + } + + else { + return 0; + } +} + +sub frameshift { + my $bvfoa = shift; + + # sequence variant + if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { + + return 0 if partial_codon($bvfoa); + + my $bvfo = $bvfoa->base_variation_feature_overlap; + + return 0 unless defined $bvfo->cds_start && defined $bvfo->cds_end; + + my $var_len = $bvfo->cds_end - $bvfo->cds_start + 1; + + my $allele_len = $bvfoa->seq_length; + + # if the allele length is undefined then we can't call a frameshift + + return 0 unless defined $allele_len; + + return abs( $allele_len - $var_len ) % 3; + } + + # structural variant + elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { + my $bvf = $bvfoa->base_variation_feature; + my $exons = $bvfoa->base_variation_feature_overlap->_exons; + + return ( + ( + deletion($bvfoa) or + copy_number_loss($bvfoa) + ) and + scalar grep {complete_within_feature($bvfoa, $_)} @$exons and + $bvf->length % 3 != 0 + ); + + # TODO INSERTIONS + } + + else { + return 0; + } +} + +sub partial_codon { + my $bvfoa = shift; + + my $bvfo = $bvfoa->base_variation_feature_overlap; + + return 0 unless defined $bvfo->translation_start; + + my $cds_length = length $bvfo->_translateable_seq; + + my $codon_cds_start = ($bvfo->translation_start * 3) - 2; + + my $last_codon_length = $cds_length - ($codon_cds_start - 1); + + return ( $last_codon_length < 3 and $last_codon_length > 0 ); +} + +sub coding_unknown { + my $bvfoa = shift; + + # sequence variant + if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) { + return (within_cds($bvfoa) and ((not $bvfoa->peptide) or (not _get_peptide_alleles($bvfoa)) or ($bvfoa->peptide =~ /X/)) and (not (frameshift($bvfoa) or inframe_deletion($bvfoa)))); + } + + # structural variant + elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) { + return (within_cds($bvfoa) and not(inframe_insertion($bvfoa) or inframe_deletion($bvfoa) or frameshift($bvfoa))); + } + + else { + return 0; + } +} + +#package Bio::EnsEMBL::Variation::RegulatoryFeatureVariationAllele; + +sub within_regulatory_feature { + my $rfva = shift; + return within_feature($rfva); +} + +#package Bio::EnsEMBL::Variation::ExternalFeatureVariationAllele; + +sub within_external_feature { + my $efva = shift; + return (within_feature($efva) and (not within_miRNA_target_site($efva))); +} + +#sub within_miRNA_target_site { +# my $efva = shift; +# +# my $fset = $efva->variation_feature_overlap->feature->feature_set; +# +# return ($fset && $fset->name eq 'miRanda miRNA targets'); +#} + +#package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele; + +#sub within_motif_feature { +# my $mfva = shift; +# return ( +# within_feature($mfva) and +# !increased_binding_affinity($mfva) and +# !decreased_binding_affinity($mfva) +# ); +#} + +sub within_motif_feature { + my $mfva = shift; + return within_feature($mfva); +} + +#sub increased_binding_affinity { +# my $mfva = shift; +# my $change = $mfva->binding_affinity_change; +# return (within_feature($mfva) and (defined $change) and ($change > 0)); +#} +# +#sub decreased_binding_affinity { +# my $mfva = shift; +# my $change = $mfva->binding_affinity_change; +# return (within_feature($mfva) and (defined $change) and ($change < 0)); +#} + +sub contains_entire_feature { + my $vfo = shift; + + my $bvf = $vfo->base_variation_feature; + my $feat = $vfo->feature; + + return ( ($bvf->start <= $feat->start) && ($bvf->end >= $feat->end) ); +} + +1; +
