diff variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,360 @@
+=head1 LICENSE
+
+ Copyright (c) 1999-2012 The European Bioinformatics Institute and
+ Genome Research Limited.  All rights reserved.
+
+ This software is distributed under a modified Apache license.
+ For license details, please see
+
+   http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+ Please email comments or questions to the public Ensembl
+ developers list at <dev@ensembl.org>.
+
+ Questions may also be sent to the Ensembl help desk at
+ <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele
+
+=head1 SYNOPSIS
+
+    use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
+    
+    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
+        -variation_feature_overlap  => $vfo,
+        -variation_feature_seq      => 'A',
+        -is_reference               => 0,
+    );
+
+    print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
+    print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
+    print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";
+
+=head1 DESCRIPTION
+
+A VariationFeatureOverlapAllele object represents a single allele of a 
+VariationFeatureOverlap. It is the super-class of various feature-specific allele
+classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and 
+contains methods not specific to any particular feature type. Ordinarily you will 
+not create these objects yourself, but instead you would create e.g. a 
+TranscriptVariation object which will then create VariationFeatureOverlapAlleles 
+based on the allele string of the associated VariationFeature. 
+
+=cut
+
+package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
+
+use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);
+
+our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;
+
+our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;
+
+our $SPECIFIED_LENGTH = qr /(\d+) BP (INSERTION|DELETION)/i;
+
+=head2 new
+
+  Arg [-VARIATION_FEATURE_OVERLAP] : 
+    The Bio::EnsEMBL::VariationFeatureOverlap with which this allele is 
+    associated
+
+  Arg [-VARIATION_FEATURE_SEQ] :
+    The allele sequence with respect to the associated VariationFeature
+
+  Arg [-IS_REFERENCE] :
+    A flag indicating if this allele is the reference allele or not
+
+  Example : 
+    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
+        -variation_feature_ovelap   => $vfo,
+        -variation_feature_seq      => 'A',
+        -is_reference               => 0
+    );
+
+  Description: Constructs a new VariationFeatureOverlapAllele instance given a 
+               VariationFeatureOverlap and the sequence of the allele
+  Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance 
+  Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ 
+               are supplied
+  Status     : At Risk
+
+=cut 
+
+sub new {
+    my $class = shift;
+
+    my %args = @_;
+
+    # swap a '-variation_feature_overlap' argument for a '-base_variation_feature_overlap'
+    # and a '-variation_feature' for a '-base_variation_feature' for the superclass
+
+    for my $arg (keys %args) {
+        if (lc($arg) eq '-variation_feature_overlap') {
+            $args{'-base_variation_feature_overlap'} = delete $args{$arg};
+        }
+    }
+
+    my $self = $class->SUPER::new(%args);
+
+    assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
+
+    my (
+        $variation_feature_seq, 
+    ) = rearrange([qw(
+            VARIATION_FEATURE_SEQ 
+        )], %args);
+
+
+    throw("Allele sequence required (variation "+$self->variation_feature->variation_name+")") 
+        unless $variation_feature_seq;
+
+    $self->{variation_feature_seq} = $variation_feature_seq;
+
+    return $self;
+}
+
+sub new_fast {
+    my ($class, $hashref) = @_;
+    
+    # swap a variation_feature_overlap argument for a base_variation_feature_overlap one
+
+    if ($hashref->{variation_feature_overlap}) {
+        $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap};
+    }
+    
+    # and call the superclass
+
+    return $class->SUPER::new_fast($hashref);
+}
+
+=head2 dbID
+
+  Description: Get/set the dbID of this VariationFeatureOverlapAllele
+  Returntype : integer
+  Exceptions : none 
+  Status     : At Risk
+
+=cut
+
+sub dbID {
+    my ($self, $dbID) = @_;
+    $self->{dbID} = $dbID if defined $dbID;
+    return $self->{dbID};
+}
+
+=head2 variation_feature_overlap
+
+  Description: Get/set the associated VariationFeatureOverlap
+  Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
+  Exceptions : throws if the argument is the wrong type
+  Status     : At Risk
+
+=cut
+
+sub variation_feature_overlap {
+    my ($self, $variation_feature_overlap) = @_;
+    
+    if ($variation_feature_overlap) {
+        assert_ref($variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
+    }
+    
+    return $self->base_variation_feature_overlap($variation_feature_overlap);
+}
+
+=head2 variation_feature
+
+  Description: Get the associated VariationFeature
+  Returntype : Bio::EnsEMBL::Variation::VariationFeature
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub variation_feature {
+    my $self = shift;
+    return $self->variation_feature_overlap->variation_feature;
+}
+
+=head2 feature_seq
+
+  Description: Get the sequence of this allele relative to the associated Feature.
+               This will be the same as the variation_feature_seq when the associated
+               VariationFeature is on the same strand as the Feature, or the reverse
+               complement when the strands differ.
+  Returntype : string
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub feature_seq {
+    my $self = shift;
+    
+    unless ($self->{feature_seq}) {
+        
+        # check if we need to reverse complement the variation_feature_seq
+        
+        if (($self->variation_feature->strand != $self->feature->strand) && $self->seq_is_dna) {
+            my $vf_seq = $self->variation_feature_seq;
+            reverse_comp(\$vf_seq);
+            $self->{feature_seq} = $vf_seq;
+        }
+        else {
+            $self->{feature_seq} = $self->{variation_feature_seq};
+        }
+    }
+    
+    return $self->{feature_seq};
+}
+
+=head2 variation_feature_seq
+
+  Args [1]   : The allele sequence relative to the VariationFeature
+  Description: Get/set the sequence of this allele relative to the associated VariationFeature.
+  Returntype : string
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub variation_feature_seq {
+    # the sequence of this allele relative to the variation feature
+    my ($self, $variation_feature_seq) = @_;
+    $self->{variation_feature_seq} = $variation_feature_seq if $variation_feature_seq;
+    return $self->{variation_feature_seq};
+}
+
+=head2 seq_is_unambiguous_dna
+
+  Description: identify if the sequence of this allele is unambiguous DNA 
+               i.e. if we can meaningfully translate it
+  Returntype : bool
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub seq_is_unambiguous_dna {
+    my $self = shift;
+
+    unless (defined $self->{seq_is_unambiguous_dna}) {
+        $self->{seq_is_unambiguous_dna} = 
+            $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/ ? 1 : 0;
+    }
+    
+    return $self->{seq_is_unambiguous_dna};
+}
+
+=head2 seq_is_dna
+
+  Description: identify if the sequence of this allele is DNA including ambiguity 
+               codes, use seq_is_unambiguous_dna to check for alleles that do not
+               include ambiguity codes
+  Returntype : bool
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub seq_is_dna {
+    my $self = shift;
+
+    unless (defined $self->{seq_is_dna}) {
+        $self->{seq_is_dna} = 
+            $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/ ? 1 : 0;
+    }
+    
+    return $self->{seq_is_dna};
+}
+
+=head2 seq_length
+
+  Description: return the length of this allele sequence, this is better than
+               just using length($vfoa->feature_seq) because we check if the
+               sequence is valid DNA, and also look for allele strings like 
+               "(3 BP INSERTION)" to determine the length
+  Returntype : int or undef if we cannot determine the length
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub seq_length {
+    my $self = shift;
+
+    my $seq = $self->variation_feature_seq;
+
+    if ($self->seq_is_dna) {
+        if ($seq eq '-') {
+            return 0;
+        }
+        else {
+            return length($seq);
+        }
+    }
+    elsif ($seq =~ /$SPECIFIED_LENGTH/) {
+        return $1;
+    }
+    
+    return undef;
+}
+
+=head2 allele_string
+
+  Description: Return a '/' delimited string of the reference allele variation_feature_seq 
+               and the variation_feature_seq of this allele
+  Returntype : string
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub allele_string {
+    my $self = shift;
+    
+    my $ref = $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele->variation_feature_seq;
+    
+    # for the HGMDs and CNV probes where the alleles are artificially set to be
+    # the same, just return the reference sequence
+    
+    if ($ref eq $self->variation_feature_seq) {
+        return $ref;
+    }
+    else {
+        return $ref.'/'.$self->variation_feature_seq;
+    }
+}
+
+
+sub _convert_to_sara {
+    my $self = shift;
+    
+    my $oc = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast({
+        'label'        => 'SARA',
+        'description'  => 'Same as reference allele',
+        'rank'         => '99',
+        'display_term' => 'SARA',
+        'SO_term'      => 'SARA',
+    });
+    
+    $self->add_OverlapConsequence($oc);
+    
+    return $self;
+}
+
+1;
+