view variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele

=head1 SYNOPSIS

    use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
    
    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap  => $vfo,
        -variation_feature_seq      => 'A',
        -is_reference               => 0,
    );

    print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
    print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
    print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";

=head1 DESCRIPTION

A VariationFeatureOverlapAllele object represents a single allele of a 
VariationFeatureOverlap. It is the super-class of various feature-specific allele
classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and 
contains methods not specific to any particular feature type. Ordinarily you will 
not create these objects yourself, but instead you would create e.g. a 
TranscriptVariation object which will then create VariationFeatureOverlapAlleles 
based on the allele string of the associated VariationFeature. 

=cut

package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);

our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;

our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;

our $SPECIFIED_LENGTH = qr /(\d+) BP (INSERTION|DELETION)/i;

=head2 new

  Arg [-VARIATION_FEATURE_OVERLAP] : 
    The Bio::EnsEMBL::VariationFeatureOverlap with which this allele is 
    associated

  Arg [-VARIATION_FEATURE_SEQ] :
    The allele sequence with respect to the associated VariationFeature

  Arg [-IS_REFERENCE] :
    A flag indicating if this allele is the reference allele or not

  Example : 
    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_ovelap   => $vfo,
        -variation_feature_seq      => 'A',
        -is_reference               => 0
    );

  Description: Constructs a new VariationFeatureOverlapAllele instance given a 
               VariationFeatureOverlap and the sequence of the allele
  Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance 
  Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ 
               are supplied
  Status     : At Risk

=cut 

sub new {
    my $class = shift;

    my %args = @_;

    # swap a '-variation_feature_overlap' argument for a '-base_variation_feature_overlap'
    # and a '-variation_feature' for a '-base_variation_feature' for the superclass

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature_overlap') {
            $args{'-base_variation_feature_overlap'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');

    my (
        $variation_feature_seq, 
    ) = rearrange([qw(
            VARIATION_FEATURE_SEQ 
        )], %args);


    throw("Allele sequence required (variation "+$self->variation_feature->variation_name+")") 
        unless $variation_feature_seq;

    $self->{variation_feature_seq} = $variation_feature_seq;

    return $self;
}

sub new_fast {
    my ($class, $hashref) = @_;
    
    # swap a variation_feature_overlap argument for a base_variation_feature_overlap one

    if ($hashref->{variation_feature_overlap}) {
        $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap};
    }
    
    # and call the superclass

    return $class->SUPER::new_fast($hashref);
}

=head2 dbID

  Description: Get/set the dbID of this VariationFeatureOverlapAllele
  Returntype : integer
  Exceptions : none 
  Status     : At Risk

=cut

sub dbID {
    my ($self, $dbID) = @_;
    $self->{dbID} = $dbID if defined $dbID;
    return $self->{dbID};
}

=head2 variation_feature_overlap

  Description: Get/set the associated VariationFeatureOverlap
  Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
  Exceptions : throws if the argument is the wrong type
  Status     : At Risk

=cut

sub variation_feature_overlap {
    my ($self, $variation_feature_overlap) = @_;
    
    if ($variation_feature_overlap) {
        assert_ref($variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
    }
    
    return $self->base_variation_feature_overlap($variation_feature_overlap);
}

=head2 variation_feature

  Description: Get the associated VariationFeature
  Returntype : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions : none
  Status     : At Risk

=cut

sub variation_feature {
    my $self = shift;
    return $self->variation_feature_overlap->variation_feature;
}

=head2 feature_seq

  Description: Get the sequence of this allele relative to the associated Feature.
               This will be the same as the variation_feature_seq when the associated
               VariationFeature is on the same strand as the Feature, or the reverse
               complement when the strands differ.
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub feature_seq {
    my $self = shift;
    
    unless ($self->{feature_seq}) {
        
        # check if we need to reverse complement the variation_feature_seq
        
        if (($self->variation_feature->strand != $self->feature->strand) && $self->seq_is_dna) {
            my $vf_seq = $self->variation_feature_seq;
            reverse_comp(\$vf_seq);
            $self->{feature_seq} = $vf_seq;
        }
        else {
            $self->{feature_seq} = $self->{variation_feature_seq};
        }
    }
    
    return $self->{feature_seq};
}

=head2 variation_feature_seq

  Args [1]   : The allele sequence relative to the VariationFeature
  Description: Get/set the sequence of this allele relative to the associated VariationFeature.
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub variation_feature_seq {
    # the sequence of this allele relative to the variation feature
    my ($self, $variation_feature_seq) = @_;
    $self->{variation_feature_seq} = $variation_feature_seq if $variation_feature_seq;
    return $self->{variation_feature_seq};
}

=head2 seq_is_unambiguous_dna

  Description: identify if the sequence of this allele is unambiguous DNA 
               i.e. if we can meaningfully translate it
  Returntype : bool
  Exceptions : none
  Status     : At Risk

=cut

sub seq_is_unambiguous_dna {
    my $self = shift;

    unless (defined $self->{seq_is_unambiguous_dna}) {
        $self->{seq_is_unambiguous_dna} = 
            $self->{variation_feature_seq} =~ /$UNAMBIGUOUS_NUCLEOTIDES/ ? 1 : 0;
    }
    
    return $self->{seq_is_unambiguous_dna};
}

=head2 seq_is_dna

  Description: identify if the sequence of this allele is DNA including ambiguity 
               codes, use seq_is_unambiguous_dna to check for alleles that do not
               include ambiguity codes
  Returntype : bool
  Exceptions : none
  Status     : At Risk

=cut

sub seq_is_dna {
    my $self = shift;

    unless (defined $self->{seq_is_dna}) {
        $self->{seq_is_dna} = 
            $self->{variation_feature_seq} =~ /$ALL_NUCLEOTIDES/ ? 1 : 0;
    }
    
    return $self->{seq_is_dna};
}

=head2 seq_length

  Description: return the length of this allele sequence, this is better than
               just using length($vfoa->feature_seq) because we check if the
               sequence is valid DNA, and also look for allele strings like 
               "(3 BP INSERTION)" to determine the length
  Returntype : int or undef if we cannot determine the length
  Exceptions : none
  Status     : At Risk

=cut

sub seq_length {
    my $self = shift;

    my $seq = $self->variation_feature_seq;

    if ($self->seq_is_dna) {
        if ($seq eq '-') {
            return 0;
        }
        else {
            return length($seq);
        }
    }
    elsif ($seq =~ /$SPECIFIED_LENGTH/) {
        return $1;
    }
    
    return undef;
}

=head2 allele_string

  Description: Return a '/' delimited string of the reference allele variation_feature_seq 
               and the variation_feature_seq of this allele
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub allele_string {
    my $self = shift;
    
    my $ref = $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele->variation_feature_seq;
    
    # for the HGMDs and CNV probes where the alleles are artificially set to be
    # the same, just return the reference sequence
    
    if ($ref eq $self->variation_feature_seq) {
        return $ref;
    }
    else {
        return $ref.'/'.$self->variation_feature_seq;
    }
}


sub _convert_to_sara {
    my $self = shift;
    
    my $oc = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast({
        'label'        => 'SARA',
        'description'  => 'Same as reference allele',
        'rank'         => '99',
        'display_term' => 'SARA',
        'SO_term'      => 'SARA',
    });
    
    $self->add_OverlapConsequence($oc);
    
    return $self;
}

1;