view variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Variation::VariationFeatureOverlap

=head1 SYNOPSIS

    use Bio::EnsEMBL::Variation::VariationFeatureOverlap;

    my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
        -feature            => $feature,
        -variation_feature  => $var_feat
    );

    print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n";
    print "most severe consequence: ", $vfo->display_consequence, "\n";

=head1 DESCRIPTION

A VariationFeatureOverlap represents a VariationFeature which is in close
proximity to another Ensembl Feature. It is the superclass of feature-specific
objects such as TranscriptVariation and RegulatoryFeatureVariation, and has
methods common to all such objects. You will not normally instantiate this
class directly, instead instantiating one of the feature-specific subclasses.

=cut

package Bio::EnsEMBL::Variation::VariationFeatureOverlap;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Sequence qw(expand);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;

use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap);

=head2 new

  Arg [-FEATURE] : 
    The Bio::EnsEMBL::Feature associated with the given VariationFeature

  Arg [-VARIATION_FEATURE] :
    The Bio::EnsEMBL::VariationFeature associated with the given Feature

  Arg [-ADAPTOR] :
    A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor

  Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
    A flag indiciating if ambiguous single nucleotide alleles should be disambiguated
    when constructing the VariationFeatureOverlapAllele objects, e.g. a Variationfeature
    with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
    ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
    allowed longer alleles with potentially many ambiguous bases.

  Example : 
    my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
        -feature           => $feature,
        -variation_feature => $var_feat
    );

  Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature
               and a Feature
  Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance 
  Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the
               supplied ADAPTOR is the wrong class
  Status     : At Risk

=cut 

sub new {

    my $class = shift;
   
    my %args = @_;

    # swap a '-variation_feature' argument for a '-base_variation_feature' one for the superclass

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature') {
            $args{'-base_variation_feature'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    my (
        $adaptor, 
        $ref_feature, 
        $disambiguate_sn_alleles,
        $no_ref_check,
    ) = rearrange([qw(
            ADAPTOR 
            REF_FEATURE 
            DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
            NO_REF_CHECK
        )], %args);

    my $variation_feature = $self->base_variation_feature;

    assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
    assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor;

    $ref_feature ||= $variation_feature->slice;

    $self->{adaptor}        = $adaptor;
    $self->{ref_feature}    = $ref_feature;

    my $ref_allele;
    
    # we take the reference allele sequence from the reference sequence, not from the allele string
    unless($no_ref_check) {
        $ref_allele = $ref_feature->subseq(
            $variation_feature->start, 
            $variation_feature->end, 
            $variation_feature->strand
        );
    }

    # get the variation feature allele string, expand it, and split it into separate alleles
    
    my $allele_string = $variation_feature->allele_string;
    
    expand(\$allele_string);

    my @alleles = split /\//, $allele_string;
    
    $ref_allele = $alleles[0] if $no_ref_check;
    $ref_allele = '-' unless $ref_allele;
  
    if ($disambiguate_sn_alleles) {
        
        # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so  
        # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C
        # we only do this for single nucleotide alleles to avoid the combinatorial explosion
        # of long allele strings with potentially many ambiguous bases (because ensembl 
        # genomes want this functionality)

        my @possible_alleles;

        for my $allele (@alleles) {
            
            if ($allele !~ /^[ACGT-]+$/ && length($allele) == 1) {
                for my $possible ( split //, unambiguity_code($allele) ) {
                    push @possible_alleles, $possible;
                }
            }
            else {
                # the allele is either unambiguous or longer than 1 nucleotide, so add it unaltered
                push @possible_alleles, $allele;
            }
        }

        @alleles = @possible_alleles;
    }

    # make sure the alleles are unique
    
    # we also want to deal with alleles like (T)0 which expand into 
    # an empty string and we want to treat this as a deletion, so 
    # we replace
    # any empty strings with '-'
    
    @alleles = keys %{ { map { ($_ || '-') => 1 } @alleles } };

    # create an object representing the reference allele
    
    my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap   => $self,
        -variation_feature_seq       => $ref_allele,
        -is_reference                => 1,
    );
    
    $self->add_VariationFeatureOverlapAllele($ref_vfoa);

    # create objects representing the alternate alleles
    
    for my $allele (@alleles) {
        
        next if $allele eq $ref_allele;
       
        my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
            -variation_feature_overlap  => $self,
            -variation_feature_seq      => $allele,
            -is_reference               => 0,
        );
       
        $self->add_VariationFeatureOverlapAllele($vfoa);
    }
    
    return $self;
}

sub new_fast {
    my ($class, $hashref) = @_;

    # swap a variation_feature argument for a base_variation_feature one
    
    if ($hashref->{variation_feature}) {
        $hashref->{base_variation_feature} = delete $hashref->{variation_feature};
    }

    return $class->SUPER::new_fast($hashref);
}

sub dbID {
    my $self = shift;
    
    unless ($self->{dbID}) {
        # we don't really have a dbID, so concatenate all the dbIDs of our alleles

        $self->{dbID} = join '_', map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles };
    }

    return $self->{dbID};
}

=head2 variation_feature

  Arg [1]    : (optional) A Bio::EnsEMBL::Variation::VariationFeature
  Description: Get/set the associated VariationFeature, lazy-loading it if required
  Returntype : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions : throws if the argument is the wrong type
  Status     : At Risk

=cut

sub variation_feature {
    my ($self, $variation_feature) = @_;

    if ($variation_feature) {
        assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
        $self->base_variation_feature($variation_feature);
    }
    
    if (my $vf_id = $self->{_variation_feature_id}) {
        
        # lazy-load the VariationFeature
        
        if (my $adap = $self->{adaptor}) {
            if (my $vfa = $adap->db->get_VariationFeatureAdaptor) {
                if (my $vf = $vfa->fetch_by_dbID($vf_id)) {
                    $self->base_variation_feature($vf);
                    delete $self->{_variation_feature_id};
                }
            }
        }
    }
    
    return $self->base_variation_feature;
}

sub _variation_feature_id {

    # get the dbID of the variation feature, using the VariationFeature object 
    # if we have one, or the internal hash value if we don't
    
    my $self = shift;
    
    if (my $vf = $self->{variation_feature}) {
        return $vf->dbID;
    }
    elsif (my $id = $self->{_variation_feature_id}) {
        return $id;
    }
    else {
        return undef;
    }
}

sub get_VariationFeatureOverlapAllele_for_allele_seq {
    my ($self, $allele_seq) = @_;
    return $self->{_alleles_by_seq}->{$allele_seq};
}

=head2 add_VariationFeatureOverlapAllele

  Arg [1]    : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
  Description: Add an allele to this VariationFeatureOverlap
  Returntype : none
  Exceptions : throws if the argument is not the expected type
  Status     : At Risk

=cut

sub add_VariationFeatureOverlapAllele {
    my ($self, $vfoa) = @_;

    assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele');

    $self->add_BaseVariationFeatureOverlapAllele($vfoa);

    $self->{_alleles_by_seq}->{ $vfoa->variation_feature_seq } = $vfoa;
}

=head2 get_reference_VariationFeatureOverlapAllele

  Description: Get the object representing the reference allele of this VariationFeatureOverlapAllele
  Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
  Exceptions : none
  Status     : At Risk

=cut

sub get_reference_VariationFeatureOverlapAllele {
    my $self = shift;
    return $self->get_reference_BaseVariationFeatureOverlapAllele(@_);
}

=head2 get_all_alternate_VariationFeatureOverlapAlleles

  Description: Get a list of the alternate alleles of this VariationFeatureOverlapAllele
  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
  Exceptions : none
  Status     : At Risk

=cut

sub get_all_alternate_VariationFeatureOverlapAlleles {
    my $self = shift;
    return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@_);
}

=head2 get_all_VariationFeatureOverlapAlleles

  Description: Get a list of the all the alleles, both reference and alternate, of this
               VariationFeatureOverlap
  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
  Exceptions : none
  Status     : At Risk

=cut

sub get_all_VariationFeatureOverlapAlleles {
    my $self = shift;
    return $self->get_all_BaseVariationFeatureOverlapAlleles(@_);
}

sub _convert_to_sara {
    my $self = shift;
    
    my $ref_allele = $self->{reference_allele};
    $ref_allele->_convert_to_sara;
    
    $self->{alt_alleles} = [$ref_allele];
}

sub _rearrange_alleles {
    my $self = shift;
    my $keep_alleles = shift;
    
    # fix alt alleles
    my $alt_alleles = $self->{alt_alleles};
    my @new_alleles = grep {$keep_alleles->{$_->variation_feature_seq}} @$alt_alleles;
    $self->{alt_alleles} = scalar @new_alleles ? \@new_alleles : $alt_alleles;
    
    # copy to ref allele if homozygous non-ref
    $self->{reference_allele} = $self->{alt_alleles}->[0] if scalar keys %$keep_alleles == 1;
}

1;