diff variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,384 @@
+=head1 LICENSE
+
+ Copyright (c) 1999-2012 The European Bioinformatics Institute and
+ Genome Research Limited.  All rights reserved.
+
+ This software is distributed under a modified Apache license.
+ For license details, please see
+
+   http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+ Please email comments or questions to the public Ensembl
+ developers list at <dev@ensembl.org>.
+
+ Questions may also be sent to the Ensembl help desk at
+ <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Variation::VariationFeatureOverlap
+
+=head1 SYNOPSIS
+
+    use Bio::EnsEMBL::Variation::VariationFeatureOverlap;
+
+    my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
+        -feature            => $feature,
+        -variation_feature  => $var_feat
+    );
+
+    print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n";
+    print "most severe consequence: ", $vfo->display_consequence, "\n";
+
+=head1 DESCRIPTION
+
+A VariationFeatureOverlap represents a VariationFeature which is in close
+proximity to another Ensembl Feature. It is the superclass of feature-specific
+objects such as TranscriptVariation and RegulatoryFeatureVariation, and has
+methods common to all such objects. You will not normally instantiate this
+class directly, instead instantiating one of the feature-specific subclasses.
+
+=cut
+
+package Bio::EnsEMBL::Variation::VariationFeatureOverlap;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Sequence qw(expand);
+use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
+use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;
+
+use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap);
+
+=head2 new
+
+  Arg [-FEATURE] : 
+    The Bio::EnsEMBL::Feature associated with the given VariationFeature
+
+  Arg [-VARIATION_FEATURE] :
+    The Bio::EnsEMBL::VariationFeature associated with the given Feature
+
+  Arg [-ADAPTOR] :
+    A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor
+
+  Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
+    A flag indiciating if ambiguous single nucleotide alleles should be disambiguated
+    when constructing the VariationFeatureOverlapAllele objects, e.g. a Variationfeature
+    with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
+    ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
+    allowed longer alleles with potentially many ambiguous bases.
+
+  Example : 
+    my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new(
+        -feature           => $feature,
+        -variation_feature => $var_feat
+    );
+
+  Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature
+               and a Feature
+  Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance 
+  Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the
+               supplied ADAPTOR is the wrong class
+  Status     : At Risk
+
+=cut 
+
+sub new {
+
+    my $class = shift;
+   
+    my %args = @_;
+
+    # swap a '-variation_feature' argument for a '-base_variation_feature' one for the superclass
+
+    for my $arg (keys %args) {
+        if (lc($arg) eq '-variation_feature') {
+            $args{'-base_variation_feature'} = delete $args{$arg};
+        }
+    }
+
+    my $self = $class->SUPER::new(%args);
+
+    my (
+        $adaptor, 
+        $ref_feature, 
+        $disambiguate_sn_alleles,
+        $no_ref_check,
+    ) = rearrange([qw(
+            ADAPTOR 
+            REF_FEATURE 
+            DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES
+            NO_REF_CHECK
+        )], %args);
+
+    my $variation_feature = $self->base_variation_feature;
+
+    assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
+    assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor;
+
+    $ref_feature ||= $variation_feature->slice;
+
+    $self->{adaptor}        = $adaptor;
+    $self->{ref_feature}    = $ref_feature;
+
+    my $ref_allele;
+    
+    # we take the reference allele sequence from the reference sequence, not from the allele string
+    unless($no_ref_check) {
+        $ref_allele = $ref_feature->subseq(
+            $variation_feature->start, 
+            $variation_feature->end, 
+            $variation_feature->strand
+        );
+    }
+
+    # get the variation feature allele string, expand it, and split it into separate alleles
+    
+    my $allele_string = $variation_feature->allele_string;
+    
+    expand(\$allele_string);
+
+    my @alleles = split /\//, $allele_string;
+    
+    $ref_allele = $alleles[0] if $no_ref_check;
+    $ref_allele = '-' unless $ref_allele;
+  
+    if ($disambiguate_sn_alleles) {
+        
+        # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so  
+        # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C
+        # we only do this for single nucleotide alleles to avoid the combinatorial explosion
+        # of long allele strings with potentially many ambiguous bases (because ensembl 
+        # genomes want this functionality)
+
+        my @possible_alleles;
+
+        for my $allele (@alleles) {
+            
+            if ($allele !~ /^[ACGT-]+$/ && length($allele) == 1) {
+                for my $possible ( split //, unambiguity_code($allele) ) {
+                    push @possible_alleles, $possible;
+                }
+            }
+            else {
+                # the allele is either unambiguous or longer than 1 nucleotide, so add it unaltered
+                push @possible_alleles, $allele;
+            }
+        }
+
+        @alleles = @possible_alleles;
+    }
+
+    # make sure the alleles are unique
+    
+    # we also want to deal with alleles like (T)0 which expand into 
+    # an empty string and we want to treat this as a deletion, so 
+    # we replace
+    # any empty strings with '-'
+    
+    @alleles = keys %{ { map { ($_ || '-') => 1 } @alleles } };
+
+    # create an object representing the reference allele
+    
+    my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
+        -variation_feature_overlap   => $self,
+        -variation_feature_seq       => $ref_allele,
+        -is_reference                => 1,
+    );
+    
+    $self->add_VariationFeatureOverlapAllele($ref_vfoa);
+
+    # create objects representing the alternate alleles
+    
+    for my $allele (@alleles) {
+        
+        next if $allele eq $ref_allele;
+       
+        my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
+            -variation_feature_overlap  => $self,
+            -variation_feature_seq      => $allele,
+            -is_reference               => 0,
+        );
+       
+        $self->add_VariationFeatureOverlapAllele($vfoa);
+    }
+    
+    return $self;
+}
+
+sub new_fast {
+    my ($class, $hashref) = @_;
+
+    # swap a variation_feature argument for a base_variation_feature one
+    
+    if ($hashref->{variation_feature}) {
+        $hashref->{base_variation_feature} = delete $hashref->{variation_feature};
+    }
+
+    return $class->SUPER::new_fast($hashref);
+}
+
+sub dbID {
+    my $self = shift;
+    
+    unless ($self->{dbID}) {
+        # we don't really have a dbID, so concatenate all the dbIDs of our alleles
+
+        $self->{dbID} = join '_', map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles };
+    }
+
+    return $self->{dbID};
+}
+
+=head2 variation_feature
+
+  Arg [1]    : (optional) A Bio::EnsEMBL::Variation::VariationFeature
+  Description: Get/set the associated VariationFeature, lazy-loading it if required
+  Returntype : Bio::EnsEMBL::Variation::VariationFeature
+  Exceptions : throws if the argument is the wrong type
+  Status     : At Risk
+
+=cut
+
+sub variation_feature {
+    my ($self, $variation_feature) = @_;
+
+    if ($variation_feature) {
+        assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature');
+        $self->base_variation_feature($variation_feature);
+    }
+    
+    if (my $vf_id = $self->{_variation_feature_id}) {
+        
+        # lazy-load the VariationFeature
+        
+        if (my $adap = $self->{adaptor}) {
+            if (my $vfa = $adap->db->get_VariationFeatureAdaptor) {
+                if (my $vf = $vfa->fetch_by_dbID($vf_id)) {
+                    $self->base_variation_feature($vf);
+                    delete $self->{_variation_feature_id};
+                }
+            }
+        }
+    }
+    
+    return $self->base_variation_feature;
+}
+
+sub _variation_feature_id {
+
+    # get the dbID of the variation feature, using the VariationFeature object 
+    # if we have one, or the internal hash value if we don't
+    
+    my $self = shift;
+    
+    if (my $vf = $self->{variation_feature}) {
+        return $vf->dbID;
+    }
+    elsif (my $id = $self->{_variation_feature_id}) {
+        return $id;
+    }
+    else {
+        return undef;
+    }
+}
+
+sub get_VariationFeatureOverlapAllele_for_allele_seq {
+    my ($self, $allele_seq) = @_;
+    return $self->{_alleles_by_seq}->{$allele_seq};
+}
+
+=head2 add_VariationFeatureOverlapAllele
+
+  Arg [1]    : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
+  Description: Add an allele to this VariationFeatureOverlap
+  Returntype : none
+  Exceptions : throws if the argument is not the expected type
+  Status     : At Risk
+
+=cut
+
+sub add_VariationFeatureOverlapAllele {
+    my ($self, $vfoa) = @_;
+
+    assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele');
+
+    $self->add_BaseVariationFeatureOverlapAllele($vfoa);
+
+    $self->{_alleles_by_seq}->{ $vfoa->variation_feature_seq } = $vfoa;
+}
+
+=head2 get_reference_VariationFeatureOverlapAllele
+
+  Description: Get the object representing the reference allele of this VariationFeatureOverlapAllele
+  Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_reference_VariationFeatureOverlapAllele {
+    my $self = shift;
+    return $self->get_reference_BaseVariationFeatureOverlapAllele(@_);
+}
+
+=head2 get_all_alternate_VariationFeatureOverlapAlleles
+
+  Description: Get a list of the alternate alleles of this VariationFeatureOverlapAllele
+  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_all_alternate_VariationFeatureOverlapAlleles {
+    my $self = shift;
+    return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@_);
+}
+
+=head2 get_all_VariationFeatureOverlapAlleles
+
+  Description: Get a list of the all the alleles, both reference and alternate, of this
+               VariationFeatureOverlap
+  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_all_VariationFeatureOverlapAlleles {
+    my $self = shift;
+    return $self->get_all_BaseVariationFeatureOverlapAlleles(@_);
+}
+
+sub _convert_to_sara {
+    my $self = shift;
+    
+    my $ref_allele = $self->{reference_allele};
+    $ref_allele->_convert_to_sara;
+    
+    $self->{alt_alleles} = [$ref_allele];
+}
+
+sub _rearrange_alleles {
+    my $self = shift;
+    my $keep_alleles = shift;
+    
+    # fix alt alleles
+    my $alt_alleles = $self->{alt_alleles};
+    my @new_alleles = grep {$keep_alleles->{$_->variation_feature_seq}} @$alt_alleles;
+    $self->{alt_alleles} = scalar @new_alleles ? \@new_alleles : $alt_alleles;
+    
+    # copy to ref allele if homozygous non-ref
+    $self->{reference_allele} = $self->{alt_alleles}->[0] if scalar keys %$keep_alleles == 1;
+}
+
+1;