Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlap.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,384 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::Variation::VariationFeatureOverlap + +=head1 SYNOPSIS + + use Bio::EnsEMBL::Variation::VariationFeatureOverlap; + + my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new( + -feature => $feature, + -variation_feature => $var_feat + ); + + print "consequence type: ", (join ",", @{ $vfo->consequence_type }), "\n"; + print "most severe consequence: ", $vfo->display_consequence, "\n"; + +=head1 DESCRIPTION + +A VariationFeatureOverlap represents a VariationFeature which is in close +proximity to another Ensembl Feature. It is the superclass of feature-specific +objects such as TranscriptVariation and RegulatoryFeatureVariation, and has +methods common to all such objects. You will not normally instantiate this +class directly, instead instantiating one of the feature-specific subclasses. + +=cut + +package Bio::EnsEMBL::Variation::VariationFeatureOverlap; + +use strict; +use warnings; + +use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); +use Bio::EnsEMBL::Utils::Exception qw(throw warning); +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Utils::Sequence qw(expand); +use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code); +use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele; + +use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlap); + +=head2 new + + Arg [-FEATURE] : + The Bio::EnsEMBL::Feature associated with the given VariationFeature + + Arg [-VARIATION_FEATURE] : + The Bio::EnsEMBL::VariationFeature associated with the given Feature + + Arg [-ADAPTOR] : + A Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor + + Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] : + A flag indiciating if ambiguous single nucleotide alleles should be disambiguated + when constructing the VariationFeatureOverlapAllele objects, e.g. a Variationfeature + with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit + ourselves to single nucleotide alleles to avoid the combinatorial explosion if we + allowed longer alleles with potentially many ambiguous bases. + + Example : + my $vfo = Bio::EnsEMBL::Variation::VariationFeatureOverlap->new( + -feature => $feature, + -variation_feature => $var_feat + ); + + Description: Constructs a new VariationFeatureOverlap instance given a VariationFeature + and a Feature + Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlap instance + Exceptions : throws unless both VARIATION_FEATURE and FEATURE are supplied, or if the + supplied ADAPTOR is the wrong class + Status : At Risk + +=cut + +sub new { + + my $class = shift; + + my %args = @_; + + # swap a '-variation_feature' argument for a '-base_variation_feature' one for the superclass + + for my $arg (keys %args) { + if (lc($arg) eq '-variation_feature') { + $args{'-base_variation_feature'} = delete $args{$arg}; + } + } + + my $self = $class->SUPER::new(%args); + + my ( + $adaptor, + $ref_feature, + $disambiguate_sn_alleles, + $no_ref_check, + ) = rearrange([qw( + ADAPTOR + REF_FEATURE + DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES + NO_REF_CHECK + )], %args); + + my $variation_feature = $self->base_variation_feature; + + assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature'); + assert_ref($adaptor, 'Bio::EnsEMBL::Variation::DBSQL::VariationFeatureOverlapAdaptor') if $adaptor; + + $ref_feature ||= $variation_feature->slice; + + $self->{adaptor} = $adaptor; + $self->{ref_feature} = $ref_feature; + + my $ref_allele; + + # we take the reference allele sequence from the reference sequence, not from the allele string + unless($no_ref_check) { + $ref_allele = $ref_feature->subseq( + $variation_feature->start, + $variation_feature->end, + $variation_feature->strand + ); + } + + # get the variation feature allele string, expand it, and split it into separate alleles + + my $allele_string = $variation_feature->allele_string; + + expand(\$allele_string); + + my @alleles = split /\//, $allele_string; + + $ref_allele = $alleles[0] if $no_ref_check; + $ref_allele = '-' unless $ref_allele; + + if ($disambiguate_sn_alleles) { + + # if this flag is set, disambiguate any ambiguous single nucleotide alleles, so + # e.g. an allele string like T/M would be equivalent to an allele string of T/A/C + # we only do this for single nucleotide alleles to avoid the combinatorial explosion + # of long allele strings with potentially many ambiguous bases (because ensembl + # genomes want this functionality) + + my @possible_alleles; + + for my $allele (@alleles) { + + if ($allele !~ /^[ACGT-]+$/ && length($allele) == 1) { + for my $possible ( split //, unambiguity_code($allele) ) { + push @possible_alleles, $possible; + } + } + else { + # the allele is either unambiguous or longer than 1 nucleotide, so add it unaltered + push @possible_alleles, $allele; + } + } + + @alleles = @possible_alleles; + } + + # make sure the alleles are unique + + # we also want to deal with alleles like (T)0 which expand into + # an empty string and we want to treat this as a deletion, so + # we replace + # any empty strings with '-' + + @alleles = keys %{ { map { ($_ || '-') => 1 } @alleles } }; + + # create an object representing the reference allele + + my $ref_vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new( + -variation_feature_overlap => $self, + -variation_feature_seq => $ref_allele, + -is_reference => 1, + ); + + $self->add_VariationFeatureOverlapAllele($ref_vfoa); + + # create objects representing the alternate alleles + + for my $allele (@alleles) { + + next if $allele eq $ref_allele; + + my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new( + -variation_feature_overlap => $self, + -variation_feature_seq => $allele, + -is_reference => 0, + ); + + $self->add_VariationFeatureOverlapAllele($vfoa); + } + + return $self; +} + +sub new_fast { + my ($class, $hashref) = @_; + + # swap a variation_feature argument for a base_variation_feature one + + if ($hashref->{variation_feature}) { + $hashref->{base_variation_feature} = delete $hashref->{variation_feature}; + } + + return $class->SUPER::new_fast($hashref); +} + +sub dbID { + my $self = shift; + + unless ($self->{dbID}) { + # we don't really have a dbID, so concatenate all the dbIDs of our alleles + + $self->{dbID} = join '_', map { $_->dbID } @{ $self->get_all_alternate_VariationFeatureOverlapAlleles }; + } + + return $self->{dbID}; +} + +=head2 variation_feature + + Arg [1] : (optional) A Bio::EnsEMBL::Variation::VariationFeature + Description: Get/set the associated VariationFeature, lazy-loading it if required + Returntype : Bio::EnsEMBL::Variation::VariationFeature + Exceptions : throws if the argument is the wrong type + Status : At Risk + +=cut + +sub variation_feature { + my ($self, $variation_feature) = @_; + + if ($variation_feature) { + assert_ref($variation_feature, 'Bio::EnsEMBL::Variation::VariationFeature'); + $self->base_variation_feature($variation_feature); + } + + if (my $vf_id = $self->{_variation_feature_id}) { + + # lazy-load the VariationFeature + + if (my $adap = $self->{adaptor}) { + if (my $vfa = $adap->db->get_VariationFeatureAdaptor) { + if (my $vf = $vfa->fetch_by_dbID($vf_id)) { + $self->base_variation_feature($vf); + delete $self->{_variation_feature_id}; + } + } + } + } + + return $self->base_variation_feature; +} + +sub _variation_feature_id { + + # get the dbID of the variation feature, using the VariationFeature object + # if we have one, or the internal hash value if we don't + + my $self = shift; + + if (my $vf = $self->{variation_feature}) { + return $vf->dbID; + } + elsif (my $id = $self->{_variation_feature_id}) { + return $id; + } + else { + return undef; + } +} + +sub get_VariationFeatureOverlapAllele_for_allele_seq { + my ($self, $allele_seq) = @_; + return $self->{_alleles_by_seq}->{$allele_seq}; +} + +=head2 add_VariationFeatureOverlapAllele + + Arg [1] : A Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance + Description: Add an allele to this VariationFeatureOverlap + Returntype : none + Exceptions : throws if the argument is not the expected type + Status : At Risk + +=cut + +sub add_VariationFeatureOverlapAllele { + my ($self, $vfoa) = @_; + + assert_ref($vfoa, 'Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele'); + + $self->add_BaseVariationFeatureOverlapAllele($vfoa); + + $self->{_alleles_by_seq}->{ $vfoa->variation_feature_seq } = $vfoa; +} + +=head2 get_reference_VariationFeatureOverlapAllele + + Description: Get the object representing the reference allele of this VariationFeatureOverlapAllele + Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance + Exceptions : none + Status : At Risk + +=cut + +sub get_reference_VariationFeatureOverlapAllele { + my $self = shift; + return $self->get_reference_BaseVariationFeatureOverlapAllele(@_); +} + +=head2 get_all_alternate_VariationFeatureOverlapAlleles + + Description: Get a list of the alternate alleles of this VariationFeatureOverlapAllele + Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects + Exceptions : none + Status : At Risk + +=cut + +sub get_all_alternate_VariationFeatureOverlapAlleles { + my $self = shift; + return $self->get_all_alternate_BaseVariationFeatureOverlapAlleles(@_); +} + +=head2 get_all_VariationFeatureOverlapAlleles + + Description: Get a list of the all the alleles, both reference and alternate, of this + VariationFeatureOverlap + Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele objects + Exceptions : none + Status : At Risk + +=cut + +sub get_all_VariationFeatureOverlapAlleles { + my $self = shift; + return $self->get_all_BaseVariationFeatureOverlapAlleles(@_); +} + +sub _convert_to_sara { + my $self = shift; + + my $ref_allele = $self->{reference_allele}; + $ref_allele->_convert_to_sara; + + $self->{alt_alleles} = [$ref_allele]; +} + +sub _rearrange_alleles { + my $self = shift; + my $keep_alleles = shift; + + # fix alt alleles + my $alt_alleles = $self->{alt_alleles}; + my @new_alleles = grep {$keep_alleles->{$_->variation_feature_seq}} @$alt_alleles; + $self->{alt_alleles} = scalar @new_alleles ? \@new_alleles : $alt_alleles; + + # copy to ref allele if homozygous non-ref + $self->{reference_allele} = $self->{alt_alleles}->[0] if scalar keys %$keep_alleles == 1; +} + +1;