Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeatureOverlapAllele.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele

=head1 SYNOPSIS

    use Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;

    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap  => $vfo,
        -variation_feature_seq      => 'A',
        -is_reference               => 0,
    );

    print "sequence with respect to the feature: ", $vfoa->feature_seq, "\n";
    print "sequence with respect to the variation feature: ", $vfoa->variation_feature_seq, "\n";
    print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $vfoa->get_all_OverlapConsequences }), "\n";

=head1 DESCRIPTION

A VariationFeatureOverlapAllele object represents a single allele of a
VariationFeatureOverlap. It is the super-class of various feature-specific allele
classes such as TranscriptVariationAllele and RegulatoryFeatureVariationAllele and
contains methods not specific to any particular feature type. Ordinarily you will
not create these objects yourself, but instead you would create e.g. a
TranscriptVariation object which will then create VariationFeatureOverlapAlleles
based on the allele string of the associated VariationFeature.

=cut

package Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

use base qw(Bio::EnsEMBL::Variation::BaseVariationFeatureOverlapAllele);

# Matches allele strings made only of A, C, G, T and '-' (case-insensitive).
our $UNAMBIGUOUS_NUCLEOTIDES = qr/^[ACGT-]+$/i;

# As above, but additionally accepts U and the ambiguity codes listed in the
# character class.
our $ALL_NUCLEOTIDES = qr/^[ACGTUMRWSYKVHDBXN-]+$/i;

# Matches allele strings such as "(3 BP INSERTION)"; $1 captures the length.
our $SPECIFIED_LENGTH = qr/(\d+) BP (INSERTION|DELETION)/i;

=head2 new

  Arg [-VARIATION_FEATURE_OVERLAP] :
    The Bio::EnsEMBL::Variation::VariationFeatureOverlap with which this
    allele is associated

  Arg [-VARIATION_FEATURE_SEQ] :
    The allele sequence with respect to the associated VariationFeature

  Arg [-IS_REFERENCE] :
    A flag indicating if this allele is the reference allele or not

  Example :
    my $vfoa = Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele->new(
        -variation_feature_overlap  => $vfo,
        -variation_feature_seq      => 'A',
        -is_reference               => 0
    );

  Description: Constructs a new VariationFeatureOverlapAllele instance given a
               VariationFeatureOverlap and the sequence of the allele
  Returntype : A new Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele instance
  Exceptions : throws unless both VARIATION_FEATURE_OVERLAP and VARIATION_FEATURE_SEQ
               are supplied
  Status     : At Risk

=cut

sub new {
    my $class = shift;

    my %args = @_;

    # The superclass is handed '-base_variation_feature_overlap', so rename a
    # '-variation_feature_overlap' argument (matched case-insensitively).
    # keys() returns a snapshot list, so changing %args inside the loop is safe.

    for my $arg (keys %args) {
        if (lc($arg) eq '-variation_feature_overlap') {
            $args{'-base_variation_feature_overlap'} = delete $args{$arg};
        }
    }

    my $self = $class->SUPER::new(%args);

    assert_ref($self->base_variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');

    my ($variation_feature_seq) = rearrange([qw(VARIATION_FEATURE_SEQ)], %args);

    # Build the message with '.' (string concatenation). Numeric '+' would
    # add the operands as numbers and discard the message text.
    throw("Allele sequence required (variation " . $self->variation_feature->variation_name . ")")
        unless $variation_feature_seq;

    $self->{variation_feature_seq} = $variation_feature_seq;

    return $self;
}

=head2 new_fast

  Arg [1]    : hashref of attributes for the new object; a
               'variation_feature_overlap' entry, if true, is renamed to
               'base_variation_feature_overlap' (the hashref is modified
               in place)
  Description: Constructs an instance by passing the hashref straight to the
               superclass new_fast, skipping the checks performed by new()
  Returntype : whatever the superclass new_fast returns
  Exceptions : none
  Status     : At Risk

=cut

sub new_fast {
    my ($class, $hashref) = @_;

    # swap a variation_feature_overlap argument for a base_variation_feature_overlap one

    if ($hashref->{variation_feature_overlap}) {
        $hashref->{base_variation_feature_overlap} = delete $hashref->{variation_feature_overlap};
    }

    # and call the superclass

    return $class->SUPER::new_fast($hashref);
}

=head2 dbID

  Arg [1]    : (optional) the new dbID; ignored if undefined
  Description: Get/set the dbID of this VariationFeatureOverlapAllele
  Returntype : integer
  Exceptions : none
  Status     : At Risk

=cut

sub dbID {
    my ($self, $dbID) = @_;
    $self->{dbID} = $dbID if defined $dbID;
    return $self->{dbID};
}

=head2 variation_feature_overlap

  Arg [1]    : (optional) the new VariationFeatureOverlap
  Description: Get/set the associated VariationFeatureOverlap
  Returntype : Bio::EnsEMBL::Variation::VariationFeatureOverlap
  Exceptions : throws if the argument is the wrong type
  Status     : At Risk

=cut

sub variation_feature_overlap {
    my ($self, $variation_feature_overlap) = @_;

    if ($variation_feature_overlap) {
        assert_ref($variation_feature_overlap, 'Bio::EnsEMBL::Variation::VariationFeatureOverlap');
    }

    # storage is delegated to the superclass accessor
    return $self->base_variation_feature_overlap($variation_feature_overlap);
}

=head2 variation_feature

  Description: Get the associated VariationFeature
  Returntype : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions : none
  Status     : At Risk

=cut

sub variation_feature {
    my $self = shift;
    return $self->variation_feature_overlap->variation_feature;
}

=head2 feature_seq

  Description: Get the sequence of this allele relative to the associated Feature.
               This will be the same as the variation_feature_seq when the associated
               VariationFeature is on the same strand as the Feature, or the reverse
               complement when the strands differ. The value is cached after the
               first call.
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub feature_seq {
    my $self = shift;

    unless ($self->{feature_seq}) {

        # check if we need to reverse complement the variation_feature_seq;
        # only sequences that pass seq_is_dna are reverse complemented

        if (($self->variation_feature->strand != $self->feature->strand) && $self->seq_is_dna) {
            my $vf_seq = $self->variation_feature_seq;

            # reverse_comp works in place on the referenced copy
            reverse_comp(\$vf_seq);
            $self->{feature_seq} = $vf_seq;
        }
        else {
            $self->{feature_seq} = $self->{variation_feature_seq};
        }
    }

    return $self->{feature_seq};
}

=head2 variation_feature_seq

  Args [1]   : The allele sequence relative to the VariationFeature
  Description: Get/set the sequence of this allele relative to the associated VariationFeature.
               Setting a new sequence discards the cached feature_seq, seq_is_dna and
               seq_is_unambiguous_dna values so they are recomputed on next use.
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub variation_feature_seq {
    # the sequence of this allele relative to the variation feature
    my ($self, $variation_feature_seq) = @_;

    if ($variation_feature_seq) {
        $self->{variation_feature_seq} = $variation_feature_seq;

        # these values are derived from the sequence and memoised on first
        # use; drop them so they cannot go stale
        delete @{$self}{qw(feature_seq seq_is_dna seq_is_unambiguous_dna)};
    }

    return $self->{variation_feature_seq};
}

=head2 seq_is_unambiguous_dna

  Description: identify if the sequence of this allele is unambiguous DNA
               i.e. if we can meaningfully translate it
  Returntype : bool
  Exceptions : none
  Status     : At Risk

=cut

sub seq_is_unambiguous_dna {
    my $self = shift;

    unless (defined $self->{seq_is_unambiguous_dna}) {
        my $seq = $self->{variation_feature_seq};

        # an undefined sequence (possible via new_fast) is simply not DNA
        $self->{seq_is_unambiguous_dna} =
            (defined $seq && $seq =~ /$UNAMBIGUOUS_NUCLEOTIDES/) ? 1 : 0;
    }

    return $self->{seq_is_unambiguous_dna};
}

=head2 seq_is_dna

  Description: identify if the sequence of this allele is DNA including ambiguity
               codes, use seq_is_unambiguous_dna to check for alleles that do not
               include ambiguity codes
  Returntype : bool
  Exceptions : none
  Status     : At Risk

=cut

sub seq_is_dna {
    my $self = shift;

    unless (defined $self->{seq_is_dna}) {
        my $seq = $self->{variation_feature_seq};

        # an undefined sequence (possible via new_fast) is simply not DNA
        $self->{seq_is_dna} =
            (defined $seq && $seq =~ /$ALL_NUCLEOTIDES/) ? 1 : 0;
    }

    return $self->{seq_is_dna};
}

=head2 seq_length

  Description: return the length of this allele sequence, this is better than
               just using length($vfoa->feature_seq) because we check if the
               sequence is valid DNA, and also look for allele strings like
               "(3 BP INSERTION)" to determine the length
  Returntype : int or undef if we cannot determine the length
  Exceptions : none
  Status     : At Risk

=cut

sub seq_length {
    my $self = shift;

    my $seq = $self->variation_feature_seq;

    # An explicit 'return undef' (rather than a bare return) is kept on purpose
    # so that list-context callers still receive a single value, as before.
    return undef unless defined $seq;

    if ($self->seq_is_dna) {
        # '-' denotes a deletion, i.e. no bases at all
        return $seq eq '-' ? 0 : length($seq);
    }

    if ($seq =~ /$SPECIFIED_LENGTH/) {
        return $1;
    }

    return undef;
}

=head2 allele_string

  Description: Return a '/' delimited string of the reference allele variation_feature_seq
               and the variation_feature_seq of this allele
  Returntype : string
  Exceptions : none
  Status     : At Risk

=cut

sub allele_string {
    my $self = shift;

    my $ref = $self->variation_feature_overlap->get_reference_VariationFeatureOverlapAllele->variation_feature_seq;

    # for the HGMDs and CNV probes where the alleles are artificially set to be
    # the same, just return the reference sequence

    if ($ref eq $self->variation_feature_seq) {
        return $ref;
    }
    else {
        return $ref.'/'.$self->variation_feature_seq;
    }
}

# Attach a 'SARA' ("Same as reference allele") OverlapConsequence to this
# allele via add_OverlapConsequence and return $self so calls can be chained.
sub _convert_to_sara {
    my $self = shift;

    # This file does not otherwise import OverlapConsequence, so load it here;
    # require is a no-op when the class has already been loaded elsewhere.
    require Bio::EnsEMBL::Variation::OverlapConsequence;

    my $oc = Bio::EnsEMBL::Variation::OverlapConsequence->new_fast({
        'label'        => 'SARA',
        'description'  => 'Same as reference allele',
        'rank'         => '99',
        'display_term' => 'SARA',
        'SO_term'      => 'SARA',
    });

    $self->add_OverlapConsequence($oc);

    return $self;
}

1;