diff variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariation.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariation.pm	Fri Aug 03 10:04:48 2012 -0400
@@ -0,0 +1,447 @@
+=head1 LICENSE
+
+ Copyright (c) 1999-2012 The European Bioinformatics Institute and
+ Genome Research Limited.  All rights reserved.
+
+ This software is distributed under a modified Apache license.
+ For license details, please see
+
+   http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+ Please email comments or questions to the public Ensembl
+ developers list at <dev@ensembl.org>.
+
+ Questions may also be sent to the Ensembl help desk at
+ <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Variation::TranscriptVariation
+
+=head1 SYNOPSIS
+
+    use Bio::EnsEMBL::Variation::TranscriptVariation;
+
+    my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
+        -transcript        => $transcript,
+        -variation_feature => $var_feat
+    );
+
+    print "consequence type: ", (join ",", @{$tv->consequence_type}), "\n";
+    print "cdna coords: ", $tv->cdna_start, '-', $tv->cdna_end, "\n";
+    print "cds coords: ", $tv->cds_start, '-', $tv->cds_end, "\n";
+    print "pep coords: ", $tv->translation_start, '-',$tv->translation_end, "\n";
+    print "amino acid change: ", $tv->pep_allele_string, "\n";
+    print "codon change: ", $tv->codons, "\n";
+    print "allele sequences: ", (join ",", map { $_->variation_feature_seq } 
+        @{ $tv->get_all_TranscriptVariationAlleles }, "\n";
+
+=head1 DESCRIPTION
+
+A TranscriptVariation object represents a variation feature which is in close
+proximity to an Ensembl transcript. A TranscriptVariation object has several
+attributes which define the relationship of the variation to the transcript.
+
+=cut
+
+package Bio::EnsEMBL::Variation::TranscriptVariation;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
+use Bio::EnsEMBL::Variation::TranscriptVariationAllele;
+use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap within_cds);
+use Bio::EnsEMBL::Variation::BaseTranscriptVariation;
+
+use base qw(Bio::EnsEMBL::Variation::BaseTranscriptVariation Bio::EnsEMBL::Variation::VariationFeatureOverlap);
+
+=head2 new
+
+  Arg [-TRANSCRIPT] : 
+    The Bio::EnsEMBL::Transcript associated with the given VariationFeature
+
+  Arg [-VARIATION_FEATURE] :
+    The Bio::EnsEMBL::VariationFeature associated with the given Transcript
+
+  Arg [-ADAPTOR] :
+    A Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor
+
+  Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
+    A flag indiciating if ambiguous single nucleotide alleles should be disambiguated
+    when constructing the TranscriptVariationAllele objects, e.g. a Variationfeature
+    with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
+    ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
+    allowed longer alleles with potentially many ambiguous bases.
+
+  Example : 
+    my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
+        -transcript        => $transcript,
+        -variation_feature => $var_feat
+    );
+
+  Description: Constructs a new TranscriptVariation instance given a VariationFeature
+               and a Transcript, most of the work is done in the VariationFeatureOverlap
+               superclass - see there for more documentation.
+  Returntype : A new Bio::EnsEMBL::Variation::TranscriptVariation instance 
+  Exceptions : throws unless both VARIATION_FEATURE and TRANSCRIPT are supplied
+  Status     : At Risk
+
+=cut 
+
+sub new {
+    my $class = shift;
+
+    my %args = @_;
+
+    # swap a '-transcript' argument for a '-feature' one for the superclass
+
+    for my $arg (keys %args) {
+        if (lc($arg) eq '-transcript') {
+            $args{'-feature'} = delete $args{$arg};
+        }
+    }
+
+    # call the superclass constructor
+    my $self = $class->SUPER::new(%args) || return undef;
+
+    # rebless the alleles from vfoas to tvas
+    map { bless $_, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele' } 
+        @{ $self->get_all_TranscriptVariationAlleles };
+    
+    return $self;
+}
+
+sub get_TranscriptVariationAllele_for_allele_seq {
+    my ($self, $allele_seq) = @_;
+    return $self->SUPER::get_VariationFeatureOverlapAllele_for_allele_seq($allele_seq);
+}
+
+=head2 add_TranscriptVariationAllele
+
+  Arg [1]    : A Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
+  Description: Add an allele to this TranscriptVariation
+  Returntype : none
+  Exceptions : throws if the argument is not the expected type
+  Status     : At Risk
+
+=cut
+
+sub add_TranscriptVariationAllele {
+    my ($self, $tva) = @_;
+    assert_ref($tva, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele');
+    return $self->SUPER::add_VariationFeatureOverlapAllele($tva);
+}
+
+=head2 get_reference_TranscriptVariationAllele
+
+  Description: Get the object representing the reference allele of this TranscriptVariation
+  Returntype : Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_reference_TranscriptVariationAllele {
+    my $self = shift;
+    return $self->SUPER::get_reference_VariationFeatureOverlapAllele(@_);
+}
+
+=head2 get_all_alternate_TranscriptVariationAlleles
+
+  Description: Get a list of the alternate alleles of this TranscriptVariation
+  Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_all_alternate_TranscriptVariationAlleles {
+    my $self = shift;
+    return $self->SUPER::get_all_alternate_VariationFeatureOverlapAlleles(@_);
+}
+
+=head2 get_all_TranscriptVariationAlleles
+
+  Description: Get a list of the all the alleles, both reference and alternate, of 
+               this TranscriptVariation
+  Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_all_TranscriptVariationAlleles {
+    my $self = shift;
+    return $self->SUPER::get_all_VariationFeatureOverlapAlleles(@_);
+}
+
+
+=head2 cdna_allele_string
+
+  Description: Return a '/' delimited string of the alleles of this variation with respect
+               to the associated transcript
+  Returntype : string
+  Exceptions : None
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub cdna_allele_string {
+    my $self = shift;
+    
+    unless ($self->{_cdna_allele_string}) {
+        $self->{_cdna_allele_string} = join '/', map { $_->feature_seq } @{ $self->get_all_TranscriptVariationAlleles };
+    }
+    
+    return $self->{_cdna_allele_string};
+}
+
+=head2 pep_allele_string
+
+  Description: Return a '/' delimited string of amino acid codes representing
+               all the possible changes made to the peptide by this variation
+  Returntype : string
+  Exceptions : None
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub pep_allele_string {
+    my $self = shift;
+    
+    unless ($self->{_pep_allele_string}) {
+        
+        my @peptides = grep { defined } map { $_->peptide } @{ $self->get_all_TranscriptVariationAlleles };
+        
+        $self->{_pep_allele_string} = join '/', @peptides;
+    }
+    
+    return $self->{_pep_allele_string};
+}
+
+=head2 codons
+
+  Description: Return a '/' delimited string of all possible codon sequences.  
+               The variant sequence within the codon will be capitalised while 
+               the rest of the codon sequence will be in lower case
+  Returntype : string
+  Exceptions : None
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub codons {
+    my $self = shift;
+    
+    unless ($self->{_display_codon_allele_string}) {
+  
+        my @display_codons = grep { defined } map { $_->display_codon } @{ $self->get_all_TranscriptVariationAlleles }; 
+
+        $self->{_display_codon_allele_string} = join '/', @display_codons;
+    }
+
+    return $self->{_display_codon_allele_string};
+}
+
+=head2 codon_position
+
+  Description: For variations that fall in the CDS, returns the base in the
+               codon that this variation falls in
+  Returntype : int - 1, 2 or 3
+  Exceptions : None
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub codon_position {
+    my $self = shift;
+    
+    unless ($self->{_codon_position}) {
+
+        my $cdna_start = $self->cdna_start;
+        
+        my $tran_cdna_start = $self->transcript->cdna_coding_start;
+       
+        # we need to take into account the exon phase
+        my $exon_phase = $self->transcript->start_Exon->phase;
+
+        my $phase_offset = $exon_phase > 0 ? $exon_phase : 0;
+
+        if (defined $cdna_start && defined $tran_cdna_start) {
+            $self->{_codon_position} = (($cdna_start - $tran_cdna_start + $phase_offset) % 3) + 1;
+        }
+    }
+    
+    return $self->{_codon_position};
+}
+
+=head2 affects_cds
+
+  Description: Check if any of this TranscriptVariation's alleles lie within
+               the CDS of the Transcript
+  Returntype : boolean
+  Exceptions : None
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub affects_cds {
+    my $self = shift;
+    return scalar grep { within_cds($_) } @{ $self->get_all_alternate_TranscriptVariationAlleles }; 
+}
+
+=head2 affects_peptide
+
+  Description: Check if any of this TranscriptVariation's alleles change the
+               resultant peptide sequence
+  Returntype : boolean
+  Exceptions : None
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub affects_peptide {
+    my $self = shift;
+    return scalar grep { $_->SO_term =~ /stop|non_syn|frameshift|inframe|initiator/ } map {@{$_->get_all_OverlapConsequences}} @{ $self->get_all_alternate_TranscriptVariationAlleles }; 
+}
+
+
+sub _protein_function_predictions {
+    
+    my ($self, $analysis) = @_;
+
+    my $tran = $self->transcript;
+
+    my $matrix = $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis};
+
+    unless ($matrix || exists($tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis})) {
+        my $pfpma = $self->{adaptor}->db->get_ProteinFunctionPredictionMatrixAdaptor;
+           
+        $matrix = $pfpma->fetch_by_analysis_translation_md5($analysis, $self->_translation_md5);
+
+        $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis} = $matrix;
+    }
+
+    return $matrix; 
+}
+
+=head2 hgvs_genomic
+
+  Description: Return the strings representing the genomic-level effect of each of the alleles 
+               of this variation in HGVS format
+  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub hgvs_genomic {
+    return _hgvs_generic(@_,'genomic');
+}
+
+=head2 hgvs_coding
+
+  Description: Return the strings representing the CDS-level effect of each of the alleles 
+               of this variation in HGVS format
+  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub hgvs_coding {
+    deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.');
+    return _hgvs_generic(@_,'transcript');
+}
+
+=head2 hgvs_transcript
+
+  Description: Return the strings representing the CDS-level effect of each of the alleles 
+               of this variation in HGVS format
+  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub hgvs_transcript {
+    return _hgvs_generic(@_,'transcript');
+}
+
+=head2 hgvs_protein
+
+  Description: Return the strings representing the protein-level effect of each of the alleles 
+               of this variation in HGVS format
+  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub hgvs_protein {
+    return _hgvs_generic(@_,'protein');
+}
+
+=head 
+
+# We haven't implemented support for these methods yet
+
+sub hgvs_rna {
+    return _hgvs_generic(@_,'rna');
+}
+
+sub hgvs_mitochondrial {
+    return _hgvs_generic(@_,'mitochondrial');
+}
+=cut
+
+sub _hgvs_generic {
+    my $self = shift;
+    my $reference = pop;
+    my $hgvs = shift;
+    
+    #ÊThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these
+    return undef if ($reference =~ m/rna|mitochondrial/);
+    
+    # The HGVS subroutine
+    my $sub = qq{hgvs_$reference};
+    
+    # Loop over the TranscriptVariationAllele objects associated with this TranscriptVariation
+    foreach my $tv_allele (@{ $self->get_all_alternate_TranscriptVariationAlleles }) {
+        
+        #ÊIf an HGVS hash was supplied and the allele exists as key, set the HGVS notation for this allele
+        if (defined($hgvs)) {
+            my $notation = $hgvs->{$tv_allele->variation_feature_seq()};
+            $tv_allele->$sub($notation) if defined $notation;
+        }
+        # Else, add the HGVS notation for this allele to the HGVS hash
+        else {
+	    $hgvs->{$tv_allele->variation_feature_seq()} = $tv_allele->$sub();
+        }
+    }
+    
+    return $hgvs;
+}
+
+sub _prefetch_for_vep {
+    my $self = shift;
+    
+    $self->cdna_coords;
+    $self->cds_coords;
+    $self->translation_coords;
+    $self->pep_allele_string;
+}
+
+
+1;