view variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariation.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Variation::TranscriptVariation

=head1 SYNOPSIS

    use Bio::EnsEMBL::Variation::TranscriptVariation;

    my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
        -transcript        => $transcript,
        -variation_feature => $var_feat
    );

    print "consequence type: ", (join ",", @{$tv->consequence_type}), "\n";
    print "cdna coords: ", $tv->cdna_start, '-', $tv->cdna_end, "\n";
    print "cds coords: ", $tv->cds_start, '-', $tv->cds_end, "\n";
    print "pep coords: ", $tv->translation_start, '-',$tv->translation_end, "\n";
    print "amino acid change: ", $tv->pep_allele_string, "\n";
    print "codon change: ", $tv->codons, "\n";
    print "allele sequences: ", (join ",", map { $_->variation_feature_seq } 
        @{ $tv->get_all_TranscriptVariationAlleles }, "\n";

=head1 DESCRIPTION

A TranscriptVariation object represents a variation feature which is in close
proximity to an Ensembl transcript. A TranscriptVariation object has several
attributes which define the relationship of the variation to the transcript.

=cut

package Bio::EnsEMBL::Variation::TranscriptVariation;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
use Bio::EnsEMBL::Variation::TranscriptVariationAllele;
use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap within_cds);
use Bio::EnsEMBL::Variation::BaseTranscriptVariation;

use base qw(Bio::EnsEMBL::Variation::BaseTranscriptVariation Bio::EnsEMBL::Variation::VariationFeatureOverlap);

=head2 new

  Arg [-TRANSCRIPT] : 
    The Bio::EnsEMBL::Transcript associated with the given VariationFeature

  Arg [-VARIATION_FEATURE] :
    The Bio::EnsEMBL::VariationFeature associated with the given Transcript

  Arg [-ADAPTOR] :
    A Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor

  Arg [-DISAMBIGUATE_SINGLE_NUCLEOTIDE_ALLELES] :
    A flag indiciating if ambiguous single nucleotide alleles should be disambiguated
    when constructing the TranscriptVariationAllele objects, e.g. a Variationfeature
    with an allele string like 'T/M' would be treated as if it were 'T/A/C'. We limit
    ourselves to single nucleotide alleles to avoid the combinatorial explosion if we
    allowed longer alleles with potentially many ambiguous bases.

  Example : 
    my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
        -transcript        => $transcript,
        -variation_feature => $var_feat
    );

  Description: Constructs a new TranscriptVariation instance given a VariationFeature
               and a Transcript, most of the work is done in the VariationFeatureOverlap
               superclass - see there for more documentation.
  Returntype : A new Bio::EnsEMBL::Variation::TranscriptVariation instance 
  Exceptions : throws unless both VARIATION_FEATURE and TRANSCRIPT are supplied
  Status     : At Risk

=cut 

sub new {
    my $class = shift;

    my %args = @_;

    # swap a '-transcript' argument for a '-feature' one for the superclass

    for my $arg (keys %args) {
        if (lc($arg) eq '-transcript') {
            $args{'-feature'} = delete $args{$arg};
        }
    }

    # call the superclass constructor
    my $self = $class->SUPER::new(%args) || return undef;

    # rebless the alleles from vfoas to tvas
    map { bless $_, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele' } 
        @{ $self->get_all_TranscriptVariationAlleles };
    
    return $self;
}

sub get_TranscriptVariationAllele_for_allele_seq {
    my ($self, $allele_seq) = @_;
    return $self->SUPER::get_VariationFeatureOverlapAllele_for_allele_seq($allele_seq);
}

=head2 add_TranscriptVariationAllele

  Arg [1]    : A Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
  Description: Add an allele to this TranscriptVariation
  Returntype : none
  Exceptions : throws if the argument is not the expected type
  Status     : At Risk

=cut

sub add_TranscriptVariationAllele {
    my ($self, $tva) = @_;
    assert_ref($tva, 'Bio::EnsEMBL::Variation::TranscriptVariationAllele');
    return $self->SUPER::add_VariationFeatureOverlapAllele($tva);
}

=head2 get_reference_TranscriptVariationAllele

  Description: Get the object representing the reference allele of this TranscriptVariation
  Returntype : Bio::EnsEMBL::Variation::TranscriptVariationAllele instance
  Exceptions : none
  Status     : At Risk

=cut

sub get_reference_TranscriptVariationAllele {
    my $self = shift;
    return $self->SUPER::get_reference_VariationFeatureOverlapAllele(@_);
}

=head2 get_all_alternate_TranscriptVariationAlleles

  Description: Get a list of the alternate alleles of this TranscriptVariation
  Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
  Exceptions : none
  Status     : At Risk

=cut

sub get_all_alternate_TranscriptVariationAlleles {
    my $self = shift;
    return $self->SUPER::get_all_alternate_VariationFeatureOverlapAlleles(@_);
}

=head2 get_all_TranscriptVariationAlleles

  Description: Get a list of the all the alleles, both reference and alternate, of 
               this TranscriptVariation
  Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariationAllele objects
  Exceptions : none
  Status     : At Risk

=cut

sub get_all_TranscriptVariationAlleles {
    my $self = shift;
    return $self->SUPER::get_all_VariationFeatureOverlapAlleles(@_);
}


=head2 cdna_allele_string

  Description: Return a '/' delimited string of the alleles of this variation with respect
               to the associated transcript
  Returntype : string
  Exceptions : None
  Caller     : general
  Status     : Stable

=cut

sub cdna_allele_string {
    my $self = shift;
    
    unless ($self->{_cdna_allele_string}) {
        $self->{_cdna_allele_string} = join '/', map { $_->feature_seq } @{ $self->get_all_TranscriptVariationAlleles };
    }
    
    return $self->{_cdna_allele_string};
}

=head2 pep_allele_string

  Description: Return a '/' delimited string of amino acid codes representing
               all the possible changes made to the peptide by this variation
  Returntype : string
  Exceptions : None
  Caller     : general
  Status     : Stable

=cut

sub pep_allele_string {
    my $self = shift;
    
    unless ($self->{_pep_allele_string}) {
        
        my @peptides = grep { defined } map { $_->peptide } @{ $self->get_all_TranscriptVariationAlleles };
        
        $self->{_pep_allele_string} = join '/', @peptides;
    }
    
    return $self->{_pep_allele_string};
}

=head2 codons

  Description: Return a '/' delimited string of all possible codon sequences.  
               The variant sequence within the codon will be capitalised while 
               the rest of the codon sequence will be in lower case
  Returntype : string
  Exceptions : None
  Caller     : general
  Status     : Stable

=cut

sub codons {
    my $self = shift;
    
    unless ($self->{_display_codon_allele_string}) {
  
        my @display_codons = grep { defined } map { $_->display_codon } @{ $self->get_all_TranscriptVariationAlleles }; 

        $self->{_display_codon_allele_string} = join '/', @display_codons;
    }

    return $self->{_display_codon_allele_string};
}

=head2 codon_position

  Description: For variations that fall in the CDS, returns the base in the
               codon that this variation falls in
  Returntype : int - 1, 2 or 3
  Exceptions : None
  Caller     : general
  Status     : At Risk

=cut

sub codon_position {
    my $self = shift;
    
    unless ($self->{_codon_position}) {

        my $cdna_start = $self->cdna_start;
        
        my $tran_cdna_start = $self->transcript->cdna_coding_start;
       
        # we need to take into account the exon phase
        my $exon_phase = $self->transcript->start_Exon->phase;

        my $phase_offset = $exon_phase > 0 ? $exon_phase : 0;

        if (defined $cdna_start && defined $tran_cdna_start) {
            $self->{_codon_position} = (($cdna_start - $tran_cdna_start + $phase_offset) % 3) + 1;
        }
    }
    
    return $self->{_codon_position};
}

=head2 affects_cds

  Description: Check if any of this TranscriptVariation's alleles lie within
               the CDS of the Transcript
  Returntype : boolean
  Exceptions : None
  Caller     : general
  Status     : At Risk

=cut

sub affects_cds {
    my $self = shift;
    return scalar grep { within_cds($_) } @{ $self->get_all_alternate_TranscriptVariationAlleles }; 
}

=head2 affects_peptide

  Description: Check if any of this TranscriptVariation's alleles change the
               resultant peptide sequence
  Returntype : boolean
  Exceptions : None
  Caller     : general
  Status     : At Risk

=cut

sub affects_peptide {
    my $self = shift;
    return scalar grep { $_->SO_term =~ /stop|non_syn|frameshift|inframe|initiator/ } map {@{$_->get_all_OverlapConsequences}} @{ $self->get_all_alternate_TranscriptVariationAlleles }; 
}


sub _protein_function_predictions {
    
    my ($self, $analysis) = @_;

    my $tran = $self->transcript;

    my $matrix = $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis};

    unless ($matrix || exists($tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis})) {
        my $pfpma = $self->{adaptor}->db->get_ProteinFunctionPredictionMatrixAdaptor;
           
        $matrix = $pfpma->fetch_by_analysis_translation_md5($analysis, $self->_translation_md5);

        $tran->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis} = $matrix;
    }

    return $matrix; 
}

=head2 hgvs_genomic

  Description: Return the strings representing the genomic-level effect of each of the alleles 
               of this variation in HGVS format
  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_genomic {
    return _hgvs_generic(@_,'genomic');
}

=head2 hgvs_coding

  Description: Return the strings representing the CDS-level effect of each of the alleles 
               of this variation in HGVS format
  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_coding {
    deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.');
    return _hgvs_generic(@_,'transcript');
}

=head2 hgvs_transcript

  Description: Return the strings representing the CDS-level effect of each of the alleles 
               of this variation in HGVS format
  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_transcript {
    return _hgvs_generic(@_,'transcript');
}

=head2 hgvs_protein

  Description: Return the strings representing the protein-level effect of each of the alleles 
               of this variation in HGVS format
  Returntype : hashref where the key is the allele sequence and then value is the HGVS string 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_protein {
    return _hgvs_generic(@_,'protein');
}

=head 

# We haven't implemented support for these methods yet

sub hgvs_rna {
    return _hgvs_generic(@_,'rna');
}

sub hgvs_mitochondrial {
    return _hgvs_generic(@_,'mitochondrial');
}
=cut

sub _hgvs_generic {
    my $self = shift;
    my $reference = pop;
    my $hgvs = shift;
    
    #ÊThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these
    return undef if ($reference =~ m/rna|mitochondrial/);
    
    # The HGVS subroutine
    my $sub = qq{hgvs_$reference};
    
    # Loop over the TranscriptVariationAllele objects associated with this TranscriptVariation
    foreach my $tv_allele (@{ $self->get_all_alternate_TranscriptVariationAlleles }) {
        
        #ÊIf an HGVS hash was supplied and the allele exists as key, set the HGVS notation for this allele
        if (defined($hgvs)) {
            my $notation = $hgvs->{$tv_allele->variation_feature_seq()};
            $tv_allele->$sub($notation) if defined $notation;
        }
        # Else, add the HGVS notation for this allele to the HGVS hash
        else {
	    $hgvs->{$tv_allele->variation_feature_seq()} = $tv_allele->$sub();
        }
    }
    
    return $hgvs;
}

sub _prefetch_for_vep {
    my $self = shift;
    
    $self->cdna_coords;
    $self->cds_coords;
    $self->translation_coords;
    $self->pep_allele_string;
}


1;