view variant_effect_predictor/Bio/EnsEMBL/Variation/TranscriptVariationAllele.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Variation::TranscriptVariationAllele

=head1 SYNOPSIS

    use Bio::EnsEMBL::Variation::TranscriptVariationAllele;
    
    my $tva = Bio::EnsEMBL::Variation::TranscriptVariationAllele->new(
        -transcript_variation   => $tv,
        -variation_feature_seq  => 'A',
        -is_reference           => 0,
    );

    print "sequence with respect to the transcript: ", $tva->feature_seq, "\n";
    print "sequence with respect to the variation feature: ", $tva->variation_feature_seq, "\n";
    print "consequence SO terms: ", (join ",", map { $_->SO_term } @{ $tva->get_all_OverlapConsequences }), "\n";
    print "amino acid change: ", $tva->peptide_allele_string, "\n";
    print "resulting codon: ", $tva->codon, "\n";
    print "reference codon: ", $tva->transcript_variation->get_reference_TranscriptVariationAllele->codon, "\n";
    print "PolyPhen prediction: ", $tva->polyphen_prediction, "\n";
    print "SIFT prediction: ", $tva->sift_prediction, "\n";

=head1 DESCRIPTION

A TranscriptVariationAllele object represents a single allele of a TranscriptVariation.
It provides methods that are specific to the sequence of the allele, such as codon,
peptide etc. Methods that depend only on position (e.g. CDS start) will be found in 
the associated TranscriptVariation. Ordinarily you will not create these objects 
yourself, but instead you would create a TranscriptVariation object which will then 
construct TranscriptVariationAlleles based on the allele string of the associated
VariationFeature. 

Note that any methods that are not specific to Transcripts will be found in the 
VariationFeatureOverlapAllele superclass.

=cut

package Bio::EnsEMBL::Variation::TranscriptVariationAllele;

use strict;
use warnings;

use Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix qw($AA_LOOKUP);
use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(hgvs_variant_notation format_hgvs_string);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(within_cds within_intron stop_lost affects_start_codon);

use base qw(Bio::EnsEMBL::Variation::VariationFeatureOverlapAllele Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele);


our $DEBUG = 0;

sub new_fast {
    my ($class, $hashref) = @_;
    
    # swap a transcript_variation argument for a variation_feature_overlap one

    if ($hashref->{transcript_variation}) {
        $hashref->{variation_feature_overlap} = delete $hashref->{transcript_variation};
    }
    
    # and call the superclass

    return $class->SUPER::new_fast($hashref);
}

=head2 transcript_variation

  Description: Get/set the associated TranscriptVariation
  Returntype : Bio::EnsEMBL::Variation::TranscriptVariation
  Exceptions : throws if the argument is the wrong type
  Status     : At Risk

=cut

sub transcript_variation {
    my ($self, $tv) = @_;
    assert_ref($tv, 'Bio::EnsEMBL::Variation::TranscriptVariation') if $tv;
    return $self->variation_feature_overlap($tv);
}

=head2 variation_feature

  Description: Get the associated VariationFeature
  Returntype : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions : none
  Status     : At Risk

=cut

sub variation_feature {
    my $self = shift;
    return $self->transcript_variation->variation_feature;
}

=head2 pep_allele_string

  Description: Return a '/' delimited string of the reference peptide and the 
               peptide resulting from this allele, or a single peptide if this
               allele does not change the peptide (e.g. because it is synonymous)
  Returntype : string or undef if this allele is not in the CDS
  Exceptions : none
  Status     : At Risk

=cut

sub pep_allele_string {
    my ($self) = @_;
    
    my $pep = $self->peptide;
    
    return undef unless $pep;
    
    my $ref_pep = $self->transcript_variation->get_reference_TranscriptVariationAllele->peptide;
    
    return undef unless $ref_pep;
    
    return $ref_pep ne $pep ? $ref_pep.'/'.$pep : $pep;
}

=head2 codon_allele_string

  Description: Return a '/' delimited string of the reference codon and the 
               codon resulting from this allele 
  Returntype : string or undef if this allele is not in the CDS
  Exceptions : none
  Status     : At Risk

=cut

sub codon_allele_string {
    my ($self) = @_;
    
    my $codon = $self->codon;
    
    return undef unless $codon;
    
    my $ref_codon = $self->transcript_variation->get_reference_TranscriptVariationAllele->codon;
    
    return $ref_codon.'/'.$codon;
}

=head2 display_codon_allele_string

  Description: Return a '/' delimited string of the reference display_codon and the 
               display_codon resulting from this allele. The display_codon identifies
               the nucleotides affected by this variant in UPPER CASE and other 
               nucleotides in lower case
  Returntype : string or undef if this allele is not in the CDS
  Exceptions : none
  Status     : At Risk

=cut

sub display_codon_allele_string {
    my ($self) = @_;
    
    my $display_codon = $self->display_codon;
    
    return undef unless $display_codon;
    
    my $ref_display_codon = $self->transcript_variation->get_reference_TranscriptVariationAllele->display_codon;
    
    return undef unless $ref_display_codon;
    
    return $ref_display_codon.'/'.$display_codon;
}

=head2 peptide

  Description: Return the amino acid sequence that this allele is predicted to result in
  Returntype : string or undef if this allele is not in the CDS or is a frameshift
  Exceptions : none
  Status     : At Risk

=cut

sub peptide {
    my ($self, $peptide) = @_;
    
    $self->{peptide} = $peptide if $peptide;
    
    unless ($self->{peptide}) {

        return undef unless $self->seq_is_unambiguous_dna;
        
        if (my $codon = $self->codon) {
            
            # the codon method can set the peptide in some circumstances 
            # so check here before we try an (expensive) translation
            return $self->{peptide} if $self->{peptide};
            
            # translate the codon sequence to establish the peptide allele
            
            # allow for partial codons - split the sequence into whole and partial
            # e.g. AAAGG split into AAA and GG            
            my $whole_codon   = substr($codon, 0, int(length($codon) / 3) * 3);
            my $partial_codon = substr($codon, int(length($codon) / 3) * 3);
            
            my $pep = '';
            
            if($whole_codon) {
                # for mithocondrial dna we need to to use a different codon table
                my $codon_table = $self->transcript_variation->_codon_table;
                
                my $codon_seq = Bio::Seq->new(
                    -seq        => $whole_codon,
                    -moltype    => 'dna',
                    -alphabet   => 'dna',
                );
                
                $pep .= $codon_seq->translate(undef, undef, undef, $codon_table)->seq;
            }
            
            if($partial_codon) {
                $pep .= 'X';
            }
            
            $pep ||= '-';
           
            $self->{peptide} = $pep;
        }
    }
    
    return $self->{peptide};
}

=head2 codon

  Description: Return the codon sequence that this allele is predicted to result in
  Returntype : string or undef if this allele is not in the CDS or is a frameshift
  Exceptions : none
  Status     : At Risk

=cut

sub codon {
    my ($self, $codon) = @_;
    
    $self->{codon} = $codon if defined $codon;
    
    my $tv = $self->transcript_variation;      
    
    return undef unless $tv->translation_start && $tv->translation_end;
   
    return undef unless $self->seq_is_dna;
    
    unless ($self->{codon}) {
      
        # try to calculate the codon sequence
    
        my $seq = $self->feature_seq;
        
        $seq = '' if $seq eq '-';
        
        # calculate necessary coords and lengths
        
        my $codon_cds_start = $tv->translation_start * 3 - 2;
        my $codon_cds_end   = $tv->translation_end * 3;
        my $codon_len       = $codon_cds_end - $codon_cds_start + 1;
        my $vf_nt_len       = $tv->cds_end - $tv->cds_start + 1;
        my $allele_len      = $self->seq_length;
        
        if ($allele_len != $vf_nt_len) {
            if (abs($allele_len - $vf_nt_len) % 3) {
                # this is a frameshift variation, we don't attempt to 
                # calculate the resulting codon or peptide change as this 
                # could get quite complicated 
                return undef;
            }
        }

        # splice the allele sequence into the CDS
        
        my $cds = $tv->_translateable_seq;
    
        substr($cds, $tv->cds_start-1, $vf_nt_len) = $seq;
        
        # and extract the codon sequence
        
        my $codon = substr($cds, $codon_cds_start-1, $codon_len + ($allele_len - $vf_nt_len));
        
        if (length($codon) < 1) {
            $self->{codon}   = '-';
            $self->{peptide} = '-';
        }
        else {
             $self->{codon} = $codon;
        }
    }
    
    return $self->{codon};
}

=head2 display_codon

  Description: Return the codon sequence that this allele is predicted to result in
               with the affected nucleotides identified in UPPER CASE and other 
               nucleotides in lower case
  Returntype : string or undef if this allele is not in the CDS or is a frameshift
  Exceptions : none
  Status     : At Risk

=cut

sub display_codon {
    my $self = shift;

    unless ($self->{_display_codon}) {

        if ($self->codon && defined $self->transcript_variation->codon_position) {
            
            my $display_codon = lc $self->codon;

            # if this allele is an indel then just return all lowercase
            
            if ($self->feature_seq ne '-') {
                
                # codon_position is 1-based, while substr assumes the string starts at 0
                
                my $pos = $self->transcript_variation->codon_position - 1;

                my $len = length $self->feature_seq;

                substr($display_codon, $pos, $len) = uc substr($display_codon, $pos, $len);
            }

            $self->{_display_codon} = $display_codon;
        }
    }

    return $self->{_display_codon};
}

=head2 polyphen_prediction

  Description: Return the qualitative PolyPhen-2 prediction for the effect of this allele.
               (Note that we currently only have PolyPhen predictions for variants that 
               result in single amino acid substitutions in human)
  Returntype : string (one of 'probably damaging', 'possibly damaging', 'benign', 'unknown')
               if this is a non-synonymous mutation and a prediction is available, undef
               otherwise
  Exceptions : none
  Status     : At Risk

=cut

sub polyphen_prediction {
    my ($self, $classifier, $polyphen_prediction) = @_;
    
    $classifier ||= 'humvar';
    
    my $analysis = "polyphen_${classifier}";
    
    $self->{$analysis}->{prediction} = $polyphen_prediction if $polyphen_prediction;
    
    unless ($self->{$analysis}->{prediction}) {
        my ($prediction, $score) = $self->_protein_function_prediction($analysis);
        $self->{$analysis}->{score} = $score;
        $self->{$analysis}->{prediction} = $prediction;
    }
    
    return $self->{$analysis}->{prediction};
}

=head2 polyphen_score

  Description: Return the PolyPhen-2 probability that this allele is deleterious (Note that we 
               currently only have PolyPhen predictions for variants that result in single 
               amino acid substitutions in human)
  Returntype : float between 0 and 1 if this is a non-synonymous mutation and a prediction is 
               available, undef otherwise
  Exceptions : none
  Status     : At Risk

=cut

sub polyphen_score {
    my ($self, $classifier, $polyphen_score) = @_;
    
    $classifier ||= 'humvar';

    my $analysis = "polyphen_${classifier}";
    
    $self->{$analysis}->{score} = $polyphen_score if defined $polyphen_score;

    unless ($self->{$analysis}->{score}) {
        my ($prediction, $score) = $self->_protein_function_prediction($analysis);
        $self->{$analysis}->{score} = $score;
        $self->{$analysis}->{prediction} = $prediction;
    }

    return $self->{$analysis}->{score};
}

=head2 sift_prediction

  Description: Return the qualitative SIFT prediction for the effect of this allele.
               (Note that we currently only have SIFT predictions for variants that 
               result in single amino acid substitutions in human)
  Returntype : string (one of 'tolerated', 'deleterious') if this is a non-synonymous 
               mutation and a prediction is available, undef otherwise
  Exceptions : none
  Status     : At Risk

=cut

sub sift_prediction {
    my ($self, $sift_prediction) = @_;
    
    $self->{sift_prediction} = $sift_prediction if $sift_prediction;
    
    unless ($self->{sift_prediction}) {
        my ($prediction, $score) = $self->_protein_function_prediction('sift');
        $self->{sift_score} = $score;
        $self->{sift_prediction} = $prediction unless $self->{sift_prediction};
    }
    
    return $self->{sift_prediction};
}

=head2 sift_score

  Description: Return the SIFT score for this allele (Note that we currently only have SIFT 
               predictions for variants that result in single amino acid substitutions in human)
  Returntype : float between 0 and 1 if this is a non-synonymous mutation and a prediction is 
               available, undef otherwise
  Exceptions : none
  Status     : At Risk

=cut

sub sift_score {
    my ($self, $sift_score) = @_;

    $self->{sift_score} = $sift_score if defined $sift_score;

    unless ($self->{sift_score}) {
        my ($prediction, $score) = $self->_protein_function_prediction('sift');
        $self->{sift_score} = $score;
        $self->{sift_prediction} = $prediction;
    }

    return $self->{sift_score};
}


sub _protein_function_prediction {
    my ($self, $analysis) = @_;

    # we can only get results for variants that cause a single amino acid substitution, 
    # so check the peptide allele string first

    if ($self->pep_allele_string && $self->pep_allele_string =~ /^[A-Z]\/[A-Z]$/ && defined $AA_LOOKUP->{$self->peptide}) {
        
        if (my $matrix = $self->transcript_variation->_protein_function_predictions($analysis)) {
            
            my ($prediction, $score) = $matrix->get_prediction(
                $self->transcript_variation->translation_start,
                $self->peptide,
            );

            return wantarray ? ($prediction, $score) : $prediction;
        }
    }
    
    return undef;
}

=head2 hgvs_genomic

  Description: Return a string representing the genomic-level effect of this allele in HGVS format
  Returntype : string 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_genomic {
    return _hgvs_generic(@_,'genomic');
}

=head2 hgvs_coding

  Description: Return a string representing the CDS-level effect of this allele in HGVS format
  Returntype : string or undef if this allele is not in the CDS 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_coding {

    deprecate('HGVS coding support has been moved to hgvs_transcript. This method will be removed in the next release.');
    return hgvs_transcript(@_);
}


=head2 hgvs_transcript
    
  Description: Return a string representing the CDS-level effect of this allele in HGVS format
  Returntype : string or undef if this allele is not in the CDS
  Exceptions : none
  Status     : At Risk
    
=cut
    
    
sub hgvs_transcript {


    my $self = shift;
    my $notation = shift;   

    ##### set if string supplied
    $self->{hgvs_transcript} = $notation   if defined $notation;
    ##### return if held 
    return $self->{hgvs_transcript}        if defined $self->{hgvs_transcript} ;
    return $self->{hgvs_coding}            if defined $self->{hgvs_coding} ;

    my $variation_feature_sequence  = $self->variation_feature_seq() ;

    ### don't try to handle odd characters
    return undef if $variation_feature_sequence =~ m/[^ACGT\-]/ig;

    ### no result for reference allele
    return undef if $self->is_reference ==1; 

    ### else evaluate
       
    ### get reference sequence strand
    my $refseq_strand = $self->transcript_variation->transcript->strand();

    if($DEBUG ==1){
    my $var_name = $self->transcript_variation->variation_feature->variation_name();
    print "\nHGVS transcript: Checking $var_name refseq strand => $refseq_strand seq name : " . $self->transcript_variation->transcript_stable_id() . " var strand " . $self->transcript_variation->variation_feature->strand() . " vf st " . $self->variation_feature->strand()  ." seqname: " . $self->variation_feature->seqname()  ." seq: " . $self->variation_feature_seq ."\n";
    }

    my $hgvs_notation ; ### store components of HGVS string in hash
   
    ### vf strand is relative to transcript or transcript slice
    if( $self->transcript_variation->variation_feature->strand() <0 && $refseq_strand >0 ||
    $self->transcript_variation->variation_feature->strand() >0 && $refseq_strand < 0
    ){    
    reverse_comp(\$variation_feature_sequence);
    }
        
    ### need to get ref seq from slice transcript is on for intron labelling    
    my ($slice_start, $slice_end, $slice) = $self->_var2transcript_slice_coords();
    
   
    unless($slice_start){
    #warn "TVA: VF not within transcript - no HGVS\n";
    return undef;
    }
        
    ### decide event type from HGVS nomenclature   
    $hgvs_notation = hgvs_variant_notation(  $variation_feature_sequence,    ### alt_allele,
                         $slice->seq(),                  ### using this to extract ref allele
                         $slice_start,
                         $slice_end
    );
                       
    ### This should not happen
    unless($hgvs_notation->{'type'}){
    #warn "Error - not continuing; no HGVS annotation\n";
    return undef;
    }    
  
    ### create reference name - transcript name & seq version 
    $hgvs_notation->{'ref_name'} =  $self->transcript_variation->transcript_stable_id() . "." . $self->transcript_variation->transcript->version();
  

    ### get position relative to transcript features [use HGVS coords not variation feature coords due to dups]
    $hgvs_notation->{start} = $self->_get_cDNA_position( $hgvs_notation->{start} );
    $hgvs_notation->{end}   = $self->_get_cDNA_position( $hgvs_notation->{end} );
    

 # Make sure that start is always less than end
    my ($exon_start_coord, $intron_start_offset) = $hgvs_notation->{start} =~ m/(\-?[0-9]+)\+?(\-?[0-9]+)?/;
    my ($exon_end_coord,   $intron_end_offset)   = $hgvs_notation->{end} =~ m/(\-?[0-9]+)\+?(\-?[0-9]+)?/;
    $intron_start_offset ||= 0;
    $intron_end_offset   ||= 0;

    ($hgvs_notation->{start},$hgvs_notation->{end}) = ($hgvs_notation->{end},$hgvs_notation->{start}) if 
    (($exon_start_coord + $intron_start_offset) > ($exon_end_coord + $intron_end_offset));



    if($self->transcript->cdna_coding_start()){
    $hgvs_notation->{'numbering'} = "c";  ### set 'c' if transcript is coding 
    }    
    else{
    $hgvs_notation->{'numbering'} = "n";  ### set 'n' if transcript non-coding 
    } 
    ### generic formatting 
    $self->{hgvs_transcript} =  format_hgvs_string( $hgvs_notation);

    if($DEBUG ==1){print "HGVS notation for var " . $self->transcript_variation->variation_feature->variation_name() . " from hgvs transcript : " . $self->{hgvs_transcript} . " \n";}
    
    return $self->{hgvs_transcript};
   
}


=head2 hgvs_protein

  Description: Return a string representing the protein-level effect of this allele in HGVS format
  Returntype : string or undef if this allele is not in the CDS 
  Exceptions : none
  Status     : At Risk

=cut

sub hgvs_protein {
    
    my $self     = shift;
    my $notation = shift;  
    my $hgvs_notation; 
    
    if($DEBUG ==1){print "\nStarting hgvs_protein with ".  $self->transcript_variation->variation_feature->variation_name() . "\n"; }

    ### set if string supplied
    $self->{hgvs_protein} = $notation  if defined $notation;
    
    ### return if set
    return $self->{hgvs_protein}       if defined $self->{hgvs_protein} ;
    
    ### don't try to handle odd characters
    return undef if $self->variation_feature_seq() =~ m/[^ACGT\-]/ig;

    #### else, check viable input and create notation
    return if $self->is_reference();

    unless ($self->transcript_variation->translation_start() && $self->transcript_variation->translation_end()){   
        return undef ;
    }

    #### for debug
    #my $var_name = $self->transcript_variation->variation_feature->variation_name();   
    
    ### get reference sequence [add seq version to transcript name]
    $hgvs_notation->{ref_name} = $self->transcript_variation->transcript->translation()->display_id() . "." . $self->transcript_variation->transcript->translation()->version();

    $hgvs_notation->{'numbering'} = 'p';

    ### get default reference location [changed later in some cases eg. duplication]
    $hgvs_notation->{start}   = $self->transcript_variation->translation_start();
    $hgvs_notation->{end}     = $self->transcript_variation->translation_end();  


    ## get default reference & alt peptides  [changed later to hgvs format]
    $hgvs_notation->{alt} = $self->peptide;
    $hgvs_notation->{ref} = $self->transcript_variation->get_reference_TranscriptVariationAllele->peptide;    

    if(defined $hgvs_notation->{alt} && defined $hgvs_notation->{ref}){
        $hgvs_notation = _clip_alleles( $hgvs_notation);
    }

    #### define type - types are different for protein numbering
    $hgvs_notation  = $self->_get_hgvs_protein_type($hgvs_notation);

    ##### Convert ref & alt peptides taking into account HGVS rules
    $hgvs_notation = $self->_get_hgvs_peptides($hgvs_notation);

    ##### String formatting
    $self->{hgvs_protein}  =  $self->_get_hgvs_protein_format($hgvs_notation);
    return $self->{hgvs_protein} ;   
}

### HGVS: format protein string
sub _get_hgvs_protein_format{

    my $self          = shift;
    my $hgvs_notation = shift;

    if ((defined  $hgvs_notation->{ref} && defined $hgvs_notation->{alt} && 
     $hgvs_notation->{ref} eq $hgvs_notation->{alt}) || 
      (defined  $hgvs_notation->{type} && $hgvs_notation->{type} eq "=")){

        ### no protein change - return transcript nomenclature with flag for neutral protein consequence
        $hgvs_notation->{'hgvs'} = $self->hgvs_transcript() . "(p.=)";
        return $hgvs_notation->{'hgvs'} ;
    }

    ### all start with refseq name & numbering type
    $hgvs_notation->{'hgvs'} = $hgvs_notation->{'ref_name'} . ":" . $hgvs_notation->{'numbering'} . ".";    

    ### handle stop_lost seperately regardless of cause by del/delins => p.XposAA1extnum_AA_to_stop
    if(stop_lost($self)){  ### if deletion of stop add extX and number of new aa to alt
        $hgvs_notation->{alt} = substr($hgvs_notation->{alt}, 0, 3);
        if($hgvs_notation->{type} eq "del"){
            my $aa_til_stop =  _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "del");
            if(defined  $aa_til_stop){
                $hgvs_notation->{alt} .=  "extX" . $aa_til_stop;
            }
        }
        elsif($hgvs_notation->{type} eq ">"){
            my $aa_til_stop =  _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "subs");
            if(defined  $aa_til_stop){  
              $hgvs_notation->{alt} .=  "extX" . $aa_til_stop;
            }
        }
        else{
           # warn "TVA: stop loss for type $hgvs_notation->{type}  not caught \n";
        }
        $hgvs_notation->{'hgvs'} .=  $hgvs_notation->{ref} . $hgvs_notation->{start} .  $hgvs_notation->{alt} ;
    } 

    elsif( $hgvs_notation->{type} eq "dup"){

        if($hgvs_notation->{start} < $hgvs_notation->{end}){
            ### list only first and last peptides in long duplicated string
            my $ref_pep_first = substr($hgvs_notation->{alt}, 0, 3);
            my $ref_pep_last  = substr($hgvs_notation->{alt}, -3, 3);
            $hgvs_notation->{'hgvs'} .=  $ref_pep_first . $hgvs_notation->{start} .  "_" .  $ref_pep_last . $hgvs_notation->{end} ."dup";
        
        }
        else{
            $hgvs_notation->{'hgvs'} .=  $hgvs_notation->{alt} . $hgvs_notation->{start} .  "dup" ;
        }
        
        print "formating a dup  $hgvs_notation->{hgvs} \n" if $DEBUG==1;
    }

    elsif($hgvs_notation->{type} eq ">"){
        #### substitution

        $hgvs_notation->{'hgvs'}  .=   $hgvs_notation->{ref}. $hgvs_notation->{start} .  $hgvs_notation->{alt};
    }    

    elsif( $hgvs_notation->{type} eq "delins" || $hgvs_notation->{type} eq "ins" ){
    
        #### list first and last aa in reference only
        my $ref_pep_first = substr($hgvs_notation->{ref}, 0, 3);
        my $ref_pep_last;
        if(substr($hgvs_notation->{ref}, -1, 1) eq "X"){
            $ref_pep_last ="X";
        }
        else{
            $ref_pep_last  = substr($hgvs_notation->{ref}, -3, 3);
         }
         if($hgvs_notation->{ref} =~ /X$/){
            ### For stops & add extX & distance to next stop to alt pep
            my $aa_til_stop =  _stop_loss_extra_AA($self,$hgvs_notation->{start}-1, "loss");
            if(defined  $aa_til_stop){  
               $hgvs_notation->{alt} .="extX" . $aa_til_stop;
            }
         }
         if($hgvs_notation->{start} == $hgvs_notation->{end} && $hgvs_notation->{type} eq "delins"){       
             $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start} . $hgvs_notation->{end} . $hgvs_notation->{type} . $hgvs_notation->{alt} ;
         }
         else{        
             ### correct ordering if needed
             if($hgvs_notation->{start} > $hgvs_notation->{end}){       
                 ( $hgvs_notation->{start}, $hgvs_notation->{end}) = ($hgvs_notation->{end}, $hgvs_notation->{start} );
             }
        
             $hgvs_notation->{'hgvs'} .= $ref_pep_first . $hgvs_notation->{start}  . "_"  .  $ref_pep_last . $hgvs_notation->{end} . $hgvs_notation->{type} . $hgvs_notation->{alt} ;
         }
    }     
    
    elsif($hgvs_notation->{type} eq "fs"){
       if(defined $hgvs_notation->{alt} && $hgvs_notation->{alt} eq "X"){ ## stop gained
           $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start}  .  $hgvs_notation->{alt} ;

       }
       else{ ## not immediate stop - count aa until next

          my $aa_til_stop =  _stop_loss_extra_AA($self, $hgvs_notation->{start}-1, "fs");
          if($aa_til_stop ){
              ### use long form if new stop found 
             $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start}  .  $hgvs_notation->{alt} . "fsX$aa_til_stop"  ;
          }
          else{
            ### otherwise use short form 
            $hgvs_notation->{'hgvs'} .= $hgvs_notation->{ref} . $hgvs_notation->{start}  . "fs";
          }
       }
    }

    elsif( $hgvs_notation->{type} eq "del"){
        $hgvs_notation->{alt} =  "del"; 
        if( length($hgvs_notation->{ref}) >3 ){
            my $ref_pep_first = substr($hgvs_notation->{ref}, 0, 3);
            my $ref_pep_last  = substr($hgvs_notation->{ref}, -3, 3);
            $hgvs_notation->{'hgvs'} .=  $ref_pep_first . $hgvs_notation->{start} .  "_" .  $ref_pep_last . $hgvs_notation->{end} .$hgvs_notation->{alt} ;
        }
        else{
            $hgvs_notation->{'hgvs'} .=  $hgvs_notation->{ref} . $hgvs_notation->{start} .  $hgvs_notation->{alt} ;
        }       
    }

    elsif($hgvs_notation->{start} ne $hgvs_notation->{end} ){
        $hgvs_notation->{'hgvs'}  .=  $hgvs_notation->{ref} . $hgvs_notation->{start}  . "_" .  $hgvs_notation->{alt} . $hgvs_notation->{end} ;
    }

    else{
    #### default to substitution    
        $hgvs_notation->{'hgvs'}  .=   $hgvs_notation->{ref}. $hgvs_notation->{start} .  $hgvs_notation->{alt};
    }

    if($DEBUG==1){ print "Returning protein format: $hgvs_notation->{'hgvs'}\n";}
    return $hgvs_notation->{'hgvs'};
}

### HGVS: get type of variation event in protein terms
sub _get_hgvs_protein_type{

    my $self = shift;
    my $hgvs_notation = shift;
   
    ### get allele length
    my ($ref_length, $alt_length ) = $self->_get_allele_length();    


    if( defined $hgvs_notation->{ref} && defined $hgvs_notation->{alt} ){
    ### Run type checks on peptides if available

      if($hgvs_notation->{alt} eq $hgvs_notation->{ref} ){
          #### synonymous indicated by =
          $hgvs_notation->{type} = "=";
      }
      elsif( length($hgvs_notation->{ref}) ==1 && length($hgvs_notation->{alt}) ==1 ) {
 
          $hgvs_notation->{type} = ">";
      }
      elsif($hgvs_notation->{ref} eq "-" || $hgvs_notation->{ref} eq "") {

          $hgvs_notation->{type} = "ins";
      }
      elsif($hgvs_notation->{alt} eq "" ) {

          $hgvs_notation->{type} = "del";
      }
      elsif( (length($hgvs_notation->{alt}) >  length($hgvs_notation->{ref}) ) &&       
           $hgvs_notation->{alt} =~ / $hgvs_notation->{ref}/ ) {
          ### capture duplication event described as TTT/TTTTT 
          $hgvs_notation->{type} = "dup";        
      }

      elsif( (length($hgvs_notation->{alt}) >1 &&  length($hgvs_notation->{ref}) ==1) ||
             (length($hgvs_notation->{alt}) ==1 &&  length($hgvs_notation->{ref}) >1) ) {
          $hgvs_notation->{type} = "delins";
      }
      else{
          $hgvs_notation->{type} = ">";
      }
    }
    
  
    elsif($ref_length ne $alt_length && ($ref_length  - $alt_length)%3 !=0 ){
        $hgvs_notation->{type} = "fs";
    }    
    
    elsif(length($self->variation_feature_seq()) >1  ){
      if($hgvs_notation->{start} == ($hgvs_notation->{end} + 1) ){
          ### convention for insertions - end one less than start
          $hgvs_notation->{type} = "ins";
      }
      elsif( $hgvs_notation->{start} != $hgvs_notation->{end}  ){
          $hgvs_notation->{type} = "delins";
      }
      else{
          $hgvs_notation->{type} = ">";
      }   
    }
    else{
        #print STDERR "DEBUG ".$self->variation_feature->start."\n";
        #warn "Cannot define protein variant type [$ref_length  - $alt_length]\n";
    }
    return $hgvs_notation ;

}

### HGVS: get reference & alternative peptide 
sub _get_hgvs_peptides{

  my $self          = shift;
  my $hgvs_notation = shift;

  if($hgvs_notation->{type} eq "fs"){
    ### ensembl alt/ref peptides not the same as HGVS alt/ref - look up seperately
    $hgvs_notation = $self->_get_fs_peptides($hgvs_notation);    
  }
  elsif($hgvs_notation->{type} eq "ins" ){

    ### HGVS ref are peptides flanking insertion
    $hgvs_notation->{ref} = $self->_get_surrounding_peptides($hgvs_notation->{start});

    if( $hgvs_notation->{alt} =~/\*/){
        ## inserted bases after stop irrelevant; consider as substitution gaining stop MAINTAIN PREVIOUS BEHAVIOUR FOR NOW
        #$hgvs_notation->{alt} =~ s/\*\w+$/\*/;
    }
    else{
        ### Additional check that inserted bases do not duplicate 3' reference sequence [set to type = dup if so]
        $hgvs_notation = $self->_check_for_peptide_duplication($hgvs_notation);
    }
  }
  

  ### Convert peptide to 3 letter code as used in HGVS
  $hgvs_notation->{ref}  = Bio::SeqUtils->seq3(Bio::PrimarySeq->new(-seq => $hgvs_notation->{ref}, -id => 'ref',  -alphabet => 'protein')) || "";
  if( $hgvs_notation->{alt} eq "-"){
    $hgvs_notation->{alt} = "del";
  }
  else{
    $hgvs_notation->{alt}  = Bio::SeqUtils->seq3(Bio::PrimarySeq->new(-seq => $hgvs_notation->{alt}, -id => 'ref',  -alphabet => 'protein')) || "";
  }

  ### handle special cases
  if(    affects_start_codon($self) ){
    #### handle initiator loss -> probably no translation => alt allele is '?'
    $hgvs_notation->{alt}  = "?";    
    $hgvs_notation->{type} = "";
  }

  elsif(  $hgvs_notation->{type} eq "del"){ 
    #### partial last codon    
    $hgvs_notation->{alt} = "del";
  } 
  elsif($hgvs_notation->{type} eq "fs"){
    ### only quote first ref peptide for frameshift
    $hgvs_notation->{ref} = substr($hgvs_notation->{ref},0,3);
  }

  ### set stop to HGVS prefered "X"
  if(defined $hgvs_notation->{ref}){ $hgvs_notation->{ref} =~ s/Ter|Xaa/X/g; }
  if(defined $hgvs_notation->{alt}){ $hgvs_notation->{alt} =~ s/Ter|Xaa/X/g; }
         
  return ($hgvs_notation);                  
                
}

### HGVS:  remove common peptides from alt and ref strings & shift start/end accordingly 
sub _clip_alleles{
    
  my $hgvs_notation = shift;
    
  my $check_alt   = $hgvs_notation->{alt} ;
  my $check_ref   = $hgvs_notation->{ref} ;
  my $check_start = $hgvs_notation->{start};
    
  ### strip same bases from start of string
  for (my $p =0; $p <length ($hgvs_notation->{ref}); $p++){
  my $check_next_ref = substr( $check_ref, 0, 1);
  my $check_next_alt = substr( $check_alt, 0, 1);
    
    if($check_next_ref eq  "*" && $check_next_alt eq "*"){
        ### stop re-created by variant - no protein change
        $hgvs_notation->{type} = "=";

        return($hgvs_notation);
    }
    
    if($check_next_ref eq  $check_next_alt){
        $check_start++;
        $check_ref  = substr( $check_ref, 1);
        $check_alt  = substr( $check_alt, 1);
    }
    else{
        last;
    }
  }
  #### strip same bases from end of string
  for (my $q =0; $q < length ($check_ref); $q++){
    my $check_next_ref = substr( $check_ref, -1, 1);
    my $check_next_alt = substr( $check_alt, -1, 1);
    if($check_next_ref eq  $check_next_alt){
        chop $check_ref;
        chop $check_alt;
    }
    else{
        last;
    }
  }

  $hgvs_notation->{alt}   = $check_alt;
  $hgvs_notation->{ref}   = $check_ref;

  ### check if clipping force type change & adjust location
  if(length ($check_ref) == 0  && length ($check_alt) >= 1){
    ### re-set as ins not delins  
    $hgvs_notation->{type} ="ins";
        ### insertion between ref base and next => adjust next         
    if($hgvs_notation->{end} == $hgvs_notation->{start} ){$hgvs_notation->{end} = $check_start;    }
#    $hgvs_notation->{start} = $check_start;    
  }
  elsif(length ($check_ref) >=1  && length ($check_alt) == 0){
    ### re-set as del not delins  
    $hgvs_notation->{type}  ="del";        
    $hgvs_notation->{start} = $check_start;
  }
  else{
    #### save trimmed peptide string & increased start position
    $hgvs_notation->{start} = $check_start;

  }        

    return $hgvs_notation;
}
    




#### HGVS: check allele lengths to look for frameshifts
sub _get_allele_length{

  my $self       = shift;
  my $ref_length = 0;
  my $alt_length = 0;

  my $al_string = $self->allele_string();
  my $ref_allele   = (split/\//, $al_string)[0];
  $ref_allele =~ s/\-//;
  $ref_length    =  length $ref_allele;

  my $alt_allele = $self->variation_feature_seq();
  $alt_allele =~ s/\-//;
  $alt_length    =  length  $alt_allele;

  return ($ref_length, $alt_length );
  
}

### HGVS: list first different peptide [may not be first changed codon]
sub _get_fs_peptides{

  my $self    = shift;
  my $hgvs_notation = shift;

  ### get CDS with alt variant
  my $alt_cds = $self->_get_alternate_cds();
  return undef unless defined($alt_cds);

  #### get new translation
  my $alt_trans = $alt_cds->translate()->seq();
  
  ### get changed end (currently in single letter AA coding)    
  my $ref_trans  = $self->transcript->translate()->seq();
  $ref_trans    .= "*";   ## appending ref stop for checking purposes 
  
  $hgvs_notation->{start} = $self->transcript_variation->translation_start() ;

  if( $hgvs_notation->{start} > length $alt_trans){ ## deletion of stop, no further AA in alt seq 
    $hgvs_notation->{alt}  = "del";
    $hgvs_notation->{type} = "del";
    return $hgvs_notation;
  }

    while ($hgvs_notation->{start} <= length $alt_trans){
    ### frame shift may result in the same AA#

      $hgvs_notation->{ref} = substr($ref_trans, $hgvs_notation->{start}-1, 1);
      $hgvs_notation->{alt} = substr($alt_trans, $hgvs_notation->{start}-1, 1);

      if($hgvs_notation->{ref} eq "*" && $hgvs_notation->{alt} eq "*"){
          ### variation at stop codon, but maintains stop codon
          return ($hgvs_notation);
      }
      last if $hgvs_notation->{ref} ne $hgvs_notation->{alt};
      $hgvs_notation->{start}++;
    }
    return ($hgvs_notation);

}

#### HGVS: if variant is an insertion, ref pep is initially "-", so seek AA before and after insertion
sub _get_surrounding_peptides{

  my $self    = shift;
  my $ref_pos = shift; 
  my $ref_trans  = $self->transcript->translate()->seq();

  my $end = substr($ref_trans, $ref_pos-1);
  my $ref_string = substr($ref_trans, $ref_pos-1, 2);

  return ($ref_string);

}


#### HGVS: alternate CDS needed to check for new stop when variant disrupts 'reference' stop
sub _get_alternate_cds{
    
  my $self = shift;

  ### get reference sequence
  my $reference_cds_seq = $self->transcript->translateable_seq();
  
  return undef unless defined($self->transcript_variation->cds_start) && defined($self->transcript_variation->cds_end());

  ### get sequences upstream and downstream of variant
  my $upstream_seq   =  substr($reference_cds_seq, 0, ($self->transcript_variation->cds_start() -1) );
  my $downstream_seq =  substr($reference_cds_seq, ($self->transcript_variation->cds_end() ) );

  ### fix alternate allele if deletion or on opposite strand
  my $alt_allele  = $self->variation_feature_seq();
  $alt_allele  =~ s/\-//;
  if( $self->transcript_variation->variation_feature->strand() <0 && $self->transcript_variation->transcript->strand() >0 ||
      $self->transcript_variation->variation_feature->strand() >0 && $self->transcript_variation->transcript->strand() < 0
      ){    
      reverse_comp(\$alt_allele) ;
  }
  ### build alternate seq
  my $alternate_seq  = $upstream_seq . $alt_allele . $downstream_seq ;

  ### create seq obj with alternative allele in the CDS sequence
  my $alt_cds =Bio::PrimarySeq->new(-seq => $alternate_seq,  -id => 'alt_cds', -alphabet => 'dna');

  ### append UTR if available as stop may be disrupted
  my $utr = $self->transcript_variation->transcript->three_prime_utr();

  if (defined $utr) {
  ### append the UTR to the alternative CDS 
    $alt_cds->seq($alt_cds->seq() . $utr->seq()); 
  }
  else{
   ##warn "No UTR available for alternate CDS\n";
  }

  return $alt_cds;
}

### HGVS: if inserted string is identical to 3' reference sequence, describe as duplication
sub _check_for_peptide_duplication{
    
   my $self = shift;
   my $hgvs_notation= shift;

   ##### get reference sequence
   my $reference_cds_seq = $self->transcript->translateable_seq();

   ##### get sequence upstream of variant
   my $upstream_seq   =  substr($reference_cds_seq, 0, ($self->transcript_variation->cds_start() -1) );
   
   ##### create translation to check against inserted peptides 
   my $upstream_cds =Bio::PrimarySeq->new(-seq => $upstream_seq,  -id => 'alt_cds', -alphabet => 'dna');
   my $upstream_trans = $upstream_cds->translate()->seq();
   
   ## Test whether alt peptide matches the reference sequence just before the variant
   my $test_new_start = $hgvs_notation->{'start'} - length($hgvs_notation->{'alt'}) -1 ;
   my $test_seq       =  substr($upstream_trans, $test_new_start, length($hgvs_notation->{'alt'}));

   if ( $test_new_start >= 0 && $test_seq eq $hgvs_notation->{alt}) {

	  $hgvs_notation->{type}   = 'dup';
	  $hgvs_notation->{end}    = $hgvs_notation->{start} -1;
	  $hgvs_notation->{start} -= length($hgvs_notation->{alt});

	}
   
   return $hgvs_notation;

}

#### HGVS: if a stop is lost, seek the next in transcript & count number of extra AA's
sub _stop_loss_extra_AA{

  my $self        = shift;
  my $ref_var_pos = shift;  ### first effected AA - supply for frameshifts
  my $test        = shift;

  return undef unless $ref_var_pos;

  my $extra_aa;

  ### get the sequence with variant added
  my $alt_cds   = $self->_get_alternate_cds();
  return undef unless defined($alt_cds);
  
  ### get new translation
  my $alt_trans = $alt_cds->translate();
  
  my $ref_temp  =  $self->transcript->translate()->seq;
  my $ref_len = length($ref_temp);
  
  if($DEBUG==1){ 
    print "alt translated:\n" . $alt_trans->seq() . "\n";
    print "ref translated:\n$ref_temp\n";;
  }
  
  #### Find the number of residues that are translated until a termination codon is encountered
  if ($alt_trans->seq() =~ m/\*/) {
    if($DEBUG==1){print "Got $+[0] aa before stop, var event at $ref_var_pos \n";}
  
    if($test eq "fs" ){
      ### frame shift - count from first AA effected by variant to stop
      $extra_aa = $+[0] - $ref_var_pos;
      if($DEBUG==1){ print "Stop change ($test): found $extra_aa amino acids before fs stop [ $+[0] - peptide ref_start: $ref_var_pos )]\n";}
    }
  
    else{
      $extra_aa = $+[0]  - 1 - $ref_len;
      if($DEBUG==1){ print "Stop change ($test): found $extra_aa amino acids before next stop [ $+[0] - 1 -normal stop $ref_len)]\n";}        
    }
  }
  
  # A special case is if the first aa is a stop codon => don't display the number of residues until the stop codon
  if(defined $extra_aa && $extra_aa >0){ 
    return $extra_aa ;
  }
  else{ 
    #warn "No stop found in alternate sequence\n";
    return undef;
  }
 
}

=head
# We haven't implemented support for these methods yet

sub hgvs_rna {
    return _hgvs_generic(@_,'rna');
}

sub hgvs_mitochondrial {
    return _hgvs_generic(@_,'mitochondrial');
}

=cut

sub _hgvs_generic {

    my $self = shift;
    my $reference = pop;
    my $notation = shift;
    
    #ĘThe rna and mitochondrial modes have not yet been implemented, so return undef in case we get a call to these
    return undef if ($reference =~ m/rna|mitochondrial/);
    
    my $sub = qq{hgvs_$reference};
    
    $self->{$sub} = $notation if defined $notation;
    
    unless ($self->{$sub}) {

    # Use the transcript this VF is on as the reference feature
        my $reference_feature = $self->transcript;
        # If we want genomic coordinates, the reference_feature should actually be the slice for the underlying seq_region
        $reference_feature = $reference_feature->slice->seq_region_Slice if ($reference eq 'genomic');

        # Calculate the HGVS notation on-the-fly and pass it to the TranscriptVariation in order to distribute the result to the other alleles
        $self->transcript_variation->$sub($self->variation_feature->get_all_hgvs_notations($reference_feature,substr($reference,0,1),undef,undef,$self->transcript_variation));
    }
    
    return $self->{$sub};
}


### HGVS: move variant to transcript slice
sub _var2transcript_slice_coords{

  my $self = shift;
  
  my $ref_slice = $self->transcript->feature_Slice();  #### returns with strand same as feature
  my $tr_vf     = $self->variation_feature->transfer($ref_slice);
          
  # Return undef if this VariationFeature does not fall within the supplied feature.
  return undef if (!defined $tr_vf ||
                   $tr_vf->start  < 1 || 
                   $tr_vf->end    < 1 || 
                   $tr_vf->start  > ($self->transcript->end - $self->transcript->start + 1) || 
                   $tr_vf->end    > ($self->transcript->end - $self->transcript->start + 1)); 
  
  return( $tr_vf->start() , $tr_vf->end(), $ref_slice);
}



### HGVS: get variant position in transcript 

# intron: 
# If the position is in an intron, the boundary position of the closest exon and 
# a + or - offset into the intron is returned.
# Ordered by genome forward not 5' -> 3'

# upstream:
# If the position is 5' of the start codon, it is reported relative to the start codon 
# (-1 being the last nucleotide before the 'A' of ATG).

#downstream:
# If the position is 3' pf the stop codon, it is reported with a '*' prefix and the offset 
# from the start codon (*1 being the first nucleotide after the last position of the stop codon)

sub _get_cDNA_position {

    my $self     = shift;
    my $position = shift; ### start or end of variation   
    
    my $transcript = $self->transcript();
    my $strand     = $transcript->strand();    

    #### TranscriptVariation start/stop coord relative to transcript 
    #### Switch to chromosome coordinates taking into account strand
    $position = ( $strand > 0 ? 
          ( $self->transcript->start() + $position - 1 )  :   
          ( $self->transcript->end()   - $position + 1));


            
    # Get all exons and sort them in positional order
    my @exons   = sort {$a->start() <=> $b->start()} @{$transcript->get_all_Exons()};
    my $n_exons = scalar(@exons);

    my $cdna_position;
  # Loop over the exons and get the coordinates of the variation in exon+intron notation
    for (my $i=0; $i<$n_exons; $i++) {
    
      # Skip if the start point is beyond this exon
      next if ($position > $exons[$i]->end());
    

      # EXONIC:  If the start coordinate is within this exon
      if ($position >= $exons[$i]->start()) {
      # Get the cDNA start coordinate of the exon and add the number of nucleotides from the exon boundary to the variation
      # If the transcript is in the opposite direction, count from the end instead
      $cdna_position = $exons[$i]->cdna_start($transcript) + ( $strand > 0 ? 
                                   ( $position - $exons[$i]->start ) : 
                                   ( $exons[$i]->end() - $position ) 
                                   );
      last;  #### last exon checked
    }
      ## INTRONIC
      # Else the start coordinate is between this exon and the previous one, determine which one is closest and get coordinates relative to that one
      else {

      my $updist   = ($position - $exons[$i-1]->end());
      my $downdist = ($exons[$i]->start() - $position);

      # If the distance to the upstream exon is the shortest, or equal and in the positive orientation, use that
      if ($updist < $downdist || ($updist == $downdist && $strand >= 0)) {
          
          # If the orientation is reversed, we should use the cDNA start and a '-' offset
          $cdna_position = ($strand >= 0 ? 
                $exons[$i-1]->cdna_end($transcript)   . '+' : 
                $exons[$i-1]->cdna_start($transcript) . '-') 
          . $updist;
      }
      # Else if downstream is shortest...
      else {
          # If the orientation is reversed, we should use the cDNA end and a '+' offset
          $cdna_position = ($strand >= 0 ? 
                $exons[$i]->cdna_start($transcript) . '-' : 
                $exons[$i]->cdna_end($transcript)   . '+') 
          . $downdist;
      }
      last; ## last exon checked
      }
  }
  
  # Shift the position to make it relative to the start & stop codons
  my $start_codon  =  $transcript->cdna_coding_start();
  my $stop_codon   =  $transcript->cdna_coding_end();
  
  # Disassemble the cDNA coordinate into the exon and intron parts
    ### just built this now taking it appart again
  my ($cdna_coord, $intron_offset) = $cdna_position =~ m/([0-9]+)([\+\-][0-9]+)?/;
  

  # Start by correcting for the stop codon
  if (defined($stop_codon) && $cdna_coord > $stop_codon) {
    # Get the offset from the stop codon
    $cdna_coord -= $stop_codon;
    # Prepend a * to indicate the position is in the 3' UTR
    $cdna_coord = '*' . $cdna_coord;
  }
  elsif (defined($start_codon)) {
    # If the position is beyond the start codon, add 1 to get the correct offset
    $cdna_coord += ($cdna_coord >= $start_codon);
    # Subtract the position of the start codon
    $cdna_coord -= $start_codon;
  }

  # Re-assemble the cDNA position  [ return exon num & offset & direction for intron eg. 142+363] 
  $cdna_position = $cdna_coord . (defined($intron_offset) ? $intron_offset : '');
  return $cdna_position;
}


1;