view variant_effect_predictor/Bio/EnsEMBL/Variation/AlleleFeature.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

# Ensembl module for Bio::EnsEMBL::Variation::AlleleFeature
#
# Copyright (c) 2005 Ensembl
#


=head1 NAME

Bio::EnsEMBL::Variation::AlleleFeature - A genomic position for an allele in a sample.

=head1 SYNOPSIS

    # Allele feature representing a single nucleotide polymorphism
    $af = Bio::EnsEMBL::Variation::AlleleFeature->new
       (-start   => 100,
        -end     => 100,
        -strand  => 1,
        -slice   => $slice,
        -allele_string => 'A',
        -variation_name => 'rs635421',
        -variation => $v);
    ...

    # a allele feature is like any other ensembl feature, can be
    # transformed etc.
    $af = $af->transform('supercontig');

    print $af->start(), "-", $af->end(), '(', $af->strand(), ')', "\n";

    print $af->name(), ":", $af->allele_string();

    # Get the Variation object which this feature represents the genomic
    # position of. If not already retrieved from the DB, this will be
    # transparently lazy-loaded
    my $v = $af->variation();

=head1 DESCRIPTION

This is a class representing the genomic position of a allele in a sample
from the ensembl-variation database.  The actual variation information is
represented by an associated Bio::EnsEMBL::Variation::Variation object. Some
of the information has been denormalized and is available on the feature for
speed purposes.  A AlleleFeature behaves as any other Ensembl feature.
See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>.

=head1 METHODS

=cut

use strict;
use warnings;

package Bio::EnsEMBL::Variation::AlleleFeature;

use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument  qw(rearrange);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
use Bio::EnsEMBL::Variation::ConsequenceType;

our @ISA = ('Bio::EnsEMBL::Feature');

=head2 new

  Arg [-ADAPTOR] :
    see superclass constructor

  Arg [-START] :
    see superclass constructor
  Arg [-END] :
    see superclass constructor

  Arg [-STRAND] :
    see superclass constructor

  Arg [-SLICE] :
    see superclass constructor

  Arg [-VARIATION_NAME] :
    string - the name of the variation this feature is for (denormalisation
    from Variation object).

 Arg [-SOURCE] :
    string - the name of the source where the SNP comes from

  Arg [-VARIATION] :
    int - the variation object associated with this feature.

  Arg [-VARIATION_ID] :
    int - the internal id of the variation object associated with this
    identifier. This may be provided instead of a variation object so that
    the variation may be lazy-loaded from the database on demand.
    
  Arg [-SAMPLE_ID] :
    int - the internal id of the sample object associated with this
    identifier. This may be provided instead of the object so that
    the population/individual may be lazy-loaded from the database on demand.

  Arg [-ALLELE_STRING] :
    string - the allele for this AlleleFeature object.
  
  Arg [-OVERLAP_CONSEQUENCES] :
	listref of Bio::EnsEMBL::Variation::OverlapConsequence objects.

  Example    :
    $af = Bio::EnsEMBL::Variation::AlleleFeature->new
       (-start   => 100,
        -end     => 100,
        -strand  => 1,
        -slice   => $slice,
        -allele_string => 'A',
		-consequence_type => 'NON_SYNONYMOUS_CODING',
        -variation_name => 'rs635421',
	-source => 'Celera',
	-sample_id  => $sample_id,
        -variation => $v);

  Description: Constructor. Instantiates a new AlleleFeature object.
  Returntype : Bio::EnsEMBL::Variation::AlleleFeature
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);
  my ($allele, $overlap_consequences, $var_name, $variation, $variation_id, $population, $sample_id, $source) =
    rearrange([qw(ALLELE_STRING OVERLAP_CONSEQUENCES VARIATION_NAME 
                  VARIATION VARIATION_ID SAMPLE_ID SOURCE)], @_);

  $self->{'allele_string'}          = $allele;
  $self->{'overlap_consequences'}   = $overlap_consequences;
  $self->{'variation_name'}         = $var_name;
  $self->{'variation'}              = $variation;
  $self->{'_variation_id'}          = $variation_id;
  $self->{'_sample_id'}             = $sample_id;
  $self->{'source'}                 = $source;

  return $self;
}



sub new_fast {
  my $class = shift;
  my $hashref = shift;
  return bless $hashref, $class;
}


=head2 allele_string

  Arg [1]    : string $newval (optional)
               The new value to set the allele attribute to
  Example    : $allele = $obj->allele_string()
  Description: Getter/Setter for the allele attribute.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub allele_string{
  my $self = shift;
  return $self->{'allele_string'} = shift if(@_);
  
  return $self->{'allele_string'} if ($self->{'_half_genotype'}); #for half genotypes
  return join('|',split (//,unambiguity_code($self->{'allele_string'}))); #for heterozygous alleles
}



=head2 consequence_type

  Arg [1]    : (optional) String $term_type
  Description: Get a list of all the unique consequence terms of this 
               AlleleFeature. By default returns Ensembl display terms
               (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
               (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
               'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
  Returntype : listref of strings
  Exceptions : none
  Status     : At Risk

=cut

sub consequence_type {
    
  my $self = shift;
  my $term_type = shift;
  
  if($self->_is_sara) {
	return ['SARA'];
  }
  else {
	delete $self->{consequence_type} if defined($term_type);
	
	unless ($self->{consequence_type}) {
	
	  $term_type ||= 'SO';
	  my $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
	  $method_name = 'SO_term' unless $self->most_severe_OverlapConsequence->can($method_name);
	  
	  # work out the terms from the OverlapConsequence objects
	  $self->{consequence_type} = 
		[ $self->_is_sara ? 'SARA' : map { $_->$method_name } @{ $self->get_all_OverlapConsequences } ];
		
	  return $self->{consequence_type};
	}
  }
}


=head2 get_all_OverlapConsequences

  Description: Get a list of all the unique OverlapConsequences of this AlleleFeature 
  Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects
  Exceptions : none
  Status     : At Risk

=cut

sub get_all_OverlapConsequences {
    my $self = shift;
    return $self->{overlap_consequences}
}


=head2 most_severe_OverlapConsequence

  Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe 
               consequence of all the alleles of this AlleleFeature 
  Returntype : Bio::EnsEMBL::Variation::OverlapConsequence
  Exceptions : none
  Status     : At Risk

=cut

sub most_severe_OverlapConsequence {
    my $self = shift;
    
    unless ($self->{_most_severe_consequence}) {
        
        my $highest;
        
        for my $cons (@{ $self->get_all_OverlapConsequences }) {
            $highest ||= $cons;
            if ($cons->rank < $highest->rank) {
                $highest = $cons;
            }
        }
        
        $self->{_most_severe_consequence} = $highest;
    }
    
    return $self->{_most_severe_consequence};
}

=head2 display_consequence

  Arg [1]    : (optional) String $term_type
  Description: Get the term for the most severe consequence of this 
               AlleleFeature. By default returns Ensembl display terms
               (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
               (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
               'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
  Returntype : string
  Exceptions : none
  Caller     : webteam
  Status     : At Risk

=cut

sub display_consequence {
  my $self = shift;
  my $term_type = shift;
  
  if($self->_is_sara) {
	return 'SARA';
  }
  else {
	$term_type ||= 'SO';
	my $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
	$method_name = 'SO_term' unless $self->most_severe_OverlapConsequence->can($method_name);
	
	return $self->most_severe_OverlapConsequence->$method_name;
  }
}


=head2 get_all_TranscriptVariations

  Arg [1]     : (optional) listref of Bio::EnsEMBL::Transcript objects
  Example     : $af->get_all_TranscriptVariations;
  Description : Get all the TranscriptVariations associated with this AlleleFeature.
                If the optional list of Transcripts is supplied, get only TranscriptVariations
                associated with those Transcripts.
  Returntype  : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects
  Exceptions  : Thrown on wrong argument type
  Caller      : general
  Status      : At Risk

=cut

sub get_all_TranscriptVariations {
  my $self = shift;
  my $trs = shift;
  
  my $cons = $self->variation_feature->get_all_TranscriptVariations($trs);
  
  # convert the TV to a SARA one if this is a SARA genotype
  if($self->_is_sara) {
	$_->_convert_to_sara foreach @$cons;
  }
  
  # otherwise we need to rearrange the TranscriptVariationAlleles based
  # on the alleles of this genotype
  else {
	my %alleles;
	$alleles{$_} = 1 foreach split /\||\/|\\/, $self->allele_string;
	
	$_->_rearrange_alleles(\%alleles) foreach @$cons;
  }
  
  return $cons;
}

=head2 variation_name

  Arg [1]    : string $newval (optional)
               The new value to set the variation_name attribute to
  Example    : $variation_name = $obj->variation_name()
  Description: Getter/Setter for the variation_name attribute.  This is the
               name of the variation associated with this feature.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub variation_name{
  my $self = shift;
  return $self->{'variation_name'} = shift if(@_);
  return $self->{'variation_name'};
}

=head2 variation

  Arg [1]    : (optional) Bio::EnsEMBL::Variation::Variation $variation
  Example    : $v = $af->variation();
  Description: Getter/Setter for the variation associated with this feature.
               If not set, and this AlleleFeature has an associated adaptor
               an attempt will be made to lazy-load the variation from the
               database.
  Returntype : Bio::EnsEMBL::Variation::Variation
  Exceptions : throw on incorrect argument
  Caller     : general
  Status     : At Risk

=cut

sub variation {
  my $self = shift;

  if(@_) {
    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Variation')) {
      throw("Bio::EnsEMBL::Variation::Variation argument expected");
    }
    $self->{'variation'} = shift;
  }
  elsif(!defined($self->{'variation'}) && $self->{'adaptor'} &&
        defined($self->{'_variation_id'})) {
    # lazy-load from database on demand
    my $va = $self->{'adaptor'}->db()->get_VariationAdaptor();
    $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'});
  }

  return $self->{'variation'};
}

=head2 variation_feature

  Arg [1]    : (optional) Bio::EnsEMBL::Variation::VariationFeature $vf
  Example    : $vf = $af->variation_feature();
  Description: Getter/Setter for the variation feature associated with this feature.
               If not set, and this AlleleFeature has an associated adaptor
               an attempt will be made to lazy-load the variation from the
               database.
  Returntype : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions : throw on incorrect argument
  Caller     : general
  Status     : At Risk

=cut

sub variation_feature {
  my $self = shift;

  if(@_) {
    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
      throw("Bio::EnsEMBL::Variation::VariationFeature argument expected");
    }
    $self->{'variation_feature'} = shift;
  }
  elsif(!defined($self->{'variation_feature'}) && $self->{'adaptor'} &&
        defined($self->{'_variation_feature_id'})) {
    # lazy-load from database on demand
    my $va = $self->{'adaptor'}->db()->get_VariationFeatureAdaptor();
    $self->{'variation_feature'} = $va->fetch_by_dbID($self->{'_variation_feature_id'});
  }

  return $self->{'variation_feature'};
}

=head2 individual

  Arg [1]    : (optional) Bio::EnsEMBL::Variation::Individual $individual
  Example    : $p = $af->individual();
  Description: Getter/Setter for the individual associated with this feature.
               If not set, and this AlleleFeature has an associated adaptor
               an attempt will be made to lazy-load the individual from the
               database.
  Returntype : Bio::EnsEMBL::Variation::Individual
  Exceptions : throw on incorrect argument
  Caller     : general
  Status     : At Risk

=cut

sub individual {
  my $self = shift;

  if(@_) {
    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Individual')) {
      throw("Bio::EnsEMBL::Variation::Individual argument expected");
    }
    $self->{'individual'} = shift;
  }
  elsif(!defined($self->{'individual'}) && $self->{'adaptor'} &&
        defined($self->{'_sample_id'})) {
    # lazy-load from database on demand
    my $ia = $self->{'adaptor'}->db()->get_IndividualAdaptor();
    $self->{'individual'} = $ia->fetch_by_dbID($self->{'_sample_id'});
    if (!defined $self->{'individual'}){
	warning("AlleleFeature attached to Strain, not Individual");
    }
  }

  return $self->{'individual'};
}


=head2 apply_edit
    
    Arg [1]    : reference to string $seqref
    Arg [2]    : int $start of the seq_ref
    Example    : $sequence = 'ACTGAATATTTAAGGCA';
               $af->apply_edit(\$sequence,$start);
               print $sequence, "\n";
    Description: Applies this AlleleFeature directly to a sequence which is
               passed by reference.  
               If either the start or end of this AlleleFeature are not defined
               this function will not do anything to the passed sequence.
    Returntype : reference to the same sequence that was passed in
    Exceptions : none
    Caller     : Slice
    Status     : At Risk

=cut

sub apply_edit  {

  my $self   = shift;
  my $seqref = shift;

  if(ref($seqref) ne 'SCALAR') {
    throw("Reference to scalar argument expected");
  }

  if(!defined($self->{'start'}) || !defined($self->{'end'})) {
    return $seqref;
  }


  my $len = $self->length;
  my $as = $self->{'allele_string'};
  $as =~ s/\-//g;
  
  substr($$seqref, $self->{'start'}-1, $len) = $as;
  
  return $seqref;

}

=head2 length_diff

  Arg [1]    : none
  Example    : my $diff = $af->length_diff();
  Description: Returns the difference in length caused by applying this
               AlleleFeature to a sequence.  This may be be negative (deletion),
               positive (insertion) or 0 (replacement).
               If either start or end are not defined 0 is returned.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub length_diff  {

  my $self = shift;

  return 0 if(!defined($self->{'end'}) || !defined($self->{'start'}));

  return length($self->{'allele_string'}) - ($self->{'end'} - $self->{'start'} + 1) if ($self->{'allele_string'} ne '-'); 
  return 0 - ($self->{'end'} - $self->{'start'} +1) if ($self->{'allele_string'} eq '-'); #do we need the +1 in the distance ??

}


sub length {
  my $self = shift;
  return $self->{'end'} - $self->{'start'} + 1;
}

=head2 source

  Arg [1]    : string $source (optional)
               The new value to set the source attribute to
  Example    : $source = $vf->source()
  Description: Getter/Setter for the source attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub source{
  my $self = shift;
  return $self->{'source'} = shift if(@_);
  return $self->{'source'};
}

=head2 ref_allele_string

  Args       : None
  Example    : $allele = $obj->ref_allele_string()
  Description: Getter for the reference allele.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

 sub ref_allele_string{
   my $self = shift;

   my $reference_allele;
   if ( ref ($self->slice) eq 'Bio::EnsEMBL::Slice' ){
       #we already have the reference slice, so just get the sequence
       $reference_allele = $self->seq;
   }
   else{
       #we have a Strain or IndividualSlice, get the reference sequence from the method
       $reference_allele = $self->slice->ref_subseq($self->start,$self->end,$self->strand) || '-';
   }

   return $reference_allele;
 }

=head2 get_all_sources

    Args        : none
    Example     : my @sources = @{$af->get_all_sources()};
    Description : returns a list of all the sources for this
                  AlleleFeature
    ReturnType  : reference to list of strings
    Exceptions  : none
    Caller      : general
    Status      : At Risk
                : Variation database is under development.
=cut

sub get_all_sources{
    my $self = shift;
    
    my @sources;
    my %sources;
    if ($self->{'adaptor'}){
	map {$sources{$_}++} @{$self->{'adaptor'}->get_all_synonym_sources($self)};
	$sources{$self->source}++;
	@sources = keys %sources;
	return \@sources;	
    }
    return \@sources;
}

sub _is_sara{
  my $self = shift;
  
  if(!defined($self->{_is_sara})) {
	my $allele_string = $self->allele_string;
	my $ref = $self->ref_allele_string;
	
	my $is_sara = 1;
	
	foreach my $a(split /\/|\||\\/, $allele_string) {
	  $is_sara = 0 if $ref !~ /$a/i;
	}
	
	$self->{_is_sara} = $is_sara;
  }
  
  return $self->{_is_sara};
}

1;