view variant_effect_predictor/Bio/EnsEMBL/Variation/Variation.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

 Copyright (c) 1999-2012 The European Bioinformatics Institute and
 Genome Research Limited.  All rights reserved.

 This software is distributed under a modified Apache license.
 For license details, please see

   http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

 Please email comments or questions to the public Ensembl
 developers list at <dev@ensembl.org>.

 Questions may also be sent to the Ensembl help desk at
 <helpdesk@ensembl.org>.

=cut

# Ensembl module for Bio::EnsEMBL::Variation::Variation
#
# Copyright (c) 2004 Ensembl
#


=head1 NAME

Bio::EnsEMBL::Variation::Variation - Ensembl representation of a nucleotide variation.

=head1 SYNOPSIS

    $v = Bio::EnsEMBL::Variation::Variation->new(-name   => 'rs123',
                                                 -source => 'dbSNP');

    # add additional synonyms for the same SNP
    $v->add_synonym('dbSNP', 'ss3242');
    $v->add_synonym('TSC', '53253');

    # add some validation states for this SNP
    $v->add_validation_status('freq');
    $v->add_validation_status('cluster');

    # add alleles associated with this SNP
    $a1 = Bio::EnsEMBL::Allele->new(...);
    $a2 = Bio::EnsEMBL::Allele->new(...);
    $v->add_Allele($a1);
    $v->add_Allele($a2);

    # set the flanking sequences
    $v->five_prime_flanking_seq($seq);
    $v->three_prime_flanking_seq($seq);


    ...

    # print out the default name and source of the variation and the version
    print $v->source(), ':',$v->name(), ".",$v->version,"\n";

    # print out every synonym associated with this variation
    @synonyms = @{$v->get_all_synonyms()};
    print "@synonyms\n";

    # print out synonyms and their database associations
    my $sources = $v->get_all_synonym_sources();
    foreach my $src (@$sources) {
      @synonyms = $v->get_all_synonyms($src);
      print "$src: @synonyms\n";
    }


    # print out validation states
    my @vstates = @{$v->get_all_validation_states()};
    print "@validation_states\n";

    # print out flanking sequences
    print "5' flanking: ", $v->five_prime_flanking_seq(), "\n";
    print "3' flanking: ", $v->three_prime_flanking_seq(), "\n";


=head1 DESCRIPTION

This is a class representing a nucleotide variation from the
ensembl-variation database. A variation may be a SNP a multi-base substitution
or an insertion/deletion.  The objects Alleles associated with a Variation
object describe the nucleotide change that Variation represents.

A Variation object has an associated identifier and 0 or more additional
synonyms.  The position of a Variation object on the Genome is represented
by the B<Bio::EnsEMBL::Variation::VariationFeature> class.

=head1 METHODS

=cut


use strict;
use warnings;

package Bio::EnsEMBL::Variation::Variation;

use Bio::EnsEMBL::Storable;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref wrap_array);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(ambiguity_code SO_variation_class);
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
use Bio::EnsEMBL::Variation::Utils::Sequence;
use Bio::EnsEMBL::Variation::Utils::Constants qw(%VARIATION_CLASSES); 
use Bio::EnsEMBL::Variation::Failable;
use vars qw(@ISA);
use Scalar::Util qw(weaken);

@ISA = qw(Bio::EnsEMBL::Storable Bio::EnsEMBL::Variation::Failable);

=head2 new

  Arg [-dbID] :
    int - unique internal identifier for snp

  Arg [-ADAPTOR] :
    Bio::EnsEMBL::Variation::DBSQL::VariationAdaptor
    Adaptor which provides database connectivity for this Variation object

  Arg [-NAME] :
    string - the name of this SNP

  Arg [-SOURCE] :
    string - the source of this SNP 

  Arg [-SOURCE_DESCRIPTION] :
    string - description of the SNP source
    
  Arg [-SOURCE_TYPE] :
    string - the source type of this variant

  Arg [-SYNONYMS] :
    reference to hash with list reference values -  keys are source
    names and values are lists of identifiers from that db.
    e.g.: {'dbSNP' => ['ss1231', '1231'], 'TSC' => ['1452']}

  Arg [-ANCESTRAL_ALLELES] :
    string - the ancestral allele of this SNP

  Arg [-ALLELES] :
    reference to list of Bio::EnsEMBL::Variation::Allele objects

  Arg [-VALIDATION_STATES] :
    reference to list of strings

  Arg [-MOLTYPE] :
    string - the moltype of this SNP

  Arg [-FIVE_PRIME_FLANKING_SEQ] :
    string - the five prime flanking nucleotide sequence

  Arg [-THREE_PRIME_FLANKING_SEQ] :
    string - the three prime flanking nucleotide sequence

  Example    : $v = Bio::EnsEMBL::Variation::Variation->new
                    (-name   => 'rs123',
                     -source => 'dbSNP');

  Description: Constructor. Instantiates a new Variation object.
  Returntype : Bio::EnsEMBL::Variation::Variation
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut


sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($dbID, $adaptor, $name, $class_so_term, $src, $src_desc, $src_url, $src_type, $is_somatic, $flipped, $syns, $ancestral_allele,
      $alleles, $valid_states, $moltype, $five_seq, $three_seq, $flank_flag, $minor_allele, $minor_allele_frequency, $minor_allele_count, 
      $clinical_significance) =
        rearrange([qw(dbID ADAPTOR NAME CLASS_SO_TERM SOURCE SOURCE_DESCRIPTION SOURCE_URL SOURCE_TYPE IS_SOMATIC 
                      FLIPPED SYNONYMS ANCESTRAL_ALLELE ALLELES VALIDATION_STATES MOLTYPE FIVE_PRIME_FLANKING_SEQ
                      THREE_PRIME_FLANKING_SEQ FLANK_FLAG MINOR_ALLELE MINOR_ALLELE_FREQUENCY MINOR_ALLELE_COUNT 
                      CLINICAL_SIGNIFICANCE)],@_);

  # convert the validation state strings into a bit field
  # this preserves the same order and representation as in the database
  # and filters out invalid states
  my $vcode = Bio::EnsEMBL::Variation::Utils::Sequence::get_validation_code($valid_states);
  
  my $self = bless {
    'dbID' => $dbID,
    'adaptor' => $adaptor,
    'name'   => $name,
    'class_SO_term' => $class_so_term,
    'source' => $src,
    'source_description' => $src_desc,
    'source_url' => $src_url,
    'source_type'=> $src_type,
    'is_somatic' => $is_somatic,
    'flipped' => $flipped,
    'synonyms' => $syns || {},
    'ancestral_allele' => $ancestral_allele,
    'validation_code' => $vcode,
    'moltype' => $moltype,
    'five_prime_flanking_seq' => $five_seq,
    'three_prime_flanking_seq' => $three_seq,
    'flank_flag' => $flank_flag,
    'minor_allele' => $minor_allele,
    'minor_allele_frequency' => $minor_allele_frequency,
    'minor_allele_count' => $minor_allele_count,
    'clinical_significance' => $clinical_significance,
  }, $class;
  
  $self->add_Allele($alleles) if defined($alleles);
  
  return $self;
}

sub new_fast {
  my $class = shift;
  my $hashref = shift;
  return bless $hashref, $class;
}

=head2 has_failed_subsnps

  Description: DEPRECATED: Use has_failed_alleles instead.
  Status     : DEPRECATED

=cut

sub has_failed_subsnps {
    my $self = shift;
  
    deprecate("has_failed_subsnps should no longer be used, use has_failed_alleles instead\n");
    return $self->has_failed_alleles();
}

=head2 has_failed_alleles

  Example    : print "Variation '" . $var->name() . "' has " . ($var->has_failed_alleles() ? "" : "no ") . " failed alleles\n";
  Description: Returns true if this variation has alleles that are flagged as failed
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub has_failed_alleles {
    my $self = shift;
  
    map {return 1 if ($_->is_failed())} @{$self->get_all_Alleles()};
    return 0;
}


=head2 add_Allele

  Arg [1]    : Bio::EnsEMBL::Variation::Allele $allele 
  Example    : $v->add_allele(Bio::EnsEMBL::Variation::Allele->new(...));
  Description: Add an Allele to this variation.
  Returntype : none
  Exceptions : throw on incorrect argument
  Caller     : general
  Status     : At Risk

=cut

sub add_Allele {
    my $self = shift;
    my $allele = shift;

    # This method also accepts a list of alleles so wrap the argument in an array and treat as such
    $allele = wrap_array($allele);  
    map {assert_ref($_,'Bio::EnsEMBL::Variation::Allele')} @{$allele};
    
    # Store the allele in the private hash
    $self->{alleles} = [] unless (exists($self->{alleles}));
    push(@{$self->{alleles}},@{$allele});
    
}

=head2 name

  Arg [1]    : string $newval (optional)
               The new value to set the name attribute to
  Example    : $name = $obj->name()
  Description: Getter/Setter for the name attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name{
  my $self = shift;
  return $self->{'name'} = shift if(@_);
  return $self->{'name'};
}


=head2 get_all_Genes

  Args        : None
  Example     : $genes = $v->get_all_genes();
  Description : Retrieves all the genes where this Variation
                has a consequence.
  ReturnType  : reference to list of Bio::EnsEMBL::Gene
  Exceptions  : None
  Caller      : general
  Status      : At Risk

=cut

sub get_all_Genes{
    my $self = shift;
    my $genes;
    if (defined $self->{'adaptor'}){
  my $UPSTREAM = 5000;
  my $DOWNSTREAM = 5000;
  my $vf_adaptor = $self->adaptor()->db()->get_VariationFeatureAdaptor();
  my $vf_list = $vf_adaptor->fetch_all_by_Variation($self);
  #foreach vf, get the slice is on, us ethe USTREAM and DOWNSTREAM limits to get all the genes, and see if SNP is within the gene
  my $new_slice;
  my $gene_list;
  my $gene_hash;

  foreach my $vf (@{$vf_list}){
      #expand the slice UPSTREAM and DOWNSTREAM
      $new_slice = $vf->feature_Slice()->expand($UPSTREAM,$DOWNSTREAM);
      #get the genes in the new slice
      $gene_list = $new_slice->get_all_Genes();
      foreach my $gene (@{$gene_list}){
    if (($vf->start >= $gene->seq_region_start - $UPSTREAM) && ($vf->start <= $gene->seq_region_end + $DOWNSTREAM) && ($vf->end <= $gene->seq_region_end + $DOWNSTREAM)){
        #the vf is affecting the gene, add to the hash if not present already
        if (!exists $gene_hash->{$gene->dbID}){
      $gene_hash->{$gene->dbID} = $gene;
        }
    }
      }
  }
  #and return all the genes
  push @{$genes}, values %{$gene_hash};
    }
    return $genes;
}




=head2 get_all_VariationFeatures

  Args        : None
  Example     : $vfs = $v->get_all_VariationFeatures();
  Description : Retrieves all VariationFeatures for this Variation
  ReturnType  : reference to list of Bio::EnsEMBL::Variation::VariationFeature
  Exceptions  : None
  Caller      : general
  Status      : At Risk

=cut

sub get_all_VariationFeatures{
  my $self = shift;
  
  if(defined $self->adaptor) {
  
  # get variation feature adaptor
  my $vf_adaptor = $self->adaptor()->db()->get_VariationFeatureAdaptor();
  
  return $vf_adaptor->fetch_all_by_Variation($self);
  }
  
  else {
  warn("No variation database attached");
  return [];
  }
}

=head2 get_VariationFeature_by_dbID

  Args        : None
  Example     : $vf = $v->get_VariationFeature_by_dbID();
  Description : Retrieves a VariationFeature for this Variation by it's internal
        database identifier
  ReturnType  : Bio::EnsEMBL::Variation::VariationFeature
  Exceptions  : None
  Caller      : general
  Status      : At Risk

=cut

sub get_VariationFeature_by_dbID{
  my $self = shift;
  my $dbID = shift;
  
  throw("No dbID defined") unless defined $dbID;
  
  if(defined $self->adaptor) {
  
  # get variation feature adaptor
  my $vf_adaptor = $self->adaptor()->db()->get_VariationFeatureAdaptor();
  
  my $vf = $vf_adaptor->fetch_by_dbID($dbID);
  
  # check defined
  if(defined($vf)) {
    
    # check it is the same variation ID
    if($vf->{_variation_id} == $self->dbID) {
    return $vf;
    }
    
    else {
    warn("Variation dbID for Variation Feature does not match this Variation's dbID");
    return undef;
    }
  }
  
  else {
    return undef;
  }
  }
  
  else {
  warn("No variation database attached");
  return undef;
  }  
}



=head2 get_all_synonyms

  Arg [1]    : (optional) string $source - the source of the synonyms to
               return.
  Example    : @dbsnp_syns = @{$v->get_all_synonyms('dbSNP')};
               @all_syns = @{$v->get_all_synonyms()};
  Description: Retrieves synonyms for this Variation. If a source argument
               is provided all synonyms from that source are returned,
               otherwise all synonyms are returned.
  Returntype : reference to list of strings
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub get_all_synonyms {
    my $self = shift;
    my $source = shift;

    if ($source) {
        $source = [$source];
    }
    else {
        $source = $self->get_all_synonym_sources();
    }
    
    my @synonyms;
    map {push(@synonyms,keys(%{$self->{synonyms}{$_}}))} @{$source};

    return \@synonyms;
}



=head2 get_all_synonym_sources

  Arg [1]    : none
  Example    : my @sources = @{$v->get_all_synonym_sources()};
  Description: Retrieves a list of all the sources for synonyms of this
               Variation.
  Returntype : reference to a list of strings
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub get_all_synonym_sources {
  my $self = shift;
  my @sources = keys %{$self->{'synonyms'}};
  return \@sources;
}



=head2 add_synonym

  Arg [1]    : string $source
  Arg [2]    : string $syn
  Example    : $v->add_synonym('dbSNP', 'ss55331');
  Description: Adds a synonym to this variation.
  Returntype : none
  Exceptions : throw if $source argument is not provided
               throw if $syn argument is not provided
  Caller     : general
  Status     : At Risk

=cut

sub add_synonym {
  my $self   = shift;
  my $source = shift;
  my $syn    = shift;

  throw("source argument is required") if(!$source);
  throw("syn argument is required") if(!$syn);

  $self->{'synonyms'}{$source}{$syn}++;

  return;
}



=head2 get_all_validation_states

  Arg [1]    : none
  Example    : my @vstates = @{$v->get_all_validation_states()};
  Description: Retrieves all validation states for this variation.  Current
               possible validation statuses are 'cluster','freq','submitter',
               'doublehit', 'hapmap'
  Returntype : reference to list of strings
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub get_all_validation_states {
    my $self = shift;
    
    return Bio::EnsEMBL::Variation::Utils::Sequence::get_all_validation_states($self->{'validation_code'});
}




=head2 add_validation_state

  Arg [1]    : string $state
  Example    : $v->add_validation_state('cluster');
  Description: Adds a validation state to this variation.
  Returntype : none
  Exceptions : warning if validation state is not a recognised type
  Caller     : general
  Status     : At Risk

=cut

sub add_validation_state {
    Bio::EnsEMBL::Variation::Utils::Sequence::add_validation_state(@_);
}



=head2 source

  Arg [1]    : string $source (optional)
               The new value to set the source attribute to
  Example    : $source = $v->source()
  Description: Getter/Setter for the source attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source{
  my $self = shift;
  return $self->{'source'} = shift if(@_);
  return $self->{'source'};
}


=head2 source_type

  Arg [1]    : string $source_type (optional)
               The new value to set the source type attribute to
  Example    : $source_type = $v->source_type()
  Description: Getter/Setter for the source type attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub source_type{
  my $self = shift;
  return $self->{'source_type'} = shift if(@_);
  return $self->{'source_type'};
}


=head2 source_description

  Arg [1]    : string $source_description (optional)
               The new value to set the source description attribute to
  Example    : $source_description = $v->source_description()
  Description: Getter/Setter for the source description attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source_description{
  my $self = shift;
  return $self->{'source_description'} = shift if(@_);
  return $self->{'source_description'};
}



=head2 source_url

  Arg [1]    : string $source_url (optional)
               The new value to set the source URL attribute to
  Example    : $source_url = $v->source_url()
  Description: Getter/Setter for the source URL attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source_url{
  my $self = shift;
  return $self->{'source_url'} = shift if(@_);
  return $self->{'source_url'};
}

=head2 is_somatic

  Arg [1]    : boolean $is_somatic (optional)
               The new value to set the is_somatic flag to
  Example    : $is_somatic = $v->is_somatic
  Description: Getter/Setter for the is_somatic flag, which identifies this variation as either somatic or germline
  Returntype : boolean
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub is_somatic {
  my ($self, $is_somatic) = @_;
  $self->{is_somatic} = $is_somatic if defined $is_somatic;
  return $self->{is_somatic};
}

=head2 flipped

  Arg [1]    : boolean $flipped (optional)
               The new value to set the flipped flag to
  Example    : $flipped = $v->flipped
  Description: Getter/Setter for the flipped flag, which identifies if this
               variation's strand has been flipped during the import process
  Returntype : boolean
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub flipped {
  my ($self, $flipped) = @_;
  $self->{flipped} = $flipped if defined $flipped;
  return $self->{flipped};
}

=head2 get_all_Alleles

  Arg [1]    : none
  Example    : @alleles = @{$v->get_all_Alleles()};
  Description: Retrieves all Alleles associated with this variation
  Returntype : reference to list of Bio::EnsEMBL::Variation::Allele objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_Alleles {
  my $self = shift;
  
  # If the private hash key 'alleles' does not exist, no attempt has been made to load them, so do that
  unless (exists($self->{alleles})) {
      
      # Get an AlleleAdaptor
      assert_ref($self->adaptor(),'Bio::EnsEMBL::Variation::DBSQL::BaseAdaptor');
      my $allele_adaptor = $self->adaptor->db->get_AlleleAdaptor();
      
      $self->add_Allele($allele_adaptor->fetch_all_by_Variation($self));
  } 

  return $self->{alleles};
}



=head2 ancestral_allele

  Arg [1]    : string $ancestral_allele (optional)
  Example    : $ancestral_allele = v->ancestral_allele();
  Description: Getter/Setter ancestral allele associated with this variation
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub ancestral_allele {
  my $self = shift;
  return $self->{'ancestral_allele'} = shift if(@_);
  return $self->{'ancestral_allele'};
}

=head2 moltype

  Arg [1]    : string $moltype (optional)
               The new value to set the moltype attribute to
  Example    : $moltype = v->moltype();
  Description: Getter/Setter moltype associated with this variation
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub moltype {
  my $self = shift;
  return $self->{'moltype'} = shift if(@_);
  return $self->{'moltype'};
}


=head2 five_prime_flanking_seq

  Arg [1]    : string $newval (optional) 
               The new value to set the five_prime_flanking_seq attribute to
  Example    : $five_prime_flanking_seq = $obj->five_prime_flanking_seq()
  Description: Getter/Setter for the five_prime_flanking_seq attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub five_prime_flanking_seq{
  my $self = shift;

  #setter of the flanking sequence
  return $self->{'five_prime_flanking_seq'} = shift if(@_);
  #lazy-load the flanking sequence from the database
  if (!defined $self->{'five_prime_flanking_seq'} && $self->{'adaptor'}){
      my $variation_adaptor = $self->adaptor()->db()->get_VariationAdaptor();
      ($self->{'three_prime_flanking_seq'},$self->{'five_prime_flanking_seq'}) = @{$variation_adaptor->get_flanking_sequence($self->{'dbID'})};
  }
  return $self->{'five_prime_flanking_seq'};
}




=head2 three_prime_flanking_seq

  Arg [1]    : string $newval (optional) 
               The new value to set the three_prime_flanking_seq attribute to
  Example    : $three_prime_flanking_seq = $obj->three_prime_flanking_seq()
  Description: Getter/Setter for the three_prime_flanking_seq attribute
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub three_prime_flanking_seq{
  my $self = shift;

  #setter of the flanking sequence
  return $self->{'three_prime_flanking_seq'} = shift if(@_);
  #lazy-load the flanking sequence from the database
  if (!defined $self->{'three_prime_flanking_seq'} && $self->{'adaptor'}){
      my $variation_adaptor = $self->adaptor()->db()->get_VariationAdaptor();
      ($self->{'three_prime_flanking_seq'},$self->{'five_prime_flanking_seq'}) = @{$variation_adaptor->get_flanking_sequence($self->{'dbID'})};
  }
  return $self->{'three_prime_flanking_seq'};
}


=head2 get_all_IndividualGenotypes

  Args       : none
  Example    : $ind_genotypes = $var->get_all_IndividualGenotypes()
  Description: Getter for IndividualGenotypes for this Variation, returns empty list if 
               there are none 
  Returntype : listref of IndividualGenotypes
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub get_all_IndividualGenotypes {
    my $self = shift;
  my $individual = shift;
    if (defined ($self->{'adaptor'})){
  my $igtya = $self->{'adaptor'}->db()->get_IndividualGenotypeAdaptor();
  
  return $igtya->fetch_all_by_Variation($self, $individual);
    }
    return [];
}

=head2 get_all_PopulationGenotypes

  Args       : none
  Example    : $pop_genotypes = $var->get_all_PopulationGenotypes()
  Description: Getter for PopulationGenotypes for this Variation, returns empty list if 
               there are none. 
  Returntype : listref of PopulationGenotypes
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub get_all_PopulationGenotypes {
    my $self = shift;

    #simulate a lazy-load on demand situation, used by the Glovar team
    if (!defined($self->{'populationGenotypes'}) && defined ($self->{'adaptor'})){
  my $pgtya = $self->{'adaptor'}->db()->get_PopulationGenotypeAdaptor();
  
  return $pgtya->fetch_all_by_Variation($self);
    }
    return $self->{'populationGenotypes'};

}


=head2 add_PopulationGenotype

    Arg [1]     : Bio::EnsEMBL::Variation::PopulationGenotype
    Example     : $v->add_PopulationGenotype($pop_genotype)
    Description : Adds another PopulationGenotype to the Variation object
    Exceptions  : thrown on bad argument
    Caller      : general
    Status      : At Risk

=cut

sub add_PopulationGenotype{
    my $self = shift;

    if (@_){
  if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::PopulationGenotype')) {
      throw("Bio::EnsEMBL::Variation::PopulationGenotype argument expected");
  }
  #a variation can have multiple PopulationGenotypes
  push @{$self->{'populationGenotypes'}},shift;
    }

}


=head2 ambig_code

    Args         : None
    Example      : my $ambiguity_code = $v->ambig_code()
    Description  : Returns the ambigutiy code for the alleles in the Variation
    ReturnType   : String $ambiguity_code
    Exceptions   : none    
    Caller       : General
    Status       : At Risk

=cut 

sub ambig_code{
    my $self = shift;
  
  my $code;
  
  # first try via VF
  if(my @vfs = @{$self->get_all_VariationFeatures}) {
    if(scalar @vfs) {
    $code = $vfs[0]->ambig_code;
    }
  }
  
  # otherwise get it via alleles attatched to this object already
  if(!defined($code)) {
    my $alleles = $self->get_all_Alleles(); #get all Allele objects
    my %alleles; #to get all the different alleles in the Variation
    map {$alleles{$_->allele}++} @{$alleles};
    my $allele_string = join "|",keys %alleles;
    $code = &ambiguity_code($allele_string);
  }
  
  return $code;
}

=head2 var_class

    Args         : None
    Example      : my $variation_class = $vf->var_class()
    Description  : returns the class for the variation, according to dbSNP classification
    ReturnType   : String $variation_class
    Exceptions   : none
    Caller       : General
    Status       : At Risk

=cut

sub var_class{
    my $self = shift;
    
    unless ($self->{class_display_term}) {
       
        unless ($self->{class_SO_term}) {
            # work out the term from the alleles
            
            my $alleles = $self->get_all_Alleles(); #get all Allele objects
            my %alleles; #to get all the different alleles in the Variation
            map {$alleles{$_->allele}++} @{$alleles};
            my $allele_string = join '/',keys %alleles;

            $self->{class_SO_term} = SO_variation_class($allele_string);
        }

        # convert the SO term to the ensembl display term

        $self->{class_display_term} = $self->is_somatic ? 
            $VARIATION_CLASSES{$self->{class_SO_term}}->{somatic_display_term} : 
            $VARIATION_CLASSES{$self->{class_SO_term}}->{display_term};
    }
    
    return $self->{class_display_term};
}

=head2 derived_allele_frequency

  Arg[1]     : Bio::EnsEMBL::Variation::Population  $population 
  Example    : $daf = $variation->derived_allele_frequency($population);
  Description: Gets the derived allele frequency for the population. 
               The DAF is the frequency of the reference allele that is 
               different from the allele in Chimp. If none of the alleles
               is the same as the ancestral, will return reference allele
               frequency
  Returntype : float
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub derived_allele_frequency{
  my $self = shift;
  my $population = shift;
  my $daf;

  if(!ref($population) || !$population->isa('Bio::EnsEMBL::Variation::Population')) {
      throw('Bio::EnsEMBL::Variation::Population argument expected.');
  }
  my $ancestral_allele = $self->ancestral_allele();
  if (defined $ancestral_allele){
  #get reference allele
  my $vf_adaptor = $self->adaptor->db->get_VariationFeatureAdaptor();
  my $vf = shift @{$vf_adaptor->fetch_all_by_Variation($self)};
  my $ref_freq;
  #get allele in population
  my $alleles = $self->get_all_Alleles();
  
  foreach my $allele (@{$alleles}){
    next unless defined $allele->population;
    
    if (($allele->allele eq $vf->ref_allele_string) and ($allele->population->name eq $population->name)){
    $ref_freq = $allele->frequency;
    }
  }
  
  if(defined $ref_freq) {
    if ($ancestral_allele eq $vf->ref_allele_string){
    $daf = 1 - $ref_freq
    }
    elsif ($ancestral_allele ne $vf->ref_allele_string){
    $daf = $ref_freq;
    }
  }
  }
  
  return $daf;
}

=head2 derived_allele

  Arg[1]     : Bio::EnsEMBL::Variation::Population  $population 
  Example    : $da = $variation->derived_allele($population);
  Description: Gets the derived allele for the population. 
  Returntype : float
  Exceptions : none
  Caller     : general
  Status     : At Risk

=cut

sub derived_allele {
     my $self = shift();
     my $population = shift();

     my $population_dbID = $population->dbID();
     my $ancestral_allele_str = $self->ancestral_allele();

     if (not defined($ancestral_allele_str)) {
         return;
     }

     my $alleles = $self->get_all_Alleles();

     my $derived_allele_str;

     foreach my $allele (@{$alleles}) {
         my $allele_population = $allele->population();

         if (defined($allele_population) and
             $allele_population->dbID() == $population_dbID)
         {
             my $allele_str = $allele->allele();

             if ($ancestral_allele_str ne $allele_str) {
                 if (defined($derived_allele_str)) {
                     return;
                 } else {
                     $derived_allele_str = $allele_str;
                 }
             }
         }
     }
     return $derived_allele_str;
}

=head2 minor_allele

  Arg [1]    : string $minor_allele (optional)
               The new minor allele string
  Example    : $ma = $obj->minor_allele()
  Description: Get/set the minor allele of this variation, as reported by dbSNP
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub minor_allele {
    my ($self, $minor_allele) = @_;
    $self->{minor_allele} = $minor_allele if defined $minor_allele;
    return $self->{minor_allele}
}

=head2 minor_allele_frequency

  Arg [1]    : float $minor_allele_frequency (optional)
               The new minor allele frequency
  Example    : $maf = $obj->minor_allele_frequency()
  Description: Get/set the frequency of the minor allele of this variation, as reported by dbSNP
  Returntype : float
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub minor_allele_frequency {
    my ($self, $minor_allele_frequency) = @_;
    $self->{minor_allele_frequency} = $minor_allele_frequency if defined $minor_allele_frequency;
    return $self->{minor_allele_frequency}
}

=head2 minor_allele_count

  Arg [1]    : int $minor_allele_count (optional)
               The new minor allele count
  Example    : $maf_count = $obj->minor_allele_count()
  Description: Get/set the sample count of the minor allele of this variation, as reported by dbSNP
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub minor_allele_count {
    my ($self, $minor_allele_count) = @_;
    $self->{minor_allele_count} = $minor_allele_count if defined $minor_allele_count;
    return $self->{minor_allele_count}
}

=head2 clinical_significance

  Arg [1]    : string $clinical_significance (optional)
               The new clinical significance string
  Example    : $ma = $obj->clinical_significance()
  Description: Get/set the clinical significance of this variation, as reported by dbSNP.
               When available, this will be one of the following strings:
                unknown 
                untested
                non-pathogenic
                probable-non-pathogenic
                probable-pathogenic
                pathogenic
                drug-response
                histocompatibility
                other
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub clinical_significance {
    my ($self, $clinical_significance) = @_;
    $self->{clinical_significance} = $clinical_significance if defined $clinical_significance;
    return $self->{clinical_significance}
}

=head2 get_all_VariationAnnotations

  Args       : none
  Example    : my $annotations = $var->get_all_VariationAnnotations()
  Description: Getter for VariationAnnotations for this Variation, returns empty list if 
               there are none. 
  Returntype : listref of VariationAnnotations
  Exceptions : none
  Caller     : general

=cut

sub get_all_VariationAnnotations {
    my $self = shift;

    #ÊAssert the adaptor reference
    assert_ref($self->adaptor(),'Bio::EnsEMBL::Variation::DBSQL::BaseAdaptor');
    
    # Get the annotations from the database
    return $self->adaptor->db->get_VariationAnnotationAdaptor()->fetch_all_by_Variation($self);

}

1;