diff variant_effect_predictor/Bio/EnsEMBL/Variation/AlleleFeature.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Variation/AlleleFeature.pm	Fri Aug 03 10:04:48 2012 -0400
@@ -0,0 +1,646 @@
+=head1 LICENSE
+
+ Copyright (c) 1999-2012 The European Bioinformatics Institute and
+ Genome Research Limited.  All rights reserved.
+
+ This software is distributed under a modified Apache license.
+ For license details, please see
+
+   http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+ Please email comments or questions to the public Ensembl
+ developers list at <dev@ensembl.org>.
+
+ Questions may also be sent to the Ensembl help desk at
+ <helpdesk@ensembl.org>.
+
+=cut
+
+# Ensembl module for Bio::EnsEMBL::Variation::AlleleFeature
+#
+# Copyright (c) 2005 Ensembl
+#
+
+
+=head1 NAME
+
+Bio::EnsEMBL::Variation::AlleleFeature - A genomic position for an allele in a sample.
+
+=head1 SYNOPSIS
+
+    # Allele feature representing a single nucleotide polymorphism
+    $af = Bio::EnsEMBL::Variation::AlleleFeature->new
+       (-start   => 100,
+        -end     => 100,
+        -strand  => 1,
+        -slice   => $slice,
+        -allele_string => 'A',
+        -variation_name => 'rs635421',
+        -variation => $v);
+    ...
+
+    # a allele feature is like any other ensembl feature, can be
+    # transformed etc.
+    $af = $af->transform('supercontig');
+
+    print $af->start(), "-", $af->end(), '(', $af->strand(), ')', "\n";
+
+    print $af->name(), ":", $af->allele_string();
+
+    # Get the Variation object which this feature represents the genomic
+    # position of. If not already retrieved from the DB, this will be
+    # transparently lazy-loaded
+    my $v = $af->variation();
+
+=head1 DESCRIPTION
+
+This is a class representing the genomic position of a allele in a sample
+from the ensembl-variation database.  The actual variation information is
+represented by an associated Bio::EnsEMBL::Variation::Variation object. Some
+of the information has been denormalized and is available on the feature for
+speed purposes.  A AlleleFeature behaves as any other Ensembl feature.
+See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>.
+
+=head1 METHODS
+
+=cut
+
+use strict;
+use warnings;
+
+package Bio::EnsEMBL::Variation::AlleleFeature;
+
+use Bio::EnsEMBL::Feature;
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::Argument  qw(rearrange);
+use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
+use Bio::EnsEMBL::Variation::ConsequenceType;
+
+our @ISA = ('Bio::EnsEMBL::Feature');
+
+=head2 new
+
+  Arg [-ADAPTOR] :
+    see superclass constructor
+
+  Arg [-START] :
+    see superclass constructor
+  Arg [-END] :
+    see superclass constructor
+
+  Arg [-STRAND] :
+    see superclass constructor
+
+  Arg [-SLICE] :
+    see superclass constructor
+
+  Arg [-VARIATION_NAME] :
+    string - the name of the variation this feature is for (denormalisation
+    from Variation object).
+
+ Arg [-SOURCE] :
+    string - the name of the source where the SNP comes from
+
+  Arg [-VARIATION] :
+    int - the variation object associated with this feature.
+
+  Arg [-VARIATION_ID] :
+    int - the internal id of the variation object associated with this
+    identifier. This may be provided instead of a variation object so that
+    the variation may be lazy-loaded from the database on demand.
+    
+  Arg [-SAMPLE_ID] :
+    int - the internal id of the sample object associated with this
+    identifier. This may be provided instead of the object so that
+    the population/individual may be lazy-loaded from the database on demand.
+
+  Arg [-ALLELE_STRING] :
+    string - the allele for this AlleleFeature object.
+  
+  Arg [-OVERLAP_CONSEQUENCES] :
+	listref of Bio::EnsEMBL::Variation::OverlapConsequence objects.
+
+  Example    :
+    $af = Bio::EnsEMBL::Variation::AlleleFeature->new
+       (-start   => 100,
+        -end     => 100,
+        -strand  => 1,
+        -slice   => $slice,
+        -allele_string => 'A',
+		-consequence_type => 'NON_SYNONYMOUS_CODING',
+        -variation_name => 'rs635421',
+	-source => 'Celera',
+	-sample_id  => $sample_id,
+        -variation => $v);
+
+  Description: Constructor. Instantiates a new AlleleFeature object.
+  Returntype : Bio::EnsEMBL::Variation::AlleleFeature
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub new {
+  my $caller = shift;
+  my $class = ref($caller) || $caller;
+
+  my $self = $class->SUPER::new(@_);
+  my ($allele, $overlap_consequences, $var_name, $variation, $variation_id, $population, $sample_id, $source) =
+    rearrange([qw(ALLELE_STRING OVERLAP_CONSEQUENCES VARIATION_NAME 
+                  VARIATION VARIATION_ID SAMPLE_ID SOURCE)], @_);
+
+  $self->{'allele_string'}          = $allele;
+  $self->{'overlap_consequences'}   = $overlap_consequences;
+  $self->{'variation_name'}         = $var_name;
+  $self->{'variation'}              = $variation;
+  $self->{'_variation_id'}          = $variation_id;
+  $self->{'_sample_id'}             = $sample_id;
+  $self->{'source'}                 = $source;
+
+  return $self;
+}
+
+
+
+sub new_fast {
+  my $class = shift;
+  my $hashref = shift;
+  return bless $hashref, $class;
+}
+
+
+=head2 allele_string
+
+  Arg [1]    : string $newval (optional)
+               The new value to set the allele attribute to
+  Example    : $allele = $obj->allele_string()
+  Description: Getter/Setter for the allele attribute.
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub allele_string{
+  my $self = shift;
+  return $self->{'allele_string'} = shift if(@_);
+  
+  return $self->{'allele_string'} if ($self->{'_half_genotype'}); #for half genotypes
+  return join('|',split (//,unambiguity_code($self->{'allele_string'}))); #for heterozygous alleles
+}
+
+
+
+=head2 consequence_type
+
+  Arg [1]    : (optional) String $term_type
+  Description: Get a list of all the unique consequence terms of this 
+               AlleleFeature. By default returns Ensembl display terms
+               (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
+               (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
+               'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
+  Returntype : listref of strings
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub consequence_type {
+    
+  my $self = shift;
+  my $term_type = shift;
+  
+  if($self->_is_sara) {
+	return ['SARA'];
+  }
+  else {
+	delete $self->{consequence_type} if defined($term_type);
+	
+	unless ($self->{consequence_type}) {
+	
+	  $term_type ||= 'SO';
+	  my $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
+	  $method_name = 'SO_term' unless $self->most_severe_OverlapConsequence->can($method_name);
+	  
+	  # work out the terms from the OverlapConsequence objects
+	  $self->{consequence_type} = 
+		[ $self->_is_sara ? 'SARA' : map { $_->$method_name } @{ $self->get_all_OverlapConsequences } ];
+		
+	  return $self->{consequence_type};
+	}
+  }
+}
+
+
+=head2 get_all_OverlapConsequences
+
+  Description: Get a list of all the unique OverlapConsequences of this AlleleFeature 
+  Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub get_all_OverlapConsequences {
+    my $self = shift;
+    return $self->{overlap_consequences}
+}
+
+
+=head2 most_severe_OverlapConsequence
+
+  Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe 
+               consequence of all the alleles of this AlleleFeature 
+  Returntype : Bio::EnsEMBL::Variation::OverlapConsequence
+  Exceptions : none
+  Status     : At Risk
+
+=cut
+
+sub most_severe_OverlapConsequence {
+    my $self = shift;
+    
+    unless ($self->{_most_severe_consequence}) {
+        
+        my $highest;
+        
+        for my $cons (@{ $self->get_all_OverlapConsequences }) {
+            $highest ||= $cons;
+            if ($cons->rank < $highest->rank) {
+                $highest = $cons;
+            }
+        }
+        
+        $self->{_most_severe_consequence} = $highest;
+    }
+    
+    return $self->{_most_severe_consequence};
+}
+
+=head2 display_consequence
+
+  Arg [1]    : (optional) String $term_type
+  Description: Get the term for the most severe consequence of this 
+               AlleleFeature. By default returns Ensembl display terms
+               (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label'
+               (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g.
+               'non_synonymous_codon') or 'NCBI' (e.g. 'missense')
+  Returntype : string
+  Exceptions : none
+  Caller     : webteam
+  Status     : At Risk
+
+=cut
+
+sub display_consequence {
+  my $self = shift;
+  my $term_type = shift;
+  
+  if($self->_is_sara) {
+	return 'SARA';
+  }
+  else {
+	$term_type ||= 'SO';
+	my $method_name = $term_type.($term_type eq 'label' ? '' : '_term');
+	$method_name = 'SO_term' unless $self->most_severe_OverlapConsequence->can($method_name);
+	
+	return $self->most_severe_OverlapConsequence->$method_name;
+  }
+}
+
+
+=head2 get_all_TranscriptVariations
+
+  Arg [1]     : (optional) listref of Bio::EnsEMBL::Transcript objects
+  Example     : $af->get_all_TranscriptVariations;
+  Description : Get all the TranscriptVariations associated with this AlleleFeature.
+                If the optional list of Transcripts is supplied, get only TranscriptVariations
+                associated with those Transcripts.
+  Returntype  : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects
+  Exceptions  : Thrown on wrong argument type
+  Caller      : general
+  Status      : At Risk
+
+=cut
+
+sub get_all_TranscriptVariations {
+  my $self = shift;
+  my $trs = shift;
+  
+  my $cons = $self->variation_feature->get_all_TranscriptVariations($trs);
+  
+  # convert the TV to a SARA one if this is a SARA genotype
+  if($self->_is_sara) {
+	$_->_convert_to_sara foreach @$cons;
+  }
+  
+  # otherwise we need to rearrange the TranscriptVariationAlleles based
+  # on the alleles of this genotype
+  else {
+	my %alleles;
+	$alleles{$_} = 1 foreach split /\||\/|\\/, $self->allele_string;
+	
+	$_->_rearrange_alleles(\%alleles) foreach @$cons;
+  }
+  
+  return $cons;
+}
+
+=head2 variation_name
+
+  Arg [1]    : string $newval (optional)
+               The new value to set the variation_name attribute to
+  Example    : $variation_name = $obj->variation_name()
+  Description: Getter/Setter for the variation_name attribute.  This is the
+               name of the variation associated with this feature.
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub variation_name{
+  my $self = shift;
+  return $self->{'variation_name'} = shift if(@_);
+  return $self->{'variation_name'};
+}
+
+=head2 variation
+
+  Arg [1]    : (optional) Bio::EnsEMBL::Variation::Variation $variation
+  Example    : $v = $af->variation();
+  Description: Getter/Setter for the variation associated with this feature.
+               If not set, and this AlleleFeature has an associated adaptor
+               an attempt will be made to lazy-load the variation from the
+               database.
+  Returntype : Bio::EnsEMBL::Variation::Variation
+  Exceptions : throw on incorrect argument
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub variation {
+  my $self = shift;
+
+  if(@_) {
+    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Variation')) {
+      throw("Bio::EnsEMBL::Variation::Variation argument expected");
+    }
+    $self->{'variation'} = shift;
+  }
+  elsif(!defined($self->{'variation'}) && $self->{'adaptor'} &&
+        defined($self->{'_variation_id'})) {
+    # lazy-load from database on demand
+    my $va = $self->{'adaptor'}->db()->get_VariationAdaptor();
+    $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'});
+  }
+
+  return $self->{'variation'};
+}
+
+=head2 variation_feature
+
+  Arg [1]    : (optional) Bio::EnsEMBL::Variation::VariationFeature $vf
+  Example    : $vf = $af->variation_feature();
+  Description: Getter/Setter for the variation feature associated with this feature.
+               If not set, and this AlleleFeature has an associated adaptor
+               an attempt will be made to lazy-load the variation from the
+               database.
+  Returntype : Bio::EnsEMBL::Variation::VariationFeature
+  Exceptions : throw on incorrect argument
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub variation_feature {
+  my $self = shift;
+
+  if(@_) {
+    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
+      throw("Bio::EnsEMBL::Variation::VariationFeature argument expected");
+    }
+    $self->{'variation_feature'} = shift;
+  }
+  elsif(!defined($self->{'variation_feature'}) && $self->{'adaptor'} &&
+        defined($self->{'_variation_feature_id'})) {
+    # lazy-load from database on demand
+    my $va = $self->{'adaptor'}->db()->get_VariationFeatureAdaptor();
+    $self->{'variation_feature'} = $va->fetch_by_dbID($self->{'_variation_feature_id'});
+  }
+
+  return $self->{'variation_feature'};
+}
+
+=head2 individual
+
+  Arg [1]    : (optional) Bio::EnsEMBL::Variation::Individual $individual
+  Example    : $p = $af->individual();
+  Description: Getter/Setter for the individual associated with this feature.
+               If not set, and this AlleleFeature has an associated adaptor
+               an attempt will be made to lazy-load the individual from the
+               database.
+  Returntype : Bio::EnsEMBL::Variation::Individual
+  Exceptions : throw on incorrect argument
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub individual {
+  my $self = shift;
+
+  if(@_) {
+    if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Individual')) {
+      throw("Bio::EnsEMBL::Variation::Individual argument expected");
+    }
+    $self->{'individual'} = shift;
+  }
+  elsif(!defined($self->{'individual'}) && $self->{'adaptor'} &&
+        defined($self->{'_sample_id'})) {
+    # lazy-load from database on demand
+    my $ia = $self->{'adaptor'}->db()->get_IndividualAdaptor();
+    $self->{'individual'} = $ia->fetch_by_dbID($self->{'_sample_id'});
+    if (!defined $self->{'individual'}){
+	warning("AlleleFeature attached to Strain, not Individual");
+    }
+  }
+
+  return $self->{'individual'};
+}
+
+
+=head2 apply_edit
+    
+    Arg [1]    : reference to string $seqref
+    Arg [2]    : int $start of the seq_ref
+    Example    : $sequence = 'ACTGAATATTTAAGGCA';
+               $af->apply_edit(\$sequence,$start);
+               print $sequence, "\n";
+    Description: Applies this AlleleFeature directly to a sequence which is
+               passed by reference.  
+               If either the start or end of this AlleleFeature are not defined
+               this function will not do anything to the passed sequence.
+    Returntype : reference to the same sequence that was passed in
+    Exceptions : none
+    Caller     : Slice
+    Status     : At Risk
+
+=cut
+
+sub apply_edit  {
+
+  my $self   = shift;
+  my $seqref = shift;
+
+  if(ref($seqref) ne 'SCALAR') {
+    throw("Reference to scalar argument expected");
+  }
+
+  if(!defined($self->{'start'}) || !defined($self->{'end'})) {
+    return $seqref;
+  }
+
+
+  my $len = $self->length;
+  my $as = $self->{'allele_string'};
+  $as =~ s/\-//g;
+  
+  substr($$seqref, $self->{'start'}-1, $len) = $as;
+  
+  return $seqref;
+
+}
+
+=head2 length_diff
+
+  Arg [1]    : none
+  Example    : my $diff = $af->length_diff();
+  Description: Returns the difference in length caused by applying this
+               AlleleFeature to a sequence.  This may be be negative (deletion),
+               positive (insertion) or 0 (replacement).
+               If either start or end are not defined 0 is returned.
+  Returntype : int
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub length_diff  {
+
+  my $self = shift;
+
+  return 0 if(!defined($self->{'end'}) || !defined($self->{'start'}));
+
+  return length($self->{'allele_string'}) - ($self->{'end'} - $self->{'start'} + 1) if ($self->{'allele_string'} ne '-'); 
+  return 0 - ($self->{'end'} - $self->{'start'} +1) if ($self->{'allele_string'} eq '-'); #do we need the +1 in the distance ??
+
+}
+
+
+sub length {
+  my $self = shift;
+  return $self->{'end'} - $self->{'start'} + 1;
+}
+
+=head2 source
+
+  Arg [1]    : string $source (optional)
+               The new value to set the source attribute to
+  Example    : $source = $vf->source()
+  Description: Getter/Setter for the source attribute
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub source{
+  my $self = shift;
+  return $self->{'source'} = shift if(@_);
+  return $self->{'source'};
+}
+
+=head2 ref_allele_string
+
+  Args       : None
+  Example    : $allele = $obj->ref_allele_string()
+  Description: Getter for the reference allele.
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+ sub ref_allele_string{
+   my $self = shift;
+
+   my $reference_allele;
+   if ( ref ($self->slice) eq 'Bio::EnsEMBL::Slice' ){
+       #we already have the reference slice, so just get the sequence
+       $reference_allele = $self->seq;
+   }
+   else{
+       #we have a Strain or IndividualSlice, get the reference sequence from the method
+       $reference_allele = $self->slice->ref_subseq($self->start,$self->end,$self->strand) || '-';
+   }
+
+   return $reference_allele;
+ }
+
+=head2 get_all_sources
+
+    Args        : none
+    Example     : my @sources = @{$af->get_all_sources()};
+    Description : returns a list of all the sources for this
+                  AlleleFeature
+    ReturnType  : reference to list of strings
+    Exceptions  : none
+    Caller      : general
+    Status      : At Risk
+                : Variation database is under development.
+=cut
+
+sub get_all_sources{
+    my $self = shift;
+    
+    my @sources;
+    my %sources;
+    if ($self->{'adaptor'}){
+	map {$sources{$_}++} @{$self->{'adaptor'}->get_all_synonym_sources($self)};
+	$sources{$self->source}++;
+	@sources = keys %sources;
+	return \@sources;	
+    }
+    return \@sources;
+}
+
+sub _is_sara{
+  my $self = shift;
+  
+  if(!defined($self->{_is_sara})) {
+	my $allele_string = $self->allele_string;
+	my $ref = $self->ref_allele_string;
+	
+	my $is_sara = 1;
+	
+	foreach my $a(split /\/|\||\\/, $allele_string) {
+	  $is_sara = 0 if $ref !~ /$a/i;
+	}
+	
+	$self->{_is_sara} = $is_sara;
+  }
+  
+  return $self->{_is_sara};
+}
+
+1;