diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,618 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
+
+=head1 SYNOPSIS
+
+  my $assembly_exception_feature_adaptor =
+    $database_adaptor->get_AssemblyExceptionFeatureAdaptor();
+
+  @assembly_exception_features =
+    $assembly_exception_feature_adaptor->fetch_all_by_Slice($slice);
+
+=head1 DESCRIPTION
+
+Assembly Exception Feature Adaptor - database access for assembly
+exception features.
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor;
+
+use strict;
+use warnings;
+no warnings qw(uninitialized);
+
+use Bio::EnsEMBL::DBSQL::BaseAdaptor;
+use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
+use Bio::EnsEMBL::AssemblyExceptionFeature;
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::Cache;
+
+our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
+
+# set the number of slices you'd like to cache
+our $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE = 100;
+
+=head2 new
+
+  Arg [1]    : list of args @args
+               Superclass constructor arguments
+  Example    : none
+  Description: Constructor which just initializes internal cache structures
+  Returntype : Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor
+  Exceptions : none
+  Caller     : implementing subclass constructors
+  Status     : Stable
+
+=cut
+
+sub new {
+  my $caller = shift;
+  my $class = ref($caller) || $caller;
+
+  my $self = $class->SUPER::new(@_);
+
+  # initialize an LRU cache for slices
+  my %cache;
+  tie(%cache, 'Bio::EnsEMBL::Utils::Cache',
+    $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE);
+
+  $self->{'_aexc_slice_cache'} = \%cache;
+
+  return $self;
+}
+
+=head2 fetch_all
+
+  Arg [1]    : none
+  Example    : my @axfs = @{$axfa->fetch_all()};
+  Description: Retrieves all assembly exception features which are in the
+               database and builds internal caches of the features.
+  Returntype : reference to list of Bio::EnsEMBL::AssemblyExceptionFeatures
+  Exceptions : none
+  Caller     : fetch_by_dbID, fetch_by_Slice
+  Status     : Stable
+
+=cut
+
+sub fetch_all {
+  my $self = shift;
+
+  # this is the "global" cache for all assembly exception features in the db
+  if(defined($self->{'_aexc_cache'})) {
+    return $self->{'_aexc_cache'};
+  }
+
+  my $statement = qq(
+  SELECT    ae.assembly_exception_id,
+            ae.seq_region_id,
+            ae.seq_region_start,
+            ae.seq_region_end,
+            ae.exc_type,
+            ae.exc_seq_region_id,
+            ae.exc_seq_region_start,
+            ae.exc_seq_region_end,
+            ae.ori
+  FROM      assembly_exception ae,
+            coord_system cs,
+            seq_region sr
+  WHERE     cs.species_id = ?
+    AND     sr.coord_system_id = cs.coord_system_id
+    AND     sr.seq_region_id = ae.seq_region_id);
+
+  my $sth = $self->prepare($statement);
+
+  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
+
+  $sth->execute();
+
+  my ($ax_id, $sr_id, $sr_start, $sr_end,
+      $x_type, $x_sr_id, $x_sr_start, $x_sr_end, $ori);
+
+  $sth->bind_columns(\$ax_id, \$sr_id, \$sr_start, \$sr_end,
+                     \$x_type, \$x_sr_id, \$x_sr_start, \$x_sr_end, \$ori);
+
+  my @features;
+  my $sa = $self->db()->get_SliceAdaptor();
+
+  $self->{'_aexc_dbID_cache'} = {};
+
+  while($sth->fetch()) {
+    my $slice   = $sa->fetch_by_seq_region_id($sr_id);
+    my $x_slice = $sa->fetch_by_seq_region_id($x_sr_id);
+
+    # each row creates TWO features, each of which has alternate_slice
+    # pointing to the "other" one
+
+   
+    my $a = Bio::EnsEMBL::AssemblyExceptionFeature->new
+          ('-dbID'            => $ax_id,
+           '-start'           => $sr_start,
+           '-end'             => $sr_end,
+           '-strand'          => 1,
+           '-adaptor'         => $self,
+           '-slice'           => $slice,
+           '-alternate_slice' => $x_slice->sub_Slice($x_sr_start, $x_sr_end),
+           '-type'            => $x_type);
+   
+    push @features, $a;
+    $self->{'_aexc_dbID_cache'}->{$ax_id} = $a;
+
+    push @features, Bio::EnsEMBL::AssemblyExceptionFeature->new
+          ('-dbID'            => $ax_id,
+           '-start'           => $x_sr_start,
+           '-end'             => $x_sr_end,
+           '-strand'          => 1,
+           '-adaptor'         => $self,
+           '-slice'           => $x_slice,
+           '-alternate_slice' => $slice->sub_Slice($sr_start, $sr_end),
+           '-type'            => "$x_type REF" );
+  }
+
+  $sth->finish();
+
+  $self->{'_aexc_cache'} = \@features;
+  
+  return \@features;
+}
+
+
+=head2 fetch_by_dbID
+
+  Arg [1]    : int $dbID
+  Example    : my $axf = $axfa->fetch_by_dbID(3);
+  Description: Retrieves a single assembly exception feature via its internal
+               identifier.  Note that this only retrieves one of the two
+               assembly exception features which are represented by a single
+               row in the assembly_exception table.
+  Returntype : Bio::EnsEMBL::AssemblyExceptionFeature
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub fetch_by_dbID {
+  my $self = shift;
+  my $dbID = shift;
+
+  if(!exists($self->{'_aexc_dbID_cache'})) {
+    # force loading of cache
+    $self->fetch_all();
+  }
+
+  return $self->{'_aexc_dbID_cache'}->{$dbID};
+}
+
+
+=head2 fetch_all_by_Slice
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+  Example    : my @axfs = @{$axfa->fetch_all_by_Slice($slice)};
+  Description: Retrieves all assembly exception features which overlap the
+               provided slice.  The returned features will be in coordinate
+               system of the slice.
+  Returntype : reference to list of Bio::EnsEMBL::AssemblyException features
+  Exceptions : none
+  Caller     : Feature::get_all_alt_locations, general
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_Slice {
+  my $self = shift;
+  my $slice = shift;
+
+  my $key= uc($slice->name());
+
+  # return features from the slice cache if present
+  if(exists($self->{'_aexc_slice_cache'}->{$key})) {
+    return $self->{'_aexc_slice_cache'}->{$key};
+  }
+
+  my $all_features = $self->fetch_all();
+
+  my $mcc = $self->db()->get_MetaCoordContainer();
+  my $css = $mcc->fetch_all_CoordSystems_by_feature_type('assembly_exception');
+
+  my @features;
+
+  my $ma = $self->db()->get_AssemblyMapperAdaptor();
+
+  foreach my $cs (@$css) {
+    my $mapper;
+    if($cs->equals($slice->coord_system)) {
+      $mapper = undef;
+    } else {
+      $mapper = $ma->fetch_by_CoordSystems($cs,$slice->coord_system());
+    }
+
+    push @features, @{ $self->_remap($all_features, $mapper, $slice) };
+  }
+
+  $self->{'_aexc_slice_cache'}->{$key} = \@features;
+
+  return \@features;
+}
+
+
+#
+# Given a list of features checks if they are in the correct coord system
+# by looking at the first features slice.  If they are not then they are
+# converted and placed on the slice.
+#
+# Note that this is a re-implementation of a method with the same name in
+# BaseFeatureAdaptor, and in contrast to the latter which maps features in
+# place, this method returns a remapped copy of each feature. The reason for
+# this is to get around conflicts with caching.
+#
+sub _remap {
+  my ($self, $features, $mapper, $slice) = @_;
+
+  # check if any remapping is actually needed
+  if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature') ||
+                    $features->[0]->slice == $slice)) {
+    return $features;
+  }
+
+  # remapping has not been done, we have to do our own conversion from
+  # to slice coords
+
+  my @out;
+
+  my $slice_start = $slice->start();
+  my $slice_end   = $slice->end();
+  my $slice_strand = $slice->strand();
+  my $slice_cs    = $slice->coord_system();
+
+  my ($seq_region, $start, $end, $strand);
+
+  my $slice_seq_region = $slice->seq_region_name();
+
+  foreach my $f (@$features) {
+    # since feats were obtained in contig coords, attached seq is a contig
+    my $fslice = $f->slice();
+    if(!$fslice) {
+      throw("Feature does not have attached slice.\n");
+    }
+    my $fseq_region = $fslice->seq_region_name();
+    my $fcs = $fslice->coord_system();
+
+    if(!$slice_cs->equals($fcs)) {
+      # slice of feature in different coord system, mapping required
+      ($seq_region, $start, $end, $strand) =
+        $mapper->fastmap($fseq_region,$f->start(),$f->end(),$f->strand(),$fcs);
+
+      # undefined start means gap
+      next if(!defined $start);
+    } else {
+      $start      = $f->start();
+      $end        = $f->end();
+      $strand     = $f->strand();
+      $seq_region = $f->slice->seq_region_name();
+    }
+
+    # maps to region outside desired area
+    next if ($start > $slice_end) || ($end < $slice_start) || 
+      ($slice_seq_region ne $seq_region);
+
+    # create new copies of successfully mapped feaatures with shifted start,
+    # end and strand
+    my ($new_start, $new_end);
+    if($slice_strand == -1) {
+      $new_start = $slice_end - $end + 1;
+      $new_end = $slice_end - $start + 1;
+    } else {
+      $new_start = $start - $slice_start + 1;
+      $new_end = $end - $slice_start + 1;
+    }
+    
+    push @out, Bio::EnsEMBL::AssemblyExceptionFeature->new(
+            '-dbID'            => $f->dbID,
+            '-start'           => $new_start,
+            '-end'             => $new_end,
+            '-strand'          => $strand * $slice_strand,
+            '-adaptor'         => $self,
+            '-slice'           => $slice,
+            '-alternate_slice' => $f->alternate_slice,
+            '-type'            => $f->type,
+    );
+  }
+
+  return \@out;
+}
+
+=head2 store
+
+    Arg[1]       : Bio::EnsEMBL::AssemblyException $asx
+    Arg[2]       : Bio::EnsEMBL::AssemblyException $asx2
+
+    Example      : $asx = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
+                   $asx2 = Bio::EnsEMBL::AssemblyExceptionFeature->new(...)
+                   $asx_seq_region_id = $asx_adaptor->store($asx);
+    Description:  This stores a assembly exception feature in the 
+                  assembly_exception table and returns the assembly_exception_id.
+                  Needs 2 features: one pointing to the Assembly_exception, and the
+                  other pointing to the region in the reference that is being mapped to
+                  Will check that start, end and type are defined, and the alternate
+                  slice is present as well.
+    ReturnType:   int
+    Exceptions:   throw if assembly exception not defined (needs start, end,
+		  type and alternate_slice) of if $asx not a Bio::EnsEMBL::AssemblyException
+    Caller:       general
+    Status:       Stable
+
+=cut
+
+sub store{
+  my $self = shift;
+  my $asx = shift;
+  my $asx2 = shift;
+
+
+  if (! $asx->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
+    throw("$asx is not a Ensembl assemlby exception -- not stored");
+  }
+  #if already present, return ID in the database
+  my $db = $self->db();
+  if ($asx->is_stored($db)){
+    return $asx->dbID();
+  }
+  #do some checkings for the object
+  #at the moment, the orientation is always 1
+  if (! $asx->start || ! $asx->end ){
+    throw("Assembly exception does not have coordinates");
+  }
+  if ($asx->type !~ /PAR|HAP|PATCH_NOVEL|PATCH_FIX/){
+    throw("Only types of assembly exception features valid are PAR, HAP, PATCH_FIX or PATCH_NOVEL");
+  }
+  if ( !($asx->alternate_slice->isa('Bio::EnsEMBL::Slice')) ){
+    throw("Alternate slice should be a Bio::EnsEMBL::Slice");
+  }
+  #now check the other Assembly exception feature, the one pointing to the REF
+  # region
+  if (!$asx2->isa('Bio::EnsEMBL::AssemblyExceptionFeature')){
+    throw("$asx2 is not a Ensembl assemlby exception -- not stored");
+  }
+  if (! $asx2->start || ! $asx2->end ){
+    throw("Assembly exception does not have coordinates");
+  }
+  if ($asx2->type !~ /HAP REF|PAR REF|PATCH_NOVEL REF|PATCH_FIX REF/){
+    throw("$asx2 should have  type of assembly exception features HAP REF, PAR REF, PATCH_FIX REF or PATCH_NOVEL REF");
+  }
+  if (! ($asx2->alternate_slice->isa('Bio::EnsEMBL::Slice')) ){
+    throw("Alternate slice should be a Bio::EnsEMBL::Slice");
+  }
+  #finally check that both features are pointing to each other slice
+  if ($asx->slice != $asx2->alternate_slice || $asx->alternate_slice != $asx2->slice){
+    throw("Slice and alternate slice in both features are not pointing to each other");
+  }
+  #prepare the SQL
+  my $asx_sql = q{
+	INSERT INTO assembly_exception( seq_region_id, seq_region_start,
+					seq_region_end, 
+					exc_type, exc_seq_region_id,
+					exc_seq_region_start, exc_seq_region_end,
+					ori)
+	    VALUES (?, ?, ?, ?, ?, ?, ?, 1)
+	};
+  
+  my $asx_st = $self->prepare($asx_sql);
+  my $asx_id = undef;
+  my $asx_seq_region_id;
+  my $asx2_seq_region_id;
+  my $original = $asx;
+  my $original2 = $asx2;
+  #check all feature information
+  ($asx, $asx_seq_region_id) = $self->_pre_store($asx);
+  ($asx2, $asx2_seq_region_id) = $self->_pre_store($asx2);
+  
+  #and store it
+  $asx_st->bind_param(1, $asx_seq_region_id, SQL_INTEGER);
+  $asx_st->bind_param(2, $asx->start(), SQL_INTEGER);
+  $asx_st->bind_param(3, $asx->end(), SQL_INTEGER);
+  $asx_st->bind_param(4, $asx->type(), SQL_VARCHAR);
+  $asx_st->bind_param(5, $asx2_seq_region_id, SQL_INTEGER);
+  $asx_st->bind_param(6, $asx2->start(), SQL_INTEGER);
+  $asx_st->bind_param(7, $asx2->end(), SQL_INTEGER);
+  
+  $asx_st->execute();
+  $asx_id = $asx_st->{'mysql_insertid'};
+  
+  #finally, update the dbID and adaptor of the asx and asx2
+  $original->adaptor($self);
+  $original->dbID($asx_id);
+  $original2->adaptor($self);
+  $original2->dbID($asx_id);
+  #and finally update dbID cache with new assembly exception
+  $self->{'_aexc_dbID_cache'}->{$asx_id} = $original;
+  #and update the other caches as well
+  push @{$self->{'_aexc_slice_cache'}->{uc($asx->slice->name)}},$original, $original2;
+  push @{$self->{'_aexc_cache'}}, $original, $original2;
+  
+  return $asx_id;
+}
+
+#
+# Helper function containing some common feature storing functionality
+#
+# Given a Feature this will return a copy (or the same feature if no changes 
+# to the feature are needed) of the feature which is relative to the start
+# of the seq_region it is on. The seq_region_id of the seq_region it is on
+# is also returned.
+#
+# This method will also ensure that the database knows which coordinate
+# systems that this feature is stored in.
+# Since this adaptor doesn't inherit from BaseFeatureAdaptor, we need to copy
+# the code
+#
+
+sub _pre_store {
+  my $self    = shift;
+  my $feature = shift;
+
+  if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
+    throw('Expected Feature argument.');
+  }
+
+  $self->_check_start_end_strand($feature->start(),$feature->end(),
+                                 $feature->strand());
+
+
+  my $db = $self->db();
+
+  my $slice_adaptor = $db->get_SliceAdaptor();
+  my $slice = $feature->slice();
+
+  if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))  ) {
+    throw('Feature must be attached to Slice to be stored.');
+  }
+
+  # make sure feature coords are relative to start of entire seq_region
+
+  if($slice->start != 1 || $slice->strand != 1) {
+    #move feature onto a slice of the entire seq_region
+    $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
+                                             $slice->seq_region_name(),
+                                             undef, #start
+                                             undef, #end
+                                             undef, #strand
+                                             $slice->coord_system->version());
+
+    $feature = $feature->transfer($slice);
+
+    if(!$feature) {
+      throw('Could not transfer Feature to slice of ' .
+            'entire seq_region prior to storing');
+    }
+  }
+
+  # Ensure this type of feature is known to be stored in this coord system.
+  my $cs = $slice->coord_system;
+
+   my $mcc = $db->get_MetaCoordContainer();
+
+  $mcc->add_feature_type($cs, 'assembly_exception', $feature->length);
+
+  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
+
+  if(!$seq_region_id) {
+    throw('Feature is associated with seq_region which is not in this DB.');
+  }
+
+  return ($feature, $seq_region_id);
+}
+
+#
+# helper function used to validate start/end/strand and 
+# hstart/hend/hstrand etc.
+#
+sub _check_start_end_strand {
+  my $self = shift;
+  my $start = shift;
+  my $end   = shift;
+  my $strand = shift;
+
+  #
+  # Make sure that the start, end, strand are valid
+  #
+  if(int($start) != $start) {
+    throw("Invalid Feature start [$start].  Must be integer.");
+  }
+  if(int($end) != $end) {
+    throw("Invalid Feature end [$end]. Must be integer.");
+  }
+  if(int($strand) != $strand || $strand < -1 || $strand > 1) {
+    throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
+  }
+  if($end < $start) {
+    throw("Invalid Feature start/end [$start/$end]. Start must be less " .
+          "than or equal to end.");
+  }
+
+  return 1;
+}
+
+=head2 remove
+
+  Arg [1]    : $asx Bio::EnsEMBL::AssemblyFeatureException 
+  Example    : $asx_adaptor->remove($asx);
+  Description: This removes a assembly exception feature from the database.  
+  Returntype : none
+  Exceptions : thrown if $asx arg does not implement dbID(), or if
+               $asx->dbID is not a true value
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+#again, this method is generic in BaseFeatureAdaptor, but since this class
+#is not inheriting, need to copy&paste
+sub remove {
+  my ($self, $feature) = @_;
+
+  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::AssemblyExceptionFeature')) {
+    throw('AssemblyExceptionFeature argument is required');
+  }
+
+  if(!$feature->is_stored($self->db)) {
+    throw("This feature is not stored in this database");
+  }
+
+  my $asx_id = $feature->dbID();
+  my $key = uc($feature->slice->name);
+  my $sth = $self->prepare("DELETE FROM assembly_exception WHERE assembly_exception_id = ?");
+  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
+  $sth->execute();
+  
+  #and clear cache
+  #and finally update dbID cache
+  delete $self->{'_aexc_dbID_cache'}->{$asx_id};
+  #and remove from cache feature
+  my @features;
+  foreach my $asx (@{$self->{'_aexc_slice_cache'}->{$key}}){
+      if ($asx->dbID != $asx_id){
+	  push @features, $asx;
+      }
+  }
+  $self->{'_aexc_slice_cache'}->{$key} = \@features;
+  @features = ();
+  foreach my $asx (@{$self->{'_aexc_cache'}}){
+      if ($asx->dbID != $asx_id){
+	  push @features, $asx;
+      }
+  }
+  $self->{'_aexc_cache'} = \@features;
+
+#unset the feature dbID ad adaptor
+  $feature->dbID(undef);
+  $feature->adaptor(undef);
+
+  return;
+}
+
+
+1;