diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/PredictionTranscriptAdaptor.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,587 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor -
+Performs database interaction related to PredictionTranscripts
+
+=head1 SYNOPSIS
+
+  # get a prediction transcript adaptor from the database
+  $pta = $database_adaptor->get_PredictionTranscriptAdaptor();
+
+  # get a slice on a region of chromosome 1
+  $sa = $database_adaptor->get_SliceAdaptor();
+
+  $slice = $sa->fetch_by_region( 'chromosome', 'x', 100000, 200000 );
+
+  # get all the prediction transcripts from the slice region
+  $prediction_transcripts = @{ $pta->fetch_all_by_Slice($slice) };
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;
+
+use vars qw( @ISA );
+use strict;
+
+use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
+use Bio::EnsEMBL::DBSQL::AnalysisAdaptor;
+use Bio::EnsEMBL::PredictionTranscript;
+use Bio::EnsEMBL::Utils::Exception qw(deprecate throw warning);
+
+@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
+
+
+# _tables
+#
+#  Arg [1]    : none
+#  Example    : none
+#  Description: Implements abstract superclass method to define the table used
+#               to retrieve prediction transcripts from the database
+#  Returntype : string
+#  Exceptions : none
+#  Caller     : generic_fetch
+
+sub _tables {
+  my $self = shift;
+
+  return ['prediction_transcript', 'pt'];
+}
+
+
+# _columns
+
+#  Arg [1]    : none
+#  Example    : none
+#  Description: Implements abstract superclass method to define the columns
+#               retrieved in database queries used to create prediction 
+#               transcripts.
+#  Returntype : list of strings
+#  Exceptions : none
+#  Caller     : generic_fetch
+#
+
+sub _columns {
+  my $self = shift;
+
+  return qw( pt.prediction_transcript_id
+             pt.seq_region_id
+             pt.seq_region_start
+             pt.seq_region_end
+             pt.seq_region_strand
+             pt.analysis_id
+             pt.display_label);
+}
+
+
+=head2 fetch_by_stable_id
+
+  Arg [1]    : string $stable_id
+               The stable id of the transcript to retrieve
+  Example    : $trans = $trans_adptr->fetch_by_stable_id('GENSCAN00000001234');
+  Description: Retrieves a prediction transcript via its display_label.
+               This method is called fetch_by_stable_id for polymorphism with
+               the TranscriptAdaptor.  Prediction transcript display_labels are
+               not necessarily stable in that the same identifier may be reused
+               for a completely different prediction transcript in a subsequent
+               database release.
+  Returntype : Bio::EnsEMBL::PredictionTranscript
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub fetch_by_stable_id {
+  my $self = shift;
+  my $stable_id = shift;
+
+  throw('Stable_id argument expected') if(!$stable_id);
+
+  my $syn = $self->_tables()->[1];
+
+  $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
+  my $pts = $self->generic_fetch("$syn.display_label = ?");
+
+  return (@$pts) ? $pts->[0] : undef;
+}
+
+
+
+=head2 fetch_all_by_Slice
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+               The slice to fetch transcripts on.
+  Arg [3]    : (optional) boolean $load_exons
+               if true, exons will be loaded immediately rather than
+               lazy loaded later.
+  Example    : $transcripts = $
+  Description: Overrides superclass method to optionally load exons
+               immediately rather than lazy-loading them later.  This
+               is more efficient when there are a lot of transcripts whose
+               exons are going to be used.
+  Returntype : reference to list of transcripts
+  Exceptions : thrown if exon cannot be placed on transcript slice
+  Caller     : Slice::get_all_Transcripts
+  Status     : Stable
+
+=cut
+
+sub fetch_all_by_Slice {
+  my $self  = shift;
+  my $slice = shift;
+  my $logic_name = shift;
+  my $load_exons = shift;
+
+  my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
+
+  # if there are 0 or 1 transcripts still do lazy-loading
+  if(!$load_exons || @$transcripts < 2) {
+    return $transcripts;
+  }
+
+  # preload all of the exons now, instead of lazy loading later
+  # faster than 1 query per transcript
+
+  # get extent of region spanned by transcripts
+  my ($min_start, $max_end);
+  foreach my $tr (@$transcripts) {
+    if(!defined($min_start) || $tr->seq_region_start() < $min_start) {
+      $min_start = $tr->seq_region_start();
+    }
+    if(!defined($max_end) || $tr->seq_region_end() > $max_end) {
+      $max_end   = $tr->seq_region_end();
+    }
+  }
+
+#  mades no sense, the limit for the slice will be defined by the transcripts
+#  $min_start += $slice->start() - 1;
+#  $max_end   += $slice->start() - 1;
+
+  my $ext_slice;
+
+  if($min_start >= $slice->start() && $max_end <= $slice->end()) {
+    $ext_slice = $slice;
+  } else {
+    my $sa = $self->db()->get_SliceAdaptor();
+    $ext_slice = $sa->fetch_by_region
+      ($slice->coord_system->name(), $slice->seq_region_name(),
+       $min_start,$max_end, $slice->strand(), $slice->coord_system->version());
+  }
+
+  # associate exon identifiers with transcripts
+
+  my %tr_hash = map {$_->dbID => $_} @$transcripts;
+
+  my $tr_id_str = '(' . join(',', keys %tr_hash) . ')';
+
+  my $sth = $self->prepare
+    ("SELECT prediction_transcript_id, prediction_exon_id, exon_rank " .
+     "FROM   prediction_exon " .
+     "WHERE  prediction_transcript_id IN $tr_id_str");
+
+  $sth->execute();
+
+  my ($ex_id, $tr_id, $rank);
+  $sth->bind_columns(\$tr_id, \$ex_id, \$rank);
+
+  my %ex_tr_hash;
+
+  while($sth->fetch()) {
+    $ex_tr_hash{$ex_id} ||= [];
+    push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank];
+  }
+
+  $sth->finish();
+
+  my $ea = $self->db()->get_PredictionExonAdaptor();
+  my $exons = $ea->fetch_all_by_Slice($ext_slice);
+
+  # move exons onto transcript slice, and add them to transcripts
+  foreach my $ex (@$exons) {
+    $ex = $ex->transfer($slice) if($slice != $ext_slice);
+
+    if(!$ex) {
+      throw("Unexpected. PredictionExon could not be transfered onto " .
+            "PredictionTranscript slice.");
+    }
+
+    foreach my $row (@{$ex_tr_hash{$ex->dbID()}}) {
+      my ($tr, $rank) = @$row;
+      $tr->add_Exon($ex, $rank);
+    }
+  }
+
+  return $transcripts;
+}
+
+
+
+
+
+=head2 _objs_from_sth
+
+  Arg [1]    : DBI:st $sth 
+               An executed DBI statement handle
+  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
+               An mapper to be used to convert contig coordinates
+               to assembly coordinates.
+  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
+               A slice to map the prediction transcript to.   
+  Example    : $p_transcripts = $self->_objs_from_sth($sth);
+  Description: Creates a list of Prediction transcripts from an executed DBI
+               statement handle.  The columns retrieved via the statement 
+               handle must be in the same order as the columns defined by the
+               _columns method.  If the slice argument is provided then the
+               the prediction transcripts will be in returned in the coordinate
+               system of the $slice argument.  Otherwise the prediction 
+               transcripts will be returned in the RawContig coordinate system.
+  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
+  Exceptions : none
+  Caller     : superclass generic_fetch
+  Status     : Stable
+
+=cut
+
+sub _objs_from_sth {
+  my ($self, $sth, $mapper, $dest_slice) = @_;
+
+  #
+  # This code is ugly because an attempt has been made to remove as many
+  # function calls as possible for speed purposes.  Thus many caches and
+  # a fair bit of gymnastics is used.
+  #
+
+  my $sa = $self->db()->get_SliceAdaptor();
+  my $aa = $self->db()->get_AnalysisAdaptor();
+
+  my @ptranscripts;
+  my %analysis_hash;
+  my %slice_hash;
+  my %sr_name_hash;
+  my %sr_cs_hash;
+
+  my ($prediction_transcript_id,
+      $seq_region_id,
+      $seq_region_start,
+      $seq_region_end,
+      $seq_region_strand,
+      $analysis_id,
+      $display_label);
+
+  $sth->bind_columns(\$prediction_transcript_id,
+                     \$seq_region_id,
+                     \$seq_region_start,
+                     \$seq_region_end,
+                     \$seq_region_strand,
+                     \$analysis_id,
+                     \$display_label);
+
+  my $asm_cs;
+  my $cmp_cs;
+  my $asm_cs_vers;
+  my $asm_cs_name;
+  my $cmp_cs_vers;
+  my $cmp_cs_name;
+  if($mapper) {
+    $asm_cs = $mapper->assembled_CoordSystem();
+    $cmp_cs = $mapper->component_CoordSystem();
+    $asm_cs_name = $asm_cs->name();
+    $asm_cs_vers = $asm_cs->version();
+    $cmp_cs_name = $cmp_cs->name();
+    $cmp_cs_vers = $cmp_cs->version();
+  }
+
+  my $dest_slice_start;
+  my $dest_slice_end;
+  my $dest_slice_strand;
+  my $dest_slice_length;
+  my $dest_slice_sr_name;
+  my $dest_slice_sr_id;
+  if($dest_slice) {
+    $dest_slice_start  = $dest_slice->start();
+    $dest_slice_end    = $dest_slice->end();
+    $dest_slice_strand = $dest_slice->strand();
+    $dest_slice_length = $dest_slice->length();
+    $dest_slice_sr_name = $dest_slice->seq_region_name();
+    $dest_slice_sr_id =  $dest_slice->get_seq_region_id();
+  }
+
+ FEATURE: while($sth->fetch()) {
+
+    #get the analysis object
+    my $analysis = $analysis_hash{$analysis_id} ||=
+      $aa->fetch_by_dbID($analysis_id);
+    #need to get the internal_seq_region, if present
+    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
+    my $slice = $slice_hash{"ID:".$seq_region_id};
+
+    if(!$slice) {
+      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
+      $slice_hash{"ID:".$seq_region_id} = $slice;
+      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
+      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
+    }
+
+    my $sr_name = $sr_name_hash{$seq_region_id};
+    my $sr_cs   = $sr_cs_hash{$seq_region_id};
+    #
+    # remap the feature coordinates to another coord system 
+    # if a mapper was provided
+    #
+    if($mapper) {
+
+      if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')  ) {
+	    ( $seq_region_id,  $seq_region_start,
+	      $seq_region_end, $seq_region_strand )
+		=
+		$mapper->map( $sr_name, $seq_region_start, $seq_region_end,
+                          $seq_region_strand, $sr_cs, 1, $dest_slice);
+
+      } else {
+
+	    ( $seq_region_id,  $seq_region_start,
+	      $seq_region_end, $seq_region_strand )
+		=
+		$mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
+                          $seq_region_strand, $sr_cs );
+      }
+
+      #skip features that map to gaps or coord system boundaries
+      next FEATURE if(!defined($seq_region_id));
+
+      #get a slice in the coord system we just mapped to
+#      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
+        $slice = $slice_hash{"ID:".$seq_region_id} ||=
+          $sa->fetch_by_seq_region_id($seq_region_id);
+#      } else {
+#        $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
+#          $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
+#                               $asm_cs_vers);
+#      }
+    }
+
+    #
+    # If a destination slice was provided convert the coords
+    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
+    #
+    if($dest_slice) {
+      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
+	if($dest_slice_strand == 1) {
+	  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
+	  $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
+	} else {
+	  my $tmp_seq_region_start = $seq_region_start;
+	  $seq_region_start = $dest_slice_end - $seq_region_end + 1;
+	  $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
+	  $seq_region_strand *= -1;
+	}
+      }
+
+      #throw away features off the end of the requested slice
+      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
+	( $dest_slice_sr_id ne $seq_region_id )) {
+	next FEATURE;
+      }
+
+
+      $slice = $dest_slice;
+    }
+
+    # Finally, create the new PredictionTranscript.
+    push( @ptranscripts,
+          $self->_create_feature('Bio::EnsEMBL::PredictionTranscript', {
+                                   '-start'    => $seq_region_start,
+                                   '-end'      => $seq_region_end,
+                                   '-strand'   => $seq_region_strand,
+                                   '-adaptor'  => $self,
+                                   '-slice'    => $slice,
+                                   '-analysis' => $analysis,
+                                   '-dbID' => $prediction_transcript_id,
+                                   '-display_label' => $display_label
+                                 } ) );
+
+  }
+
+  return \@ptranscripts;
+}
+
+
+
+=head2 store
+
+  Arg [1]    : list of Bio::EnsEMBL::PredictionTranscript @pre_transcripts 
+  Example    : $prediction_transcript_adaptor->store(@pre_transcripts);
+  Description: Stores a list of given prediction transcripts in database. 
+               Puts dbID and Adaptor into each object stored object.
+  Returntype : none
+  Exceptions : on wrong argument type 
+  Caller     : general 
+  Status     : Stable
+
+=cut
+
+sub store {
+  my ( $self, @pre_transcripts ) = @_;
+
+  my $ptstore_sth = $self->prepare
+    (qq{INSERT INTO prediction_transcript (seq_region_id, seq_region_start,
+                                           seq_region_end, seq_region_strand, 
+                                           analysis_id, display_label)
+        VALUES( ?, ?, ?, ?, ?, ?)});
+
+  my $ptupdate_sth = $self->prepare
+    (qq{UPDATE prediction_transcript SET display_label = ?
+        WHERE  prediction_transcript_id = ?});
+
+  my $db = $self->db();
+  my $analysis_adaptor = $db->get_AnalysisAdaptor();
+  my $pexon_adaptor = $db->get_PredictionExonAdaptor();
+
+  FEATURE: foreach my $pt (@pre_transcripts) {
+    if(!ref($pt) || !$pt->isa('Bio::EnsEMBL::PredictionTranscript')) {
+      throw('Expected PredictionTranscript argument not [' . ref($pt).']');
+    }
+
+    #skip prediction transcripts that have already been stored
+    if($pt->is_stored($db)) {
+      warning('Not storing already stored prediction transcript '. $pt->dbID);
+      next FEATURE;
+    }
+
+    #get analysis and store it if it is not in the db
+    my $analysis = $pt->analysis();
+    if(!$analysis) {
+      throw('Prediction transcript must have analysis to be stored.');
+    }
+    if(!$analysis->is_stored($db)) {
+      $analysis_adaptor->store($analysis);
+    }
+
+    #ensure that the transcript coordinates are correct, they may not be,
+    #if somebody has done some exon coordinate juggling and not recalculated
+    #the transcript coords.
+    $pt->recalculate_coordinates();
+
+    my $original = $pt;
+    my $seq_region_id;
+    ($pt, $seq_region_id) = $self->_pre_store($pt);
+
+    #store the prediction transcript
+    $ptstore_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
+    $ptstore_sth->bind_param(2,$pt->start,SQL_INTEGER);
+    $ptstore_sth->bind_param(3,$pt->end,SQL_INTEGER);
+    $ptstore_sth->bind_param(4,$pt->strand,SQL_TINYINT);
+    $ptstore_sth->bind_param(5,$analysis->dbID,SQL_INTEGER);
+    $ptstore_sth->bind_param(6,$pt->display_label,SQL_VARCHAR);
+
+    $ptstore_sth->execute();
+
+    my $pt_id = $ptstore_sth->{'mysql_insertid'};
+    $original->dbID($pt_id);
+    $original->adaptor($self);
+
+    #store the exons
+    my $rank = 1;
+    foreach my $pexon (@{$original->get_all_Exons}) {
+      $pexon_adaptor->store($pexon, $pt_id, $rank++);
+    }
+
+    # if a display label was not defined autogenerate one
+    if(!defined($pt->display_label())) {
+      my $zeros = '0' x (11 - length($pt_id));
+      my $display_label = uc($analysis->logic_name()) . $zeros . $pt_id;
+      $ptupdate_sth->bind_param(1,$display_label,SQL_VARCHAR);
+      $ptupdate_sth->bind_param(2,$pt_id,SQL_INTEGER);
+      $ptupdate_sth->execute();
+      $original->display_label($display_label);
+    }
+  }
+}
+
+
+
+=head2 remove
+
+  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $pt 
+  Example    : $prediction_transcript_adaptor->remove($pt);
+  Description: removes given prediction transcript $pt from database. 
+  Returntype : none
+  Exceptions : throws if argument not a  Bio::EnsEMBL::PredictionTranscript
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub remove {
+  my $self = shift;
+  my $pre_trans = shift;
+
+  if(!ref($pre_trans)||!$pre_trans->isa('Bio::EnsEMBL::PredictionTranscript')){
+    throw('Expected PredictionTranscript argument.');
+  }
+
+  if(!$pre_trans->is_stored($self->db())) {
+    warning('PredictionTranscript is not stored in this DB - not removing.');
+    return;
+  }
+
+  #remove all associated prediction exons
+  my $pexon_adaptor = $self->db()->get_PredictionExonAdaptor();
+  foreach my $pexon (@{$pre_trans->get_all_Exons}) {
+    $pexon_adaptor->remove($pexon);
+  }
+
+  #remove the prediction transcript
+  my $sth = $self->prepare( "DELETE FROM prediction_transcript
+                             WHERE prediction_transcript_id = ?" );
+  $sth->bind_param(1,$pre_trans->dbID,SQL_INTEGER);
+  $sth->execute();
+
+  #unset the adaptor and internal id
+  $pre_trans->dbID(undef);
+  $pre_trans->adaptor(undef);
+}
+
+
+=head2 list_dbIDs
+
+  Arg [1]    : none
+  Example    : @feature_ids = @{$prediction_transcript_adaptor->list_dbIDs()};
+  Description: Gets an array of internal ids for all prediction transcript
+               features in the current db
+  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
+  Returntype : list of ints
+  Exceptions : none
+  Caller     : ?
+  Status     : Stable
+
+=cut
+
+sub list_dbIDs {
+   my ($self, $ordered) = @_;
+
+   return $self->_list_dbIDs("prediction_transcript", undef, $ordered);
+}
+
+1;