view variant_effect_predictor/Bio/EnsEMBL/DBSQL/ProteinAlignFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor - 
Adaptor for ProteinAlignFeatures

=head1 SYNOPSIS

  $pafa =
    $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );

  my @features = @{ $pafa->fetch_all_by_Slice($slice) };

  $pafa->store(@features);

=head1 METHODS

=cut


package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
use vars qw(@ISA);
use strict;

use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
use Bio::EnsEMBL::DnaPepAlignFeature;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);


=head2 store

  Arg [1]    : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
  Example    : $protein_align_feature_adaptor->store(@feats);
  Description: stores a list of ProteinAlignFeatures in the database
  Returntype : none
  Exceptions : throw if any of the provided features cannot be stored
               which may occur if:
                 * The feature does not have an associated Slice
                 * The feature does not have an associated analysis
                 * The Slice the feature is associated with is on a seq_region
                   unknown to this database
              A warning is given if:
                 * The feature has already been stored in this db
  Caller     : Pipeline
  Status     : Stable

=cut


sub store{
 my ($self, @feats) = @_;

  throw("Must call store with features") if( scalar(@feats) == 0 );

  my @tabs = $self->_tables;
  my ($tablename) = @{$tabs[0]};

  my $db = $self->db();
  my $slice_adaptor = $db->get_SliceAdaptor();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  my $sth = $self->prepare(
     "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
                             seq_region_strand, hit_start, hit_end,
                             hit_name, cigar_line,
                             analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
     VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");

 FEATURE: foreach my $feat ( @feats ) {
   if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
     throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
           . " not a [".ref($feat)."].");
   }

   if($feat->is_stored($db)) {
     warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
             " in this database.");
     next FEATURE;
   }

   #sanity check the hstart and hend
   my $hstart  = $feat->hstart();
   my $hend    = $feat->hend();
   $self->_check_start_end_strand($hstart,$hend,1);

   my $cigar_string = $feat->cigar_string();
   if(!$cigar_string) {
     $cigar_string = $feat->length() . 'M';
     warning("DnaPepAlignFeature does not define a cigar_string.\n" .
             "Assuming ungapped block with cigar_string=$cigar_string\n");
   }

   my $hseqname = $feat->hseqname();
   if(!$hseqname) {
     throw("DnaPepAlignFeature must define an hseqname.");
   }

   if(!defined($feat->analysis)) {
     throw("An analysis must be attached to the features to be stored.");
   }

   #store the analysis if it has not been stored yet
   if(!$feat->analysis->is_stored($db)) {
     $analysis_adaptor->store($feat->analysis());
   }

   my $slice = $feat->slice();
   if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
     throw("A slice must be attached to the features to be stored.");
   }

   my $original = $feat;
   my $seq_region_id;
   ($feat, $seq_region_id) = $self->_pre_store($feat);

   $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
   $sth->bind_param(2,$feat->start,SQL_INTEGER);
   $sth->bind_param(3,$feat->end,SQL_INTEGER);
   $sth->bind_param(4,$feat->strand,SQL_TINYINT);
   $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
   $sth->bind_param(6,$feat->hend,SQL_INTEGER);
   $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
   $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
   $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
   $sth->bind_param(10,$feat->score,SQL_DOUBLE);
   $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
   $sth->bind_param(12,$feat->percent_id,SQL_REAL);
   $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
   $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);

   $sth->execute();
   $original->dbID($sth->{'mysql_insertid'});
   $original->adaptor($self);
 }

 $sth->finish();
}


=head2 _objs_from_sth

  Arg [1]    : DBI statement handle $sth
               an exectuted DBI statement handle generated by selecting 
               the columns specified by _columns() from the table specified 
               by _table()
  Example    : @dna_dna_align_feats = $self->_obj_from_hashref
  Description: PROTECTED implementation of superclass abstract method. 
               Creates DnaDnaAlignFeature objects from a DBI hashref
  Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
  Exceptions : none
  Caller     : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
  Status     : Stable

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db->get_AnalysisAdaptor();

  my @features;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my ($protein_align_feature_id, $seq_region_id, $seq_region_start,
      $seq_region_end, $analysis_id, $seq_region_strand, $hit_start,
      $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score,
      $external_db_id, $hcoverage, $external_db_name, $external_display_db_name );

  $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id,
           \$seq_region_start,\$seq_region_end, \$analysis_id,
           \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name,
           \$cigar_line, \$evalue, \$perc_ident, \$score,
           \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name );

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }

  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_sr_name;
  my $dest_slice_sr_id;
  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_sr_name = $dest_slice->seq_region_name();
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
  }

  FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};

    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

	if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')  ) {
	    ( $seq_region_id,  $seq_region_start,
	      $seq_region_end, $seq_region_strand )
		=
		$mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs, 1, $dest_slice);

	} else {

	    ( $seq_region_id,  $seq_region_start,
	      $seq_region_end, $seq_region_strand )
		=
		$mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs );
	}

      #skip features that map to gaps or coord system boundaries
      next FEATURE if(!defined($seq_region_id));

 #     #get a slice in the coord system we just mapped to
 #     if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
#      } else {
#        $slice = $slice_hash{"ID:".$seq_region_id} ||=
#          $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef,
#                               $asm_cs_vers);
#      }
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }
       
      #throw away features off the end of the requested slice
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
	( $dest_slice_sr_id ne $seq_region_id )) {
	next FEATURE;
      }
      $slice = $dest_slice;
    }

    # Finally, create the new ProteinAlignFeature.
    push(
      @features,
      $self->_create_feature_fast(
        'Bio::EnsEMBL::DnaPepAlignFeature', {
          'slice'        => $slice,
          'start'        => $seq_region_start,
          'end'          => $seq_region_end,
          'strand'       => $seq_region_strand,
          'hseqname'     => $hit_name,
          'hstart'       => $hit_start,
          'hend'         => $hit_end,
          'hstrand'      => 1,                  # dna_pep_align features
                                                # are always hstrand 1
          'score'        => $score,
          'p_value'      => $evalue,
          'percent_id'   => $perc_ident,
          'cigar_string' => $cigar_line,
          'analysis'     => $analysis,
          'adaptor'      => $self,
          'dbID'           => $protein_align_feature_id,
          'external_db_id' => $external_db_id,
          'hcoverage'      => $hcoverage,
	  'dbname'         => $external_db_name,
	  'db_display_name' => $external_display_db_name
        } ) );

  }

  return \@features;
}



sub _tables {
  my $self = shift;

  return (['protein_align_feature', 'paf'], ['external_db','exdb']);
}


sub _columns {
  my $self = shift;

  #warning _objs_from_hashref method depends on ordering of this list 
  return qw( paf.protein_align_feature_id
             paf.seq_region_id
             paf.seq_region_start
             paf.seq_region_end
             paf.analysis_id
             paf.seq_region_strand
             paf.hit_start
             paf.hit_end
             paf.hit_name
             paf.cigar_line
             paf.evalue
             paf.perc_ident
             paf.score
             paf.external_db_id
             paf.hcoverage
	     exdb.db_name
	     exdb.db_display_name);
}

sub _left_join{
    return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
}

=head2 list_dbIDs

  Arg [1]    : none
  Example    : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
  Description: Gets an array of internal ids for all protein align 
               features in the current db
  Arg[1]     : <optional> int. not 0 for the ids to be sorted by the seq_region.
  Returntype : listref of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
   my ($self,$ordered) = @_;

   return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
}






1;