view variant_effect_predictor/Bio/EnsEMBL/DBSQL/AnalysisAdaptor.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
line wrap: on
line source

=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBSQL::AnalysisAdaptor

=head1 SYNOPSIS

  use Bio::EnsEMBL::Registry;

  Bio::EnsEMBL::Registry->load_registry_from_db(
    -host => 'ensembldb.ensembl.org',
    -user => 'anonymous'
  );

  $analysis_adaptor =
    Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "analysis" );

  my $analysis = $analysis_adaptor->fetch_by_logic_name('genscan');

=head1 DESCRIPTION

  Module to encapsulate all db access for persistent class Analysis.
  There should be just one per application and database connection.

=head1 METHODS

=cut


package Bio::EnsEMBL::DBSQL::AnalysisAdaptor;

use Bio::EnsEMBL::Analysis;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Utils::Exception;


use vars qw(@ISA);
use strict;

@ISA = qw( Bio::EnsEMBL::DBSQL::BaseAdaptor);


=head2 new

  Args       : Bio::EnsEMBL::DBSQL::DBAdaptor
  Example    : my $aa = new Bio::EnsEMBL::DBSQL::AnalysisAdaptor();
  Description: Creates a new Bio::EnsEMBL::DBSQL::AnalysisAdaptor object and
               internally loads and caches all the Analysis objects from the 
               database.
  Returntype : Bio::EnsEMBL::DBSQL::AnalysisAdaptor
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Status     : Stable

=cut

sub new {
  my ($class, $db) = @_;

  my $self = $class->SUPER::new($db);

  #load and cache all of the Analysis objects
  $self->fetch_all;

  return $self;
}


=head2 fetch_all

  Args       : none
  Example    : my @analysis = @{$analysis_adaptor->fetch_all()};
  Description: fetches all of the Analysis objects from the database and caches
               them internally.
  Returntype : listref of Bio::EnsEMBL::Analysis retrieved from the database
  Exceptions : none
  Caller     : AnalysisAdaptor::new
  Status     : Stable

=cut

sub fetch_all {
  my $self = shift;
  my ( $analysis, $dbID );
  my $rowHashRef;

  $self->{_cache} = {};
  $self->{_logic_name_cache} = {};

  my $sth = $self->prepare( q {
    SELECT analysis.analysis_id, logic_name,
           program, program_version, program_file,
           db, db_version, db_file,
           module, module_version,
           gff_source, gff_feature,
           created, parameters, description, display_label, displayable, web_data
    FROM   analysis
    LEFT JOIN analysis_description
    ON analysis.analysis_id = analysis_description.analysis_id } );
  $sth->execute;

  while( $rowHashRef = $sth->fetchrow_hashref ) {
    my $analysis = $self->_objFromHashref( $rowHashRef  );

    $self->{_cache}->{$analysis->dbID}                    = $analysis;
    $self->{_logic_name_cache}->{lc($analysis->logic_name())} = $analysis;
  }

  my @ana = values %{$self->{_cache}};

  return \@ana;
}


=head2 fetch_all_by_feature_class

  Arg [1]    : string $feature_cless - The name of the feature class
  Example    : my @analyses = @{$analysis_adaptor->fetch_all_by_feature_class('Gene');
  Description: Returns all analyses that correspond to a given 
               feature class; see feature_classes method for a list.
  Returntype : Listref of Bio::EnsEMBL::Analysis
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_feature_class {
  my $self = shift;
  deprecate("Deprecated. Hard-coded logic is not supported");
  my $feat_class = shift || throw( "Need a feature type, e.g. SimpleFeature" );
 
  my @feature_classes = $self->feature_classes; # List of all feature classes
  my %feat_table_map;
  foreach my $class( @feature_classes ){
    # Map e.g. DnaAlignFeature to dna_align_feature
    my $table = join( "_", map lc, ( $class =~ /([A-Z][a-z]+)/g ) );
    $feat_table_map{$class} = $table;
  }
  $feat_table_map{DensityFeature}='density_type'; # analysis_id in diff table
  my $feat_table = $feat_table_map{$feat_class} || 
      ( warning( "No feature type corresponding to $feat_class" ) &&
        return [] );

  my $sql_t = qq|
SELECT DISTINCT analysis_id FROM %s |;
  
  my $sql = sprintf( $sql_t, $feat_table );
  my $sth = $self->prepare( $sql );
  my $rv  = $sth->execute();
  my $res = $sth->fetchall_arrayref;
  my @analyses;
  foreach my $r( @{$res} ){
    my $analysis = $self->fetch_by_dbID($r->[0]) 
        || throw( "analysis_id $r->[0] from $feat_table table "
                  . "is not in the analysis table!" );
    push @analyses, $analysis;
  }
  return [@analyses];
}


=head2 feature_classes

  Arg [1]    : NONE
  Example    : my @fclasses = $analysis_adaptor->feature_classes;
  Description: Returns a list of the different classes of Ensembl feature 
               object that have an analysis
  Returntype : List of feature classes
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub feature_classes{
  deprecate("Deprecated. Hard-coded logic is not supported");
  # Can't think of a way to do this programatically, so hard-coded
  return qw(
            DensityFeature
            DnaAlignFeature
            Gene
            MarkerFeature
            PredictionTranscript
            ProteinAlignFeature
            ProteinFeature
            QtlFeature
            RepeatFeature
            SimpleFeature
            );
}

=head2 fetch_by_dbID

  Arg [1]    : int $internal_analysis_id - the database id of the analysis 
               record to retrieve
  Example    : my $analysis = $analysis_adaptor->fetch_by_dbID(1);
  Description: Retrieves an Analysis object from the database via its internal
               id.
  Returntype : Bio::EnsEMBL::Analysis
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_dbID {
  my $self = shift;
  my $id = shift;

  if( defined $self->{_cache}->{$id} ) {
    return $self->{_cache}->{$id};
  }

  my $query = q{
    SELECT analysis.analysis_id, logic_name,
           program, program_version, program_file,
           db, db_version, db_file,
           module, module_version,
           gff_source, gff_feature,
           created, parameters, description, display_label, displayable, web_data
    FROM   analysis
    LEFT JOIN analysis_description
    ON analysis.analysis_id = analysis_description.analysis_id
    WHERE  analysis.analysis_id = ? };

  my $sth = $self->prepare($query);
  $sth->bind_param(1,$id,SQL_INTEGER);
  $sth->execute();
  my $rowHashRef = $sth->fetchrow_hashref;
  if( ! defined $rowHashRef ) {
    return undef;
  }

  my $anal = $self->_objFromHashref( $rowHashRef );
  $self->{_cache}->{$anal->dbID} = $anal;
  $self->{_logic_name_cache}->{lc($anal->logic_name())} = $anal;
  return $anal;
}


=head2 fetch_by_logic_name

  Arg [1]    : string $logic_name the logic name of the analysis to retrieve
  Example    : my $analysis = $a_adaptor->fetch_by_logic_name('Eponine');
  Description: Retrieves an analysis object from the database using its unique
               logic name.
  Returntype : Bio::EnsEMBL::Analysis
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_logic_name {
  my ( $self, $logic_name ) = @_;

  my $analysis;
  my $rowHash;

  # Check the cache for the logic name
  if ( defined( $self->{_logic_name_cache}{ lc($logic_name) } ) ) {
    return $self->{_logic_name_cache}{ lc($logic_name) };
  }

  my $sth = $self->prepare(
    qq(
SELECT  analysis.analysis_id,
        logic_name,
        program,
        program_version,
        program_file,
        db,
        db_version,
        db_file,
        module,
        module_version,
        gff_source,
        gff_feature,
        created,
        parameters,
        description,
        display_label,
        displayable,
        web_data
FROM    analysis
  LEFT JOIN analysis_description
    ON  ( analysis.analysis_id = analysis_description.analysis_id )
WHERE  LOWER(logic_name) = ?)
  );

  $sth->bind_param( 1, lc($logic_name), SQL_VARCHAR );
  $sth->execute();
  my $rowHashRef = $sth->fetchrow_hashref();

  if ( !defined($rowHashRef) ) { return undef }

  $analysis = $self->_objFromHashref($rowHashRef);

  # place the analysis in the caches, cross referenced by dbID and
  # logic_name
  $self->{_cache}->{ $analysis->dbID() } = $analysis;
  $self->{_logic_name_cache}->{ lc($logic_name) } = $analysis;

  return $analysis;
} ## end sub fetch_by_logic_name


=head2 store

  Arg [1]    : Bio:EnsEMBL::Analysis $analysis
  Example    : $analysis_adaptor->store($analysis);
  Description: Stores $analysis in db.  If the analysis is already stored in
               the database its dbID and adaptor are updated, but the analysis
               is not stored a second time.
               Sets created date if not already set. Sets dbID and adaptor
               inside $analysis. Returns dbID.
  Returntype : int - dbID of stored analysis
  Exceptions : throw on incorrect argument
               throw if analysis argument does not have a logic name
  Caller     : general
  Status     : Stable

=cut

sub store {
  my $self = shift;
  my $analysis = shift;

  if(!ref($analysis) || !$analysis->isa('Bio::EnsEMBL::Analysis')) {
    throw("Bio::EnsEMBL::Analysis argument expected.");
  }

  if($analysis->is_stored($self->db())) {
    return $analysis->dbID();
  }

  if(!$analysis->logic_name()) {
    throw("Analysis cannot be stored without a valid logic_name");
  }
    
  my $insertion_method = (lc($self->dbc->driver) eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE';

  my $rows_inserted = 0;
  my $sth;

  if ( $analysis->created() ) {

    # We use insert IGNORE so that this method can be used in a
    # multi-process environment.  If another process has already written
    # this record then there will not be a problem.

    $sth = $self->prepare(
      qq{
          $insertion_method INTO analysis
              (created, logic_name, db, db_version, db_file, program, program_version, program_file, parameters, module, module_version, gff_source, gff_feature)
          VALUES  (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      }
    );
    $sth->bind_param( 1,  $analysis->created(),          SQL_DATETIME );
    $sth->bind_param( 2,  lc( $analysis->logic_name() ), SQL_VARCHAR );
    $sth->bind_param( 3,  $analysis->db(),               SQL_VARCHAR );
    $sth->bind_param( 4,  $analysis->db_version(),       SQL_VARCHAR );
    $sth->bind_param( 5,  $analysis->db_file(),          SQL_VARCHAR );
    $sth->bind_param( 6,  $analysis->program(),          SQL_VARCHAR );
    $sth->bind_param( 7,  $analysis->program_version(),  SQL_VARCHAR );
    $sth->bind_param( 8,  $analysis->program_file(),     SQL_VARCHAR );
    $sth->bind_param( 9,  $analysis->parameters(),       SQL_VARCHAR );
    $sth->bind_param( 10, $analysis->module(),           SQL_VARCHAR );
    $sth->bind_param( 11, $analysis->module_version(),   SQL_VARCHAR );
    $sth->bind_param( 12, $analysis->gff_source(),       SQL_VARCHAR );
    $sth->bind_param( 13, $analysis->gff_feature(),      SQL_VARCHAR );

    $rows_inserted = $sth->execute();

  } else {
    $sth = $self->prepare(
      qq{
          $insertion_method INTO analysis
              (created, logic_name, db, db_version, db_file, program, program_version, program_file, parameters, module, module_version, gff_source, gff_feature)
          VALUES  (CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      }
    );

    $sth->bind_param( 1,  $analysis->logic_name,      SQL_VARCHAR );
    $sth->bind_param( 2,  $analysis->db,              SQL_VARCHAR );
    $sth->bind_param( 3,  $analysis->db_version,      SQL_VARCHAR );
    $sth->bind_param( 4,  $analysis->db_file,         SQL_VARCHAR );
    $sth->bind_param( 5,  $analysis->program,         SQL_VARCHAR );
    $sth->bind_param( 6,  $analysis->program_version, SQL_VARCHAR );
    $sth->bind_param( 7,  $analysis->program_file,    SQL_VARCHAR );
    $sth->bind_param( 8,  $analysis->parameters,      SQL_VARCHAR );
    $sth->bind_param( 9,  $analysis->module,          SQL_VARCHAR );
    $sth->bind_param( 10, $analysis->module_version,  SQL_VARCHAR );
    $sth->bind_param( 11, $analysis->gff_source,      SQL_VARCHAR );
    $sth->bind_param( 12, $analysis->gff_feature,     SQL_VARCHAR );

    $rows_inserted = $sth->execute();

  } ## end else [ if ( $analysis->created...)]

  my $dbID;
  # If we need to fetch the timestamp, or the insert failed due to
  # existance of an existing entry, we need to retrieve the entry from
  # the database.  Note: $sth->execute() may return 0E0 on error which
  # is zero, but true which is why the $rows_inserted clause was added.
  if ( !$analysis->created() || !$rows_inserted || $rows_inserted == 0 )
  {
    my $new_analysis =
      $self->fetch_by_logic_name( $analysis->logic_name );

    if ( !$new_analysis ) {
      throw("Could not retrieve just stored analysis from database.\n"
          . "Possibly incorrect db permissions or missing analysis table\n"
      );
    }

    $dbID = $new_analysis->dbID();
    $analysis->created( $new_analysis->created() );
  }
  
  $dbID ||= $sth->{'mysql_insertid'};
  $sth->finish();

  # store description and display_label
  if( defined( $analysis->description() ) || defined( $analysis->display_label() )|| defined( $analysis->web_data() )) {
      $sth = $self->prepare( "INSERT IGNORE INTO analysis_description (analysis_id, display_label, description, displayable, web_data) VALUES (?,?,?,?, ?)");

      $sth->bind_param(1,$dbID,SQL_INTEGER);
      $sth->bind_param(2,$analysis->display_label(),SQL_VARCHAR);
      $sth->bind_param(3,$analysis->description,SQL_LONGVARCHAR);
      $sth->bind_param(4,$analysis->displayable,SQL_TINYINT);
      #$sth->bind_param(5,$analysis->web_data(),SQL_LONGVARCHAR);
      my $web_data;
      $web_data = $self->dump_data($analysis->web_data()) if ($analysis->web_data());
      $sth->bind_param(5,$web_data,SQL_LONGVARCHAR);
      $sth->execute();

      $sth->finish();
  }
  


  $self->{_cache}->{$dbID} = $analysis;
  $self->{_logic_name_cache}{lc($analysis->logic_name)} = $analysis;

  $analysis->adaptor( $self );
  $analysis->dbID( $dbID );

  return $dbID;
}



=head2 update

  Arg [1]    : Bio::EnsEMBL::Analysis $anal
  Example    : $adaptor->update($anal)
  Description: Updates this analysis in the database
  Returntype : int 1 if update is performed, undef if it is not
  Exceptions : throw if arg is not an analysis object
  Caller     : ?
  Status     : Stable

=cut

sub update {
  my $self = shift;
  my $a    = shift;

  if (!ref($a) || !$a->isa('Bio::EnsEMBL::Analysis')) {
    throw("Expected Bio::EnsEMBL::Analysis argument.");
  }

  if(!$a->is_stored($self->db())) {
    return undef;
  }

  my $sth = $self->prepare
    ("UPDATE analysis " .
     "SET created = ?, logic_name = ?, db = ?, db_version = ?, db_file = ?, ".
     "    program = ?, program_version = ?, program_file = ?,  ".
     "    parameters = ?, module = ?, module_version = ?, ".
     "    gff_source = ?, gff_feature = ? " .
     "WHERE analysis_id = ?");



  $sth->bind_param(1,$a->created,SQL_DATETIME);
  $sth->bind_param(2,$a->logic_name,SQL_VARCHAR);
  $sth->bind_param(3,$a->db,SQL_VARCHAR);
  $sth->bind_param(4,$a->db_version,SQL_VARCHAR);
  $sth->bind_param(5,$a->db_file,SQL_VARCHAR);
  $sth->bind_param(6,$a->program,SQL_VARCHAR);
  $sth->bind_param(7,$a->program_version,SQL_VARCHAR);
  $sth->bind_param(8,$a->program_file,SQL_VARCHAR);
  $sth->bind_param(9,$a->parameters,SQL_VARCHAR);
  $sth->bind_param(10,$a->module,SQL_VARCHAR);
  $sth->bind_param(11,$a->module_version,SQL_VARCHAR);
  $sth->bind_param(12,$a->gff_source,SQL_VARCHAR);
  $sth->bind_param(13,$a->gff_feature,SQL_VARCHAR);
  $sth->bind_param(14,$a->dbID,SQL_INTEGER);

  $sth->execute();

  $sth->finish();

  # also update description & display label - may need to create these if
  # not already there
  $sth = $self->prepare("SELECT description FROM analysis_description WHERE analysis_id= ?");
  $sth->execute($a->dbID);
  my $web_data; #this is an anonymous reference to a hash, will have to be dumped into string before writing to db
  if ($sth->fetchrow_hashref) { # update if exists
      $web_data = $self->dump_data($a->web_data()) if ($a->web_data());
      $sth = $self->prepare
      ("UPDATE analysis_description SET description = ?, display_label = ?, displayable = ?, web_data = ? WHERE analysis_id = ?");
      $sth->bind_param(1,$a->description,SQL_LONGVARCHAR);     
      $sth->bind_param(2,$a->display_label(),SQL_VARCHAR);
      $sth->bind_param(3,$a->displayable,SQL_TINYINT);
      #      print "after $web_data\n";
      $sth->bind_param(4,$web_data,SQL_LONGVARCHAR);
      $sth->bind_param(5,$a->dbID,SQL_INTEGER);
      $sth->execute();

  } else { # create new entry

    if( $a->description() || $a->display_label() || $a->web_data) {
	$web_data = $self->dump_data($a->web_data()) if ($a->web_data());
      #my $web_data = $self->dump_data($a->web_data());
      $sth = $self->prepare( "INSERT IGNORE INTO analysis_description (analysis_id, display_label, description, displayable, web_data) VALUES (?,?,?,?,?)");
	$sth->bind_param(1,$a->dbID,SQL_INTEGER);	
	$sth->bind_param(2,$a->display_label(),SQL_VARCHAR);
	$sth->bind_param(3,$a->description,SQL_LONGVARCHAR);     
	$sth->bind_param(4,$a->displayable,SQL_TINYINT);
	#my $web_data = $self->dump_data($a->web_data());
	$sth->bind_param(5,$web_data,SQL_LONGVARCHAR);
	$sth->execute();

    }

  }


    $sth->finish();

  # the logic_name cache needs to be re-updated now, since we may have just
  # changed the logic_name
  $self->fetch_all();

  return 1;
}



=head2 remove

  Arg [1]    : Bio::EnsEMBL::Analysis $anal
  Example    : $adaptor->remove($anal)
  Description: Removes this analysis from the database.  This is not really
               safe to execute in a multi process environment, so programs
               should not remove analysis while out on the farm.
  Returntype : none
  Exceptions : thrown if $anal arg is not an analysis object
  Caller     : ?
  Status     : Stable

=cut

sub remove {
  my ($self, $analysis) = @_;

  if (!defined $analysis || !ref $analysis) {
    throw("called remove on AnalysisAdaptor with a [$analysis]");
  }

  if(!$analysis->is_stored($self->db())) {
    return undef;
  }

  my $sth = $self->prepare("DELETE FROM analysis WHERE analysis_id = ?");
  $sth->bind_param(1,$analysis->dbID,SQL_INTEGER);
  $sth->execute();

  $sth = $self->prepare("DELETE FROM analysis_description WHERE analysis_id = ?");
  $sth->execute($analysis->dbID());

  # remove this analysis from the cache
  delete $self->{'_cache'}->{$analysis->dbID()};
  delete $self->{'_logic_name_cache'}->{lc($analysis->logic_name)};


  # unset the adaptor and dbID
  $analysis->dbID(undef);
  $analysis->adaptor(undef);

  return;
}



=head2 exists

  Arg [1]    : Bio::EnsEMBL::Analysis $anal
  Example    : if($adaptor->exists($anal)) #do something
  Description: Tests whether this Analysis already exists in the database
               by checking first if the adaptor and dbID are set and
               secondly by whether it is in this adaptors internal cache.
               Note that this will not actually check the database and will
               not find and analysis which were recently added by other
               processes.  You are better off simply trying to store an
               analysis which will reliably ensure that it is not stored twice
               in the database.
  Returntype : int dbID if analysis is found, otherwise returns undef
  Exceptions : thrown if $anal arg is not an analysis object
  Caller     : store
  Status     : Stable

=cut

sub exists {
  my ($self,$anal) = @_;

  if(!ref($anal) || !$anal->isa("Bio::EnsEMBL::Analysis")) {
    throw("Object is not a Bio::EnsEMBL::Analysis");
  }

  #if this analysis is stored in this db already return its dbID
  if($anal->is_stored($self->db())) {
    return $anal->dbID();
  }

  #this analysis object is not stored but one exactly like it may have been
  foreach my $cacheId (keys %{$self->{_cache}}) {
    if ($self->{_cache}->{$cacheId}->compare($anal) >= 0) {
      # $anal->dbID( $cacheId );
      # $anal->adaptor( $self );
      return $cacheId;
    }
  }

  #no analysis like this one exists in the database
  return undef;
}


=head2 _objFromHashref

  Arg [1]    : hashref $rowHash
  Description: Private helper function generates an Analysis object from a 
               mysql row hash reference.
  Returntype : Bio::EnsEMBL::Analysis
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::AnalsisAdaptor::fetch_* methods
  Status     : Stable

=cut

sub _objFromHashref {
  my $self = shift;
  my $h = shift;

  my $web_data = $h->{web_data} ? $self->get_dumped_data($h->{web_data}) : '';

  return Bio::EnsEMBL::Analysis->new_fast({
    dbID             => $h->{analysis_id},
    adaptor          => $self,
    _db               => $h->{db},
    _db_file          => $h->{db_file},
    _db_version       => $h->{db_version},
    _program          => $h->{program},
    _program_version  => $h->{program_version},
    _program_file     => $h->{program_file},
    _gff_source       => $h->{gff_source},
    _gff_feature      => $h->{gff_feature},
    _module           => $h->{module},
    _module_version   => $h->{module_version},
    _parameters       => $h->{parameters},
    _created          => $h->{created},
    _logic_name       => $h->{logic_name},
    _description      => $h->{description},
    _display_label    => $h->{display_label},
    _displayable      => $h->{displayable},
    _web_data         => $web_data,
  });
}



1;