Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1513 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::ArchiveStableIdAdaptor + +=head1 SYNOPSIS + + my $registry = "Bio::EnsEMBL::Registry"; + + my $archiveStableIdAdaptor = + $registry->get_adaptor( 'Human', 'Core', 'ArchiveStableId' ); + + my $stable_id = 'ENSG00000068990'; + + my $arch_id = $archiveStableIdAdaptor->fetch_by_stable_id($stable_id); + + print("Latest incarnation of this stable ID:\n"); + printf( " Stable ID: %s.%d\n", + $arch_id->stable_id(), $arch_id->version() ); + print(" Release: " + . $arch_id->release() . " (" + . $arch_id->assembly() . ", " + . $arch_id->db_name() + . ")\n" ); + + print "\nStable ID history:\n\n"; + + my $history = + $archiveStableIdAdaptor->fetch_history_tree_by_stable_id( + $stable_id); + + foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) { + printf( " Stable ID: %s.%d\n", $a->stable_id(), $a->version() ); + print(" Release: " + . $a->release() . " (" + . $a->assembly() . ", " + . $a->db_name() + . ")\n\n" ); + } + +=head1 DESCRIPTION + +ArchiveStableIdAdaptor does all SQL to create ArchiveStableIds and works +of + + stable_id_event + mapping_session + peptite_archive + gene_archive + +tables inside the core database. + +This whole module has a status of At Risk as it is under development. + +=head1 METHODS + + fetch_by_stable_id + fetch_by_stable_id_version + fetch_by_stable_id_dbname + fetch_all_by_archive_id + fetch_predecessors_by_archive_id + fetch_successors_by_archive_id + fetch_history_tree_by_stable_id + add_all_current_to_history + list_dbnames + previous_dbname + next_dbname + get_peptide + get_current_release + get_current_assembly + +=head1 RELATED MODULES + + Bio::EnsEMBL::ArchiveStableId + Bio::EnsEMBL::StableIdEvent + Bio::EnsEMBL::StableIdHistoryTree + +=head1 METHODS + +=cut + +package Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor; + +use strict; +use warnings; +no warnings qw(uninitialized); + +use Bio::EnsEMBL::DBSQL::BaseAdaptor; +our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); + +use Bio::EnsEMBL::ArchiveStableId; +use Bio::EnsEMBL::StableIdEvent; +use Bio::EnsEMBL::StableIdHistoryTree; +use Bio::EnsEMBL::Utils::Exception qw(deprecate warning throw); + +use constant MAX_ROWS => 30; +use constant NUM_HIGH_SCORERS => 20; + + +=head2 fetch_by_stable_id + + Arg [1] : string $stable_id + Arg [2] : (optional) string $type + Example : none + Description : Retrives an ArchiveStableId that is the latest incarnation of + given stable_id. + Returntype : Bio::EnsEMBL::ArchiveStableId or undef if not in database + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub fetch_by_stable_id { + my $self = shift; + my $stable_id = shift; + + my $arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $stable_id, + -adaptor => $self + ); + + @_ ? $arch_id->type(shift) : $self->_resolve_type($arch_id); + + if ($self->lookup_current($arch_id)) { + + # stable ID is in current release + $arch_id->version($arch_id->current_version); + $arch_id->db_name($self->dbc->dbname); + $arch_id->release($self->get_current_release); + $arch_id->assembly($self->get_current_assembly); + + } else { + + # look for latest version of this stable id + my $extra_sql = defined($arch_id->{'type'}) ? + " AND sie.type = '@{[lc($arch_id->{'type'})]}'" : ''; + + my $r = $self->_fetch_archive_id($stable_id, $extra_sql, $extra_sql); + + if ($r->{'new_stable_id'} and $r->{'new_stable_id'} eq $stable_id) { + # latest event is a self event, use new_* data + $arch_id->version($r->{'new_version'}); + $arch_id->release($r->{'new_release'}); + $arch_id->assembly($r->{'new_assembly'}); + $arch_id->db_name($r->{'new_db_name'}); + } else { + # latest event is a deletion event (or mapping to other ID; this clause + # is only used to cope with buggy data where deletion events are + # missing), use old_* data + $arch_id->version($r->{'old_version'}); + $arch_id->release($r->{'old_release'}); + $arch_id->assembly($r->{'old_assembly'}); + $arch_id->db_name($r->{'old_db_name'}); + } + + $arch_id->type(ucfirst(lc($r->{'type'}))); + } + + if (! defined $arch_id->db_name) { + # couldn't find stable ID in archive or current db + return undef; + } + + $arch_id->is_latest(1); + + return $arch_id; +} + + +=head2 fetch_by_stable_id_version + + Arg [1] : string $stable_id + Arg [2] : int $version + Example : none + Description : Retrieve an ArchiveStableId with given version and stable ID. + Returntype : Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub fetch_by_stable_id_version { + my $self = shift; + my $stable_id = shift; + my $version = shift; + + my $arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $stable_id, + -version => $version, + -adaptor => $self + ); + + @_ ? $arch_id->type(shift) : $self->_resolve_type($arch_id); + + if ($self->lookup_current($arch_id) && $arch_id->is_current) { + + # this version is the current one + $arch_id->db_name($self->dbc->dbname); + $arch_id->release($self->get_current_release); + $arch_id->assembly($self->get_current_assembly); + + } else { + + # find latest release this stable ID version is found in archive + my $extra_sql1 = qq(AND sie.old_version = "$version"); + my $extra_sql2 = qq(AND sie.new_version = "$version"); + my $r = $self->_fetch_archive_id($stable_id, $extra_sql1, $extra_sql2); + + if ($r->{'new_stable_id'} and $r->{'new_stable_id'} eq $stable_id + and $r->{'new_version'} == $version) { + # latest event is a self event, use new_* data + $arch_id->release($r->{'new_release'}); + $arch_id->assembly($r->{'new_assembly'}); + $arch_id->db_name($r->{'new_db_name'}); + } else { + # latest event is a deletion event (or mapping to other ID; this clause + # is only used to cope with buggy data where deletion events are + # missing), use old_* data + $arch_id->release($r->{'old_release'}); + $arch_id->assembly($r->{'old_assembly'}); + $arch_id->db_name($r->{'old_db_name'}); + } + + $arch_id->type(ucfirst(lc($r->{'type'}))); + } + + if (! defined $arch_id->db_name) { + # couldn't find stable ID version in archive or current release + return undef; + } + + return $arch_id; +} + + +=head2 fetch_by_stable_id_dbname + + Arg [1] : string $stable_id + Arg [2] : string $db_name + Example : none + Description : Create an ArchiveStableId from given arguments. + Returntype : Bio::EnsEMBL::ArchiveStableId or undef if not in database + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub fetch_by_stable_id_dbname { + my $self = shift; + my $stable_id = shift; + my $db_name = shift; + + my $arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $stable_id, + -db_name => $db_name, + -adaptor => $self + ); + + @_ ? $arch_id->type(shift) : $self->_resolve_type($arch_id); + + if ($self->lookup_current($arch_id) and $db_name eq $self->dbc->dbname) { + + # this version is the current one + $arch_id->version($arch_id->current_version); + $arch_id->release($self->get_current_release); + $arch_id->assembly($self->get_current_assembly); + + } else { + + # find version for this dbname in the stable ID archive + my $extra_sql = defined($arch_id->{'type'}) ? + " AND sie.type = '@{[lc($arch_id->{'type'})]}'" : ''; + my $extra_sql1 = $extra_sql . qq( AND ms.old_db_name = "$db_name"); + my $extra_sql2 = $extra_sql . qq( AND ms.new_db_name = "$db_name"); + my $r = $self->_fetch_archive_id($stable_id, $extra_sql1, $extra_sql2); + + if ($r->{'new_stable_id'} and $r->{'new_stable_id'} eq $stable_id + and $r->{'new_db_name'} eq $db_name) { + + # latest event is a self event, use new_* data + $arch_id->release($r->{'new_release'}); + $arch_id->assembly($r->{'new_assembly'}); + $arch_id->version($r->{'new_version'}); + } else { + # latest event is a deletion event (or mapping to other ID; this clause + # is only used to cope with buggy data where deletion events are + # missing), use old_* data + $arch_id->release($r->{'old_release'}); + $arch_id->assembly($r->{'old_assembly'}); + $arch_id->version($r->{'old_version'}); + } + + $arch_id->type(ucfirst(lc($r->{'type'}))); + } + + if (! defined $arch_id->version ) { + # couldn't find stable ID version in archive or current release + return undef; + } + + return $arch_id; +} + +# +# Helper method to do fetch ArchiveStableId from db. +# Used by fetch_by_stable_id(), fetch_by_stable_id_version() and +# fetch_by_stable_id_dbname(). +# Returns hashref as returned by DBI::sth::fetchrow_hashref +# +sub _fetch_archive_id { + my $self = shift; + my $stable_id = shift; + my $extra_sql1 = shift; + my $extra_sql2 = shift; + + # using a UNION is much faster in this query than somthing like + # "... AND (sie.old_stable_id = ? OR sie.new_stable_id = ?)" + my $sql = qq( + (SELECT * FROM stable_id_event sie, mapping_session ms + WHERE sie.mapping_session_id = ms.mapping_session_id + AND sie.old_stable_id = ? + $extra_sql1) + UNION + (SELECT * FROM stable_id_event sie, mapping_session ms + WHERE sie.mapping_session_id = ms.mapping_session_id + AND sie.new_stable_id = ? + $extra_sql2) + ORDER BY created DESC + LIMIT 1 + ); + + my $sth = $self->prepare($sql); + $sth->execute($stable_id,$stable_id); + my $r = $sth->fetchrow_hashref; + $sth->finish; + + return $r; +} + + +=head2 fetch_all_by_archive_id + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $archive_id + Arg [2] : String $return_type - type of ArchiveStableId to fetch + Example : my $arch_id = $arch_adaptor->fetch_by_stable_id('ENSG0001'); + my @archived_transcripts = + $arch_adaptor->fetch_all_by_archive_id($arch_id, 'Transcript'); + Description : Given a ArchiveStableId it retrieves associated ArchiveStableIds + of specified type (e.g. retrieve transcripts for genes or vice + versa). + + See also fetch_associated_archived() for a different approach to + retrieve this data. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_gene_archive_ids, + get_all_transcript_archive_ids, get_all_translation_archive_ids + Status : At Risk + : under development + +=cut + +sub fetch_all_by_archive_id { + my $self = shift; + my $archive_id = shift; + my $return_type = shift; + + my @result = (); + my $lc_self_type = lc($archive_id->type); + my $lc_return_type = lc($return_type); + + my $sql = qq( + SELECT + ga.${lc_return_type}_stable_id, + ga.${lc_return_type}_version, + m.old_db_name, + m.old_release, + m.old_assembly + FROM gene_archive ga, mapping_session m + WHERE ga.${lc_self_type}_stable_id = ? + AND ga.${lc_self_type}_version = ? + AND ga.mapping_session_id = m.mapping_session_id + ); + + my $sth = $self->prepare($sql); + $sth->bind_param(1, $archive_id->stable_id, SQL_VARCHAR); + $sth->bind_param(2, $archive_id->version, SQL_SMALLINT); + $sth->execute; + + my ($stable_id, $version, $db_name, $release, $assembly); + $sth->bind_columns(\$stable_id, \$version, \$db_name, \$release, \$assembly); + + while ($sth->fetch) { + my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $stable_id, + -version => $version, + -db_name => $db_name, + -release => $release, + -assembly => $assembly, + -type => $return_type, + -adaptor => $self + ); + + push( @result, $new_arch_id ); + } + + $sth->finish(); + return \@result; +} + + +=head2 fetch_associated_archived + + Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - + the ArchiveStableId to fetch associated archived IDs for + Example : my ($arch_gene, $arch_tr, $arch_tl, $pep_seq) = + @{ $archive_adaptor->fetch_associated_archived($arch_id) }; + Description : Fetches associated archived stable IDs from the db for a given + ArchiveStableId (version is taken into account). + Return type : Listref of + ArchiveStableId archived gene + ArchiveStableId archived transcript + (optional) ArchiveStableId archived translation + (optional) peptide sequence + Exceptions : thrown on missing or wrong argument + thrown if ArchiveStableID has no type + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_associated_archived() + Status : At Risk + : under development + +=cut + +sub fetch_associated_archived { + my $self = shift; + my $arch_id = shift; + + throw("Need a Bio::EnsEMBL::ArchiveStableId") unless ($arch_id + and ref($arch_id) and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')); + + my $type = $arch_id->type(); + + if ( !defined($type) ) { + throw("Can't deduce ArchiveStableId type."); + } + + $type = lc($type); + + my $sql = qq( + SELECT ga.gene_stable_id, + ga.gene_version, + ga.transcript_stable_id, + ga.transcript_version, + ga.translation_stable_id, + ga.translation_version, + pa.peptide_seq, + ms.old_release, + ms.old_assembly, + ms.old_db_name + FROM (mapping_session ms, gene_archive ga) + LEFT JOIN peptide_archive pa + ON ga.peptide_archive_id = pa.peptide_archive_id + WHERE ga.mapping_session_id = ms.mapping_session_id + AND ga.${type}_stable_id = ? + AND ga.${type}_version = ? + ); + + my $sth = $self->prepare($sql); + $sth->bind_param(1, $arch_id->stable_id, SQL_VARCHAR); + $sth->bind_param(2, $arch_id->version, SQL_SMALLINT); + $sth->execute; + + my @result = (); + + while (my $r = $sth->fetchrow_hashref) { + + my @row = (); + + # create ArchiveStableIds genes, transcripts and translations + push @row, Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $r->{'gene_stable_id'}, + -version => $r->{'gene_version'}, + -db_name => $r->{'old_db_name'}, + -release => $r->{'old_release'}, + -assembly => $r->{'old_assembly'}, + -type => 'Gene', + -adaptor => $self + ); + + push @row, Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $r->{'transcript_stable_id'}, + -version => $r->{'transcript_version'}, + -db_name => $r->{'old_db_name'}, + -release => $r->{'old_release'}, + -assembly => $r->{'old_assembly'}, + -type => 'Transcript', + -adaptor => $self + ); + + if ($r->{'translation_stable_id'}) { + push @row, Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $r->{'translation_stable_id'}, + -version => $r->{'translation_version'}, + -db_name => $r->{'old_db_name'}, + -release => $r->{'old_release'}, + -assembly => $r->{'old_assembly'}, + -type => 'Translation', + -adaptor => $self + ); + + # push peptide sequence onto result list + push @row, $r->{'peptide_seq'}; + } + + push @result, \@row; + } + + return \@result; +} + + +=head2 fetch_predecessors_by_archive_id + + Arg [1] : Bio::EnsEMBL::ArchiveStableId + Example : none + Description : Retrieve a list of ArchiveStableIds that were mapped to the + given one. This method goes back only one level, to retrieve + a full predecessor history use fetch_predecessor_history, or + ideally fetch_history_tree_by_stable_id for the complete + history network. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_predecessors + Status : At Risk + : under development + +=cut + +sub fetch_predecessors_by_archive_id { + my $self = shift; + my $arch_id = shift; + + my @result; + + if( ! ( defined $arch_id->stable_id() && + defined $arch_id->db_name() )) { + throw( "Need db_name for predecessor retrieval" ); + } + + my $sql = qq( + SELECT + sie.old_stable_id, + sie.old_version, + sie.type, + m.old_db_name, + m.old_release, + m.old_assembly + FROM mapping_session m, stable_id_event sie + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.new_stable_id = ? + AND m.new_db_name = ? + ); + + my $sth = $self->prepare($sql); + $sth->bind_param(1, $arch_id->stable_id, SQL_VARCHAR); + $sth->bind_param(2, $arch_id->db_name, SQL_VARCHAR); + $sth->execute(); + + my ($old_stable_id, $old_version, $type, $old_db_name, $old_release, $old_assembly); + $sth->bind_columns(\$old_stable_id, \$old_version, \$type, \$old_db_name, \$old_release, \$old_assembly); + + while ($sth->fetch) { + if (defined $old_stable_id) { + my $old_arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $old_stable_id, + -version => $old_version, + -db_name => $old_db_name, + -release => $old_release, + -assembly => $old_assembly, + -type => $type, + -adaptor => $self + ); + push( @result, $old_arch_id ); + } + } + $sth->finish(); + + # if you didn't find any predecessors, there might be a gap in the + # mapping_session history (i.e. databases in mapping_session don't chain). To + # bridge the gap, look in the previous mapping_session for identical + # stable_id.version + unless (@result) { + + $sql = qq( + SELECT + sie.new_stable_id, + sie.new_version, + sie.type, + m.new_db_name, + m.new_release, + m.new_assembly + FROM mapping_session m, stable_id_event sie + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.new_stable_id = ? + AND m.new_db_name = ? + ); + + $sth = $self->prepare($sql); + + my $curr_dbname = $arch_id->db_name; + + PREV: + while (my $prev_dbname = $self->previous_dbname($curr_dbname)) { + + $sth->bind_param(1,$arch_id->stable_id, SQL_VARCHAR); + $sth->bind_param(2,$prev_dbname, SQL_VARCHAR); + $sth->execute(); + + $sth->bind_columns(\$old_stable_id, \$old_version, \$type, \$old_db_name, \$old_release, \$old_assembly); + + while( $sth->fetch() ) { + if (defined $old_stable_id) { + my $old_arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $old_stable_id, + -version => $old_version, + -db_name => $old_db_name, + -release => $old_release, + -assembly => $old_assembly, + -type => $type, + -adaptor => $self + ); + push( @result, $old_arch_id ); + + last PREV; + } + } + + $curr_dbname = $prev_dbname; + + } + + $sth->finish(); + } + + return \@result; +} + + +=head2 fetch_successors_by_archive_id + + Arg [1] : Bio::EnsEMBL::ArchiveStableId + Example : none + Description : Retrieve a list of ArchiveStableIds that the given one was + mapped to. This method goes forward only one level, to retrieve + a full successor history use fetch_successor_history, or + ideally fetch_history_tree_by_stable_id for the complete + history network. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_successors + Status : At Risk + : under development + +=cut + +sub fetch_successors_by_archive_id { + my $self = shift; + my $arch_id = shift; + my @result; + + + if( ! ( defined $arch_id->stable_id() && + defined $arch_id->db_name() )) { + throw( "Need db_name for successor retrieval" ); + } + + my $sql = qq( + SELECT + sie.new_stable_id, + sie.new_version, + sie.type, + m.new_db_name, + m.new_release, + m.new_assembly + FROM mapping_session m, stable_id_event sie + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.old_stable_id = ? + AND m.old_db_name = ? + ); + + my $sth = $self->prepare( $sql ); + $sth->bind_param(1,$arch_id->stable_id,SQL_VARCHAR); + $sth->bind_param(2,$arch_id->db_name,SQL_VARCHAR); + $sth->execute(); + + my ($new_stable_id, $new_version, $type, $new_db_name, $new_release, $new_assembly); + $sth->bind_columns(\$new_stable_id, \$new_version, \$type, \$new_db_name, \$new_release, \$new_assembly); + + while( $sth->fetch() ) { + if( defined $new_stable_id ) { + my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $new_stable_id, + -version => $new_version, + -db_name => $new_db_name, + -release => $new_release, + -assembly => $new_assembly, + -type => $type, + -adaptor => $self + ); + + push( @result, $new_arch_id ); + } + } + $sth->finish(); + + # if you didn't find any successors, there might be a gap in the + # mapping_session history (i.e. databases in mapping_session don't chain). To + # bridge the gap, look in the next mapping_session for identical + # stable_id.version + unless (@result) { + + $sql = qq( + SELECT + sie.old_stable_id, + sie.old_version, + sie.type, + m.old_db_name, + m.old_release, + m.old_assembly + FROM mapping_session m, stable_id_event sie + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.old_stable_id = ? + AND m.old_db_name = ? + ); + + $sth = $self->prepare($sql); + + my $curr_dbname = $arch_id->db_name; + + NEXTDB: + while (my $next_dbname = $self->next_dbname($curr_dbname)) { + + $sth->bind_param(1, $arch_id->stable_id, SQL_VARCHAR); + $sth->bind_param(2, $next_dbname, SQL_VARCHAR); + $sth->execute(); + + $sth->bind_columns(\$new_stable_id, \$new_version, \$type, \$new_db_name, \$new_release, \$new_assembly); + + while( $sth->fetch() ) { + if (defined $new_stable_id) { + my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $new_stable_id, + -version => $new_version, + -db_name => $new_db_name, + -release => $new_release, + -assembly => $new_assembly, + -type => $type, + -adaptor => $self + ); + + push( @result, $new_arch_id ); + + last NEXTDB; + } + } + + $curr_dbname = $next_dbname; + + } + + $sth->finish(); + } + + return \@result; +} + + + +=head2 fetch_history_tree_by_stable_id + + Arg[1] : String $stable_id - the stable ID to fetch the history tree for + Arg[2] : (optional) Int $num_high_scorers + number of mappings per stable ID allowed when filtering + Arg[3] : (optional) Int $max_rows + maximum number of stable IDs in history tree (used for + filtering) + Example : my $history = $archive_adaptor->fetch_history_tree_by_stable_id( + 'ENSG00023747897'); + Description : Returns the history tree for a given stable ID. This will + include a network of all stable IDs it is related to. The + method will try to return a minimal (sparse) set of nodes + (ArchiveStableIds) and links (StableIdEvents) by removing any + redundant entries and consolidating mapping events so that only + changes are recorded. + Return type : Bio::EnsEMBL::StableIdHistoryTree + Exceptions : thrown on missing argument + Caller : Bio::EnsEMBL::ArchiveStableId::get_history_tree, general + Status : At Risk + : under development + +=cut + +sub fetch_history_tree_by_stable_id { + my ($self, $stable_id, $num_high_scorers, $max_rows) = @_; + + throw("Expecting a stable ID argument.") unless $stable_id; + + $num_high_scorers ||= NUM_HIGH_SCORERS; + $max_rows ||= MAX_ROWS; + + # using a UNION is much faster in this query than somthing like + # "... AND (sie.old_stable_id = ?) OR (sie.new_stable_id = ?)" + my $sql = qq( + SELECT sie.old_stable_id, sie.old_version, + ms.old_db_name, ms.old_release, ms.old_assembly, + sie.new_stable_id, sie.new_version, + ms.new_db_name, ms.new_release, ms.new_assembly, + sie.type, sie.score + FROM stable_id_event sie, mapping_session ms + WHERE sie.mapping_session_id = ms.mapping_session_id + AND sie.old_stable_id = ? + UNION + SELECT sie.old_stable_id, sie.old_version, + ms.old_db_name, ms.old_release, ms.old_assembly, + sie.new_stable_id, sie.new_version, + ms.new_db_name, ms.new_release, ms.new_assembly, + sie.type, sie.score + FROM stable_id_event sie, mapping_session ms + WHERE sie.mapping_session_id = ms.mapping_session_id + AND sie.new_stable_id = ? + ); + + my $sth = $self->prepare($sql); + + my $history = Bio::EnsEMBL::StableIdHistoryTree->new( + -CURRENT_DBNAME => $self->dbc->dbname, + -CURRENT_RELEASE => $self->get_current_release, + -CURRENT_ASSEMBLY => $self->get_current_assembly, + ); + + # remember stable IDs you need to do and those that are done. Initialise the + # former hash with the focus stable ID + my %do = ($stable_id => 1); + my %done; + + # while we got someting to do + while (my ($id) = keys(%do)) { + + # if we already have more than MAX_ROWS stable IDs in this tree, we can't + # build the full tree. Return undef. + if (scalar(keys(%done)) > $max_rows) { + # warning("Too many related stable IDs (".scalar(keys(%done)).") to draw a history tree."); + $history->is_incomplete(1); + $sth->finish; + last; + } + + # mark this stable ID as done + delete $do{$id}; + $done{$id} = 1; + + # fetch all stable IDs related to this one from the database + $sth->bind_param(1, $id, SQL_VARCHAR); + $sth->bind_param(2, $id, SQL_VARCHAR); + $sth->execute; + + my @events; + + while (my $r = $sth->fetchrow_hashref) { + + # + # create old and new ArchiveStableIds and a StableIdEvent to link them + # add all of these to the history tree + # + my ($old_id, $new_id); + + if ($r->{'old_stable_id'}) { + $old_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $r->{'old_stable_id'}, + -version => $r->{'old_version'}, + -db_name => $r->{'old_db_name'}, + -release => $r->{'old_release'}, + -assembly => $r->{'old_assembly'}, + -type => $r->{'type'}, + -adaptor => $self + ); + } + + if ($r->{'new_stable_id'}) { + $new_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $r->{'new_stable_id'}, + -version => $r->{'new_version'}, + -db_name => $r->{'new_db_name'}, + -release => $r->{'new_release'}, + -assembly => $r->{'new_assembly'}, + -type => $r->{'type'}, + -adaptor => $self + ); + } + + my $event = Bio::EnsEMBL::StableIdEvent->new( + -old_id => $old_id, + -new_id => $new_id, + -score => $r->{'score'} + ); + + push @events, $event; + + } + + # filter out low-scoring events; the number of highest scoring events + # returned is defined by NUM_HIGH_SCORERS + my @others; + + foreach my $event (@events) { + + my $old_id = $event->old_ArchiveStableId; + my $new_id = $event->new_ArchiveStableId; + + # creation, deletion and mapping-to-self events are added to the history + # tree directly + if (!$old_id || !$new_id || ($old_id->stable_id eq $new_id->stable_id)) { + $history->add_StableIdEvents($event); + } else { + push @others, $event; + } + + } + + #if (scalar(@others) > $num_high_scorers) { + # warn "Filtering ".(scalar(@others) - $num_high_scorers). + # " low-scoring events.\n"; + #} + + my $k = 0; + foreach my $event (sort { $b->score <=> $a->score } @others) { + $history->add_StableIdEvents($event); + + # mark stable IDs as todo if appropriate + $do{$event->old_ArchiveStableId->stable_id} = 1 + unless $done{$event->old_ArchiveStableId->stable_id}; + $do{$event->new_ArchiveStableId->stable_id} = 1 + unless $done{$event->new_ArchiveStableId->stable_id}; + + last if (++$k == $num_high_scorers); + } + + } + + $sth->finish; + + # try to consolidate the tree (remove redundant nodes, bridge gaps) + $history->consolidate_tree; + + # now add ArchiveStableIds for current Ids not found in the archive + $self->add_all_current_to_history($history); + + # calculate grid coordinates for the sorted tree; this will also try to + # untangle the tree + $history->calculate_coords; + + return $history; +} + + +=head2 add_all_current_to_history + + Arg[1] : Bio::EnsEMBL::StableIdHistoryTree $history - + the StableIdHistoryTree object to add the current IDs to + Description : This method adds the current versions of all stable IDs found + in a StableIdHistoryTree object to the tree, by creating + appropriate Events for the stable IDs found in the *_stable_id + tables. This is a helper method for + fetch_history_tree_by_stable_id(), see there for more + documentation. + Return type : none (passed-in object is manipulated) + Exceptions : thrown on missing or wrong argument + Caller : internal + Status : At Risk + : under development + +=cut + +sub add_all_current_to_history { + my $self = shift; + my $history = shift; + + unless ($history and $history->isa('Bio::EnsEMBL::StableIdHistoryTree')) { + throw("Need a Bio::EnsEMBL::StableIdHistoryTree."); + } + + my @ids = @{ $history->get_unique_stable_ids }; + my $id_string = join("', '", @ids); + + my $tmp_id = Bio::EnsEMBL::ArchiveStableId->new(-stable_id => $ids[0]); + my $type = lc($self->_resolve_type($tmp_id)); + return unless ($type); + + # get current stable IDs from db + my $sql = qq( + SELECT stable_id, version FROM ${type} + WHERE stable_id IN ('$id_string') + ); + my $sth = $self->prepare($sql); + $sth->execute; + + while (my ($stable_id, $version) = $sth->fetchrow_array) { + + my $new_id = Bio::EnsEMBL::ArchiveStableId->new( + -stable_id => $stable_id, + -version => $version, + -current_version => $version, + -db_name => $self->dbc->dbname, + -release => $self->get_current_release, + -assembly => $self->get_current_assembly, + -type => $type, + -adaptor => $self + ); + + my $event = $history->get_latest_StableIdEvent($new_id); + next unless ($event); + + if ($event->old_ArchiveStableId and + $event->old_ArchiveStableId->stable_id eq $stable_id) { + + # latest event was a self event + # update it with current stable ID and add to tree + $event->new_ArchiveStableId($new_id); + + } else { + + # latest event was a non-self event + # create a new event where the old_id is the new_id from latest + my $new_event = Bio::EnsEMBL::StableIdEvent->new( + -old_id => $event->new_ArchiveStableId, + -new_id => $new_id, + -score => $event->score, + ); + $history->add_StableIdEvents($new_event); + } + + } + + # refresh node cache + $history->flush_ArchiveStableIds; + $history->add_ArchiveStableIds_for_events; +} + + +=head2 fetch_successor_history + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Gives back a list of archive stable ids which are successors in + the stable_id_event tree of the given stable_id. Might well be + empty. + + This method isn't deprecated, but in most cases you will rather + want to use fetch_history_tree_by_stable_id(). + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Since every ArchiveStableId knows about it's successors, this is + a linked tree. + Exceptions : none + Caller : webcode for archive + Status : At Risk + : under development + +=cut + +sub fetch_successor_history { + my $self = shift; + my $arch_id = shift; + + my $current_db_name = $self->list_dbnames->[0]; + my $dbname = $arch_id->db_name; + + if ($dbname eq $current_db_name) { + return [$arch_id]; + } + + my $old = []; + my @result = (); + + push @$old, $arch_id; + + while ($dbname ne $current_db_name) { + my $new = []; + while (my $asi = (shift @$old)) { + push @$new, @{ $asi->get_all_successors }; + } + + if (@$new) { + $dbname = $new->[0]->db_name; + } else { + last; + } + + # filter duplicates + my %unique = map { join(":", $_->stable_id, $_->version, $_->release) => + $_ } @$new; + @$new = values %unique; + + @$old = @$new; + push @result, @$new; + } + + return \@result; +} + + +=head2 fetch_predecessor_history + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Gives back a list of archive stable ids which are predecessors + in the stable_id_event tree of the given stable_id. Might well + be empty. + + This method isn't deprecated, but in most cases you will rather + want to use fetch_history_tree_by_stable_id(). + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Since every ArchiveStableId knows about it's successors, this is + a linked tree. + Exceptions : none + Caller : webcode for archive + Status : At Risk + : under development + +=cut + +sub fetch_predecessor_history { + my $self = shift; + my $arch_id = shift; + + my $oldest_db_name = $self->list_dbnames->[-1]; + my $dbname = $arch_id->db_name; + + if ($dbname eq $oldest_db_name) { + return [$arch_id]; + } + + my $old = []; + my @result = (); + + push @$old, $arch_id; + + while ($dbname ne $oldest_db_name) { + my $new = []; + while (my $asi = (shift @$old)) { + push @$new, @{ $asi->get_all_predecessors }; + } + + if( @$new ) { + $dbname = $new->[0]->db_name; + } else { + last; + } + + # filter duplicates + my %unique = map { join(":", $_->stable_id, $_->version, $_->release) => + $_ } @$new; + @$new = values %unique; + + @$old = @$new; + push @result, @$new; + } + + return \@result; +} + + +=head2 list_dbnames + + Args : none + Example : none + Description : A list of available database names from the latest (current) to + the oldest (ordered). + Returntype : listref of strings + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub list_dbnames { + my $self = shift; + + if( ! defined $self->{'dbnames'} ) { + + my $sql = qq( + SELECT old_db_name, new_db_name + FROM mapping_session + ORDER BY created DESC + ); + my $sth = $self->prepare( $sql ); + $sth->execute(); + my ( $old_db_name, $new_db_name ); + + my @dbnames = (); + my %seen; + + $sth->bind_columns( \$old_db_name, \$new_db_name ); + + while( $sth->fetch() ) { + # this code now can deal with non-chaining mapping sessions + push(@{ $self->{'dbnames'} }, $new_db_name) unless ($seen{$new_db_name}); + $seen{$new_db_name} = 1; + + push(@{ $self->{'dbnames'} }, $old_db_name) unless ($seen{$old_db_name}); + $seen{$old_db_name} = 1; + } + + $sth->finish(); + + } + + return $self->{'dbnames'}; +} + + +=head2 previous_dbname + + Arg[1] : String $dbname - focus db name + Example : my $prev_db = $self->previous_dbname($curr_db); + Description : Returns the name of the next oldest database which has mapping + session information. + Return type : String (or undef if not available) + Exceptions : none + Caller : general + Status : At Risk + +=cut + +sub previous_dbname { + my $self = shift; + my $dbname = shift; + + my $curr_idx = $self->_dbname_index($dbname); + my @dbnames = @{ $self->list_dbnames }; + + if ($curr_idx == @dbnames) { + # this is the oldest dbname, so no previous one available + return undef; + } else { + return $dbnames[$curr_idx+1]; + } +} + + +=head2 next_dbname + + Arg[1] : String $dbname - focus db name + Example : my $prev_db = $self->next_dbname($curr_db); + Description : Returns the name of the next newest database which has mapping + session information. + Return type : String (or undef if not available) + Exceptions : none + Caller : general + Status : At Risk + +=cut + +sub next_dbname { + my $self = shift; + my $dbname = shift; + + my $curr_idx = $self->_dbname_index($dbname); + my @dbnames = @{ $self->list_dbnames }; + + if ($curr_idx == 0) { + # this is the latest dbname, so no next one available + return undef; + } else { + return $dbnames[$curr_idx-1]; + } +} + + +# +# helper method to return the array index of a database in the ordered list of +# available databases (as returned by list_dbnames() +# +sub _dbname_index { + my $self = shift; + my $dbname = shift; + + my @dbnames = @{ $self->list_dbnames }; + + for (my $i = 0; $i < @dbnames; $i++) { + if ($dbnames[$i] eq $dbname) { + return $i; + } + } +} + + +=head2 get_peptide + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Retrieves the peptide string for given ArchiveStableId. If its + not a peptide or not in the database returns undef. + Returntype : string or undef + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_peptide, general + Status : At Risk + : under development + +=cut + +sub get_peptide { + my $self = shift; + my $arch_id = shift; + + if ( lc( $arch_id->type() ) ne 'translation' ) { + return undef; + } + + my $sql = qq( + SELECT pa.peptide_seq + FROM peptide_archive pa, gene_archive ga + WHERE ga.translation_stable_id = ? + AND ga.translation_version = ? + AND ga.peptide_archive_id = pa.peptide_archive_id + ); + + my $sth = $self->prepare($sql); + $sth->bind_param( 1, $arch_id->stable_id, SQL_VARCHAR ); + $sth->bind_param( 2, $arch_id->version, SQL_SMALLINT ); + $sth->execute(); + + my ($peptide_seq) = $sth->fetchrow_array(); + $sth->finish(); + + return $peptide_seq; +} ## end sub get_peptide + + +=head2 get_current_release + + Example : my $current_release = $archive_adaptor->get_current_release; + Description : Returns the current release number (as found in the meta table). + Return type : Int + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_current_release { + my $self = shift; + + unless ($self->{'current_release'}) { + my $mca = $self->db->get_MetaContainer; + my ($release) = @{ $mca->list_value_by_key('schema_version') }; + $self->{'current_release'} = $release; + } + + return $self->{'current_release'}; +} + + +=head2 get_current_assembly + + Example : my $current_assembly = $archive_adaptor->get_current_assembly; + Description : Returns the current assembly version (as found in the meta + table). + Return type : String + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub get_current_assembly { + my $self = shift; + + unless ($self->{'current_assembly'}) { + my $mca = $self->db->get_MetaContainer; + my ($assembly) = @{ $mca->list_value_by_key('assembly.default') }; + $self->{'current_assembly'} = $assembly; + } + + return $self->{'current_assembly'}; +} + + +=head2 lookup_current + + Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - + the stalbe ID to find the current version for + Example : if ($self->lookup_version($arch_id) { + $arch_id->version($arch_id->current_version); + $arch_id->db_name($self->dbc->dbname); + Description : Look in [gene|transcript|translation]_stable_id if you can find + a current version for this stable ID. Set + ArchiveStableId->current_version if found. + Return type : Boolean (TRUE if current version found, else FALSE) + Exceptions : none + Caller : general + Status : At Risk + : under development + +=cut + +sub lookup_current { + my $self = shift; + my $arch_id = shift; + + my $type = lc( $arch_id->type ); + + unless ($type) { + warning("Can't lookup current version without a type."); + return 0; + } + + my $sql = qq( + SELECT version FROM ${type} + WHERE stable_id = ? + ); + my $sth = $self->prepare($sql); + $sth->execute( $arch_id->stable_id ); + my ($version) = $sth->fetchrow_array; + $sth->finish; + + if ($version) { + $arch_id->current_version($version); + return 1; + } + + # didn't find a current version + return 0; +} ## end sub lookup_current + + +# infer type from stable ID format +sub _resolve_type { + my $self = shift; + my $arch_id = shift; + + my $stable_id = $arch_id->stable_id(); + my $id_type; + + # first, try to infer type from stable ID format + # + # Anopheles IDs + if ($stable_id =~ /^AGAP.*/) { + if ($stable_id =~ /.*-RA/) { + $id_type = "Transcript"; + } elsif ($stable_id =~ /.*-PA/) { + $id_type = "Translation"; + } else { + $id_type = "Gene"; + } + + # standard Ensembl IDs + } elsif ($stable_id =~ /.*G\d+$/) { + $id_type = "Gene"; + } elsif ($stable_id =~ /.*T\d+$/) { + $id_type = "Transcript"; + } elsif ($stable_id =~ /.*P\d+$/) { + $id_type = "Translation"; + } elsif ($stable_id =~ /.*E\d+$/) { + $id_type = "Exon"; + + # if guessing fails, look in db + } else { + my $sql = qq( + SELECT type from stable_id_event + WHERE old_stable_id = ? + OR new_stable_id = ? + ); + my $sth = $self->prepare($sql); + $sth->execute($stable_id, $stable_id); + ($id_type) = $sth->fetchrow_array; + $sth->finish; + } + + warning("Couldn't resolve stable ID type.") unless ($id_type); + + $arch_id->type($id_type); +} + + +1; +